1249259Sdim//===- llvm/Analysis/TargetTransformInfo.h ----------------------*- C++ -*-===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//===----------------------------------------------------------------------===// 9249259Sdim// 10249259Sdim// This pass exposes codegen information to IR-level passes. Every 11249259Sdim// transformation that uses codegen information is broken into three parts: 12249259Sdim// 1. The IR-level analysis pass. 13249259Sdim// 2. The IR-level transformation interface which provides the needed 14249259Sdim// information. 15249259Sdim// 3. Codegen-level implementation which uses target-specific hooks. 16249259Sdim// 17249259Sdim// This file defines #2, which is the interface that IR-level transformations 18249259Sdim// use for querying the codegen. 19249259Sdim// 20249259Sdim//===----------------------------------------------------------------------===// 21249259Sdim 22249259Sdim#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 23249259Sdim#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 24249259Sdim 25249259Sdim#include "llvm/IR/Intrinsics.h" 26249259Sdim#include "llvm/Pass.h" 27249259Sdim#include "llvm/Support/DataTypes.h" 28249259Sdim 29249259Sdimnamespace llvm { 30249259Sdim 31249259Sdimclass GlobalValue; 32263508Sdimclass Loop; 33249259Sdimclass Type; 34249259Sdimclass User; 35249259Sdimclass Value; 36249259Sdim 37249259Sdim/// TargetTransformInfo - This pass provides access to the codegen 38249259Sdim/// interfaces that are needed for IR-level transformations. 39249259Sdimclass TargetTransformInfo { 40249259Sdimprotected: 41249259Sdim /// \brief The TTI instance one level down the stack. 42249259Sdim /// 43249259Sdim /// This is used to implement the default behavior all of the methods which 44249259Sdim /// is to delegate up through the stack of TTIs until one can answer the 45249259Sdim /// query. 46249259Sdim TargetTransformInfo *PrevTTI; 47249259Sdim 48249259Sdim /// \brief The top of the stack of TTI analyses available. 49249259Sdim /// 50249259Sdim /// This is a convenience routine maintained as TTI analyses become available 51249259Sdim /// that complements the PrevTTI delegation chain. When one part of an 52249259Sdim /// analysis pass wants to query another part of the analysis pass it can use 53249259Sdim /// this to start back at the top of the stack. 54249259Sdim TargetTransformInfo *TopTTI; 55249259Sdim 56249259Sdim /// All pass subclasses must in their initializePass routine call 57249259Sdim /// pushTTIStack with themselves to update the pointers tracking the previous 58249259Sdim /// TTI instance in the analysis group's stack, and the top of the analysis 59249259Sdim /// group's stack. 60249259Sdim void pushTTIStack(Pass *P); 61249259Sdim 62249259Sdim /// All pass subclasses must in their finalizePass routine call popTTIStack 63249259Sdim /// to update the pointers tracking the previous TTI instance in the analysis 64249259Sdim /// group's stack, and the top of the analysis group's stack. 65249259Sdim void popTTIStack(); 66249259Sdim 67249259Sdim /// All pass subclasses must call TargetTransformInfo::getAnalysisUsage. 68249259Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const; 69249259Sdim 70249259Sdimpublic: 71249259Sdim /// This class is intended to be subclassed by real implementations. 72249259Sdim virtual ~TargetTransformInfo() = 0; 73249259Sdim 74249259Sdim /// \name Generic Target Information 75249259Sdim /// @{ 76249259Sdim 77249259Sdim /// \brief Underlying constants for 'cost' values in this interface. 78249259Sdim /// 79249259Sdim /// Many APIs in this interface return a cost. This enum defines the 80249259Sdim /// fundamental values that should be used to interpret (and produce) those 81249259Sdim /// costs. The costs are returned as an unsigned rather than a member of this 82249259Sdim /// enumeration because it is expected that the cost of one IR instruction 83249259Sdim /// may have a multiplicative factor to it or otherwise won't fit directly 84249259Sdim /// into the enum. Moreover, it is common to sum or average costs which works 85249259Sdim /// better as simple integral values. Thus this enum only provides constants. 86249259Sdim /// 87249259Sdim /// Note that these costs should usually reflect the intersection of code-size 88249259Sdim /// cost and execution cost. A free instruction is typically one that folds 89249259Sdim /// into another instruction. For example, reg-to-reg moves can often be 90249259Sdim /// skipped by renaming the registers in the CPU, but they still are encoded 91249259Sdim /// and thus wouldn't be considered 'free' here. 92249259Sdim enum TargetCostConstants { 93249259Sdim TCC_Free = 0, ///< Expected to fold away in lowering. 94249259Sdim TCC_Basic = 1, ///< The cost of a typical 'add' instruction. 95249259Sdim TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. 96249259Sdim }; 97249259Sdim 98249259Sdim /// \brief Estimate the cost of a specific operation when lowered. 99249259Sdim /// 100249259Sdim /// Note that this is designed to work on an arbitrary synthetic opcode, and 101249259Sdim /// thus work for hypothetical queries before an instruction has even been 102249259Sdim /// formed. However, this does *not* work for GEPs, and must not be called 103249259Sdim /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as 104249259Sdim /// analyzing a GEP's cost required more information. 105249259Sdim /// 106249259Sdim /// Typically only the result type is required, and the operand type can be 107249259Sdim /// omitted. However, if the opcode is one of the cast instructions, the 108249259Sdim /// operand type is required. 109249259Sdim /// 110249259Sdim /// The returned cost is defined in terms of \c TargetCostConstants, see its 111249259Sdim /// comments for a detailed explanation of the cost values. 112249259Sdim virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, 113249259Sdim Type *OpTy = 0) const; 114249259Sdim 115249259Sdim /// \brief Estimate the cost of a GEP operation when lowered. 116249259Sdim /// 117249259Sdim /// The contract for this function is the same as \c getOperationCost except 118249259Sdim /// that it supports an interface that provides extra information specific to 119249259Sdim /// the GEP operation. 120249259Sdim virtual unsigned getGEPCost(const Value *Ptr, 121249259Sdim ArrayRef<const Value *> Operands) const; 122249259Sdim 123249259Sdim /// \brief Estimate the cost of a function call when lowered. 124249259Sdim /// 125249259Sdim /// The contract for this is the same as \c getOperationCost except that it 126249259Sdim /// supports an interface that provides extra information specific to call 127249259Sdim /// instructions. 128249259Sdim /// 129249259Sdim /// This is the most basic query for estimating call cost: it only knows the 130249259Sdim /// function type and (potentially) the number of arguments at the call site. 131249259Sdim /// The latter is only interesting for varargs function types. 132249259Sdim virtual unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const; 133249259Sdim 134249259Sdim /// \brief Estimate the cost of calling a specific function when lowered. 135249259Sdim /// 136249259Sdim /// This overload adds the ability to reason about the particular function 137249259Sdim /// being called in the event it is a library call with special lowering. 138249259Sdim virtual unsigned getCallCost(const Function *F, int NumArgs = -1) const; 139249259Sdim 140249259Sdim /// \brief Estimate the cost of calling a specific function when lowered. 141249259Sdim /// 142249259Sdim /// This overload allows specifying a set of candidate argument values. 143249259Sdim virtual unsigned getCallCost(const Function *F, 144249259Sdim ArrayRef<const Value *> Arguments) const; 145249259Sdim 146249259Sdim /// \brief Estimate the cost of an intrinsic when lowered. 147249259Sdim /// 148249259Sdim /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 149249259Sdim virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 150249259Sdim ArrayRef<Type *> ParamTys) const; 151249259Sdim 152249259Sdim /// \brief Estimate the cost of an intrinsic when lowered. 153249259Sdim /// 154249259Sdim /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 155249259Sdim virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 156249259Sdim ArrayRef<const Value *> Arguments) const; 157249259Sdim 158249259Sdim /// \brief Estimate the cost of a given IR user when lowered. 159249259Sdim /// 160249259Sdim /// This can estimate the cost of either a ConstantExpr or Instruction when 161249259Sdim /// lowered. It has two primary advantages over the \c getOperationCost and 162249259Sdim /// \c getGEPCost above, and one significant disadvantage: it can only be 163249259Sdim /// used when the IR construct has already been formed. 164249259Sdim /// 165249259Sdim /// The advantages are that it can inspect the SSA use graph to reason more 166249259Sdim /// accurately about the cost. For example, all-constant-GEPs can often be 167249259Sdim /// folded into a load or other instruction, but if they are used in some 168249259Sdim /// other context they may not be folded. This routine can distinguish such 169249259Sdim /// cases. 170249259Sdim /// 171249259Sdim /// The returned cost is defined in terms of \c TargetCostConstants, see its 172249259Sdim /// comments for a detailed explanation of the cost values. 173249259Sdim virtual unsigned getUserCost(const User *U) const; 174249259Sdim 175263508Sdim /// \brief hasBranchDivergence - Return true if branch divergence exists. 176263508Sdim /// Branch divergence has a significantly negative impact on GPU performance 177263508Sdim /// when threads in the same wavefront take different paths due to conditional 178263508Sdim /// branches. 179263508Sdim virtual bool hasBranchDivergence() const; 180263508Sdim 181249259Sdim /// \brief Test whether calls to a function lower to actual program function 182249259Sdim /// calls. 183249259Sdim /// 184249259Sdim /// The idea is to test whether the program is likely to require a 'call' 185249259Sdim /// instruction or equivalent in order to call the given function. 186249259Sdim /// 187249259Sdim /// FIXME: It's not clear that this is a good or useful query API. Client's 188249259Sdim /// should probably move to simpler cost metrics using the above. 189249259Sdim /// Alternatively, we could split the cost interface into distinct code-size 190249259Sdim /// and execution-speed costs. This would allow modelling the core of this 191249259Sdim /// query more accurately as the a call is a single small instruction, but 192249259Sdim /// incurs significant execution cost. 193249259Sdim virtual bool isLoweredToCall(const Function *F) const; 194249259Sdim 195263508Sdim /// Parameters that control the generic loop unrolling transformation. 196263508Sdim struct UnrollingPreferences { 197263508Sdim /// The cost threshold for the unrolled loop, compared to 198263508Sdim /// CodeMetrics.NumInsts aggregated over all basic blocks in the loop body. 199263508Sdim /// The unrolling factor is set such that the unrolled loop body does not 200263508Sdim /// exceed this cost. Set this to UINT_MAX to disable the loop body cost 201263508Sdim /// restriction. 202263508Sdim unsigned Threshold; 203263508Sdim /// The cost threshold for the unrolled loop when optimizing for size (set 204263508Sdim /// to UINT_MAX to disable). 205263508Sdim unsigned OptSizeThreshold; 206263508Sdim /// A forced unrolling factor (the number of concatenated bodies of the 207263508Sdim /// original loop in the unrolled loop body). When set to 0, the unrolling 208263508Sdim /// transformation will select an unrolling factor based on the current cost 209263508Sdim /// threshold and other factors. 210263508Sdim unsigned Count; 211263508Sdim /// Allow partial unrolling (unrolling of loops to expand the size of the 212263508Sdim /// loop body, not only to eliminate small constant-trip-count loops). 213263508Sdim bool Partial; 214263508Sdim /// Allow runtime unrolling (unrolling of loops to expand the size of the 215263508Sdim /// loop body even when the number of loop iterations is not known at compile 216263508Sdim /// time). 217263508Sdim bool Runtime; 218263508Sdim }; 219263508Sdim 220263508Sdim /// \brief Get target-customized preferences for the generic loop unrolling 221263508Sdim /// transformation. The caller will initialize UP with the current 222263508Sdim /// target-independent defaults. 223263508Sdim virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; 224263508Sdim 225249259Sdim /// @} 226249259Sdim 227249259Sdim /// \name Scalar Target Information 228249259Sdim /// @{ 229249259Sdim 230249259Sdim /// \brief Flags indicating the kind of support for population count. 231249259Sdim /// 232249259Sdim /// Compared to the SW implementation, HW support is supposed to 233249259Sdim /// significantly boost the performance when the population is dense, and it 234249259Sdim /// may or may not degrade performance if the population is sparse. A HW 235249259Sdim /// support is considered as "Fast" if it can outperform, or is on a par 236249259Sdim /// with, SW implementation when the population is sparse; otherwise, it is 237249259Sdim /// considered as "Slow". 238249259Sdim enum PopcntSupportKind { 239249259Sdim PSK_Software, 240249259Sdim PSK_SlowHardware, 241249259Sdim PSK_FastHardware 242249259Sdim }; 243249259Sdim 244249259Sdim /// isLegalAddImmediate - Return true if the specified immediate is legal 245249259Sdim /// add immediate, that is the target has add instructions which can add 246249259Sdim /// a register with the immediate without having to materialize the 247249259Sdim /// immediate into a register. 248249259Sdim virtual bool isLegalAddImmediate(int64_t Imm) const; 249249259Sdim 250249259Sdim /// isLegalICmpImmediate - Return true if the specified immediate is legal 251249259Sdim /// icmp immediate, that is the target has icmp instructions which can compare 252249259Sdim /// a register against the immediate without having to materialize the 253249259Sdim /// immediate into a register. 254249259Sdim virtual bool isLegalICmpImmediate(int64_t Imm) const; 255249259Sdim 256249259Sdim /// isLegalAddressingMode - Return true if the addressing mode represented by 257249259Sdim /// AM is legal for this target, for a load/store of the specified type. 258249259Sdim /// The type may be VoidTy, in which case only return true if the addressing 259249259Sdim /// mode is legal for a load/store of any legal type. 260249259Sdim /// TODO: Handle pre/postinc as well. 261249259Sdim virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, 262249259Sdim int64_t BaseOffset, bool HasBaseReg, 263249259Sdim int64_t Scale) const; 264249259Sdim 265263508Sdim /// \brief Return the cost of the scaling factor used in the addressing 266263508Sdim /// mode represented by AM for this target, for a load/store 267263508Sdim /// of the specified type. 268263508Sdim /// If the AM is supported, the return value must be >= 0. 269263508Sdim /// If the AM is not supported, it returns a negative value. 270263508Sdim /// TODO: Handle pre/postinc as well. 271263508Sdim virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 272263508Sdim int64_t BaseOffset, bool HasBaseReg, 273263508Sdim int64_t Scale) const; 274263508Sdim 275249259Sdim /// isTruncateFree - Return true if it's free to truncate a value of 276249259Sdim /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 277249259Sdim /// register EAX to i16 by referencing its sub-register AX. 278249259Sdim virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; 279249259Sdim 280249259Sdim /// Is this type legal. 281249259Sdim virtual bool isTypeLegal(Type *Ty) const; 282249259Sdim 283249259Sdim /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes 284249259Sdim virtual unsigned getJumpBufAlignment() const; 285249259Sdim 286249259Sdim /// getJumpBufSize - returns the target's jmp_buf size in bytes. 287249259Sdim virtual unsigned getJumpBufSize() const; 288249259Sdim 289249259Sdim /// shouldBuildLookupTables - Return true if switches should be turned into 290249259Sdim /// lookup tables for the target. 291249259Sdim virtual bool shouldBuildLookupTables() const; 292249259Sdim 293249259Sdim /// getPopcntSupport - Return hardware support for population count. 294249259Sdim virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; 295249259Sdim 296263508Sdim /// haveFastSqrt -- Return true if the hardware has a fast square-root 297263508Sdim /// instruction. 298263508Sdim virtual bool haveFastSqrt(Type *Ty) const; 299263508Sdim 300249259Sdim /// getIntImmCost - Return the expected cost of materializing the given 301249259Sdim /// integer immediate of the specified type. 302249259Sdim virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; 303249259Sdim 304249259Sdim /// @} 305249259Sdim 306249259Sdim /// \name Vector Target Information 307249259Sdim /// @{ 308249259Sdim 309249259Sdim /// \brief The various kinds of shuffle patterns for vector queries. 310249259Sdim enum ShuffleKind { 311249259Sdim SK_Broadcast, ///< Broadcast element 0 to all other elements. 312249259Sdim SK_Reverse, ///< Reverse the order of the vector. 313249259Sdim SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. 314249259Sdim SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset. 315249259Sdim }; 316249259Sdim 317263508Sdim /// \brief Additional information about an operand's possible values. 318249259Sdim enum OperandValueKind { 319249259Sdim OK_AnyValue, // Operand can have any value. 320249259Sdim OK_UniformValue, // Operand is uniform (splat of a value). 321249259Sdim OK_UniformConstantValue // Operand is uniform constant. 322249259Sdim }; 323249259Sdim 324249259Sdim /// \return The number of scalar or vector registers that the target has. 325249259Sdim /// If 'Vectors' is true, it returns the number of vector registers. If it is 326249259Sdim /// set to false, it returns the number of scalar registers. 327249259Sdim virtual unsigned getNumberOfRegisters(bool Vector) const; 328249259Sdim 329249259Sdim /// \return The width of the largest scalar or vector register type. 330249259Sdim virtual unsigned getRegisterBitWidth(bool Vector) const; 331249259Sdim 332249259Sdim /// \return The maximum unroll factor that the vectorizer should try to 333249259Sdim /// perform for this target. This number depends on the level of parallelism 334249259Sdim /// and the number of execution units in the CPU. 335249259Sdim virtual unsigned getMaximumUnrollFactor() const; 336249259Sdim 337249259Sdim /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. 338249259Sdim virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 339249259Sdim OperandValueKind Opd1Info = OK_AnyValue, 340249259Sdim OperandValueKind Opd2Info = OK_AnyValue) const; 341249259Sdim 342249259Sdim /// \return The cost of a shuffle instruction of kind Kind and of type Tp. 343249259Sdim /// The index and subtype parameters are used by the subvector insertion and 344249259Sdim /// extraction shuffle kinds. 345249259Sdim virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, 346249259Sdim Type *SubTp = 0) const; 347249259Sdim 348249259Sdim /// \return The expected cost of cast instructions, such as bitcast, trunc, 349249259Sdim /// zext, etc. 350249259Sdim virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 351249259Sdim Type *Src) const; 352249259Sdim 353249259Sdim /// \return The expected cost of control-flow related instructions such as 354249259Sdim /// Phi, Ret, Br. 355249259Sdim virtual unsigned getCFInstrCost(unsigned Opcode) const; 356249259Sdim 357249259Sdim /// \returns The expected cost of compare and select instructions. 358249259Sdim virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 359249259Sdim Type *CondTy = 0) const; 360249259Sdim 361249259Sdim /// \return The expected cost of vector Insert and Extract. 362249259Sdim /// Use -1 to indicate that there is no information on the index value. 363249259Sdim virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 364249259Sdim unsigned Index = -1) const; 365249259Sdim 366249259Sdim /// \return The cost of Load and Store instructions. 367249259Sdim virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, 368249259Sdim unsigned Alignment, 369249259Sdim unsigned AddressSpace) const; 370249259Sdim 371263508Sdim /// \brief Calculate the cost of performing a vector reduction. 372263508Sdim /// 373263508Sdim /// This is the cost of reducing the vector value of type \p Ty to a scalar 374263508Sdim /// value using the operation denoted by \p Opcode. The form of the reduction 375263508Sdim /// can either be a pairwise reduction or a reduction that splits the vector 376263508Sdim /// at every reduction level. 377263508Sdim /// 378263508Sdim /// Pairwise: 379263508Sdim /// (v0, v1, v2, v3) 380263508Sdim /// ((v0+v1), (v2, v3), undef, undef) 381263508Sdim /// Split: 382263508Sdim /// (v0, v1, v2, v3) 383263508Sdim /// ((v0+v2), (v1+v3), undef, undef) 384263508Sdim virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, 385263508Sdim bool IsPairwiseForm) const; 386263508Sdim 387249259Sdim /// \returns The cost of Intrinsic instructions. 388249259Sdim virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 389249259Sdim ArrayRef<Type *> Tys) const; 390249259Sdim 391249259Sdim /// \returns The number of pieces into which the provided type must be 392249259Sdim /// split during legalization. Zero is returned when the answer is unknown. 393249259Sdim virtual unsigned getNumberOfParts(Type *Tp) const; 394249259Sdim 395249259Sdim /// \returns The cost of the address computation. For most targets this can be 396249259Sdim /// merged into the instruction indexing mode. Some targets might want to 397249259Sdim /// distinguish between address computation for memory operations on vector 398249259Sdim /// types and scalar types. Such targets should override this function. 399263508Sdim /// The 'IsComplex' parameter is a hint that the address computation is likely 400263508Sdim /// to involve multiple instructions and as such unlikely to be merged into 401263508Sdim /// the address indexing mode. 402263508Sdim virtual unsigned getAddressComputationCost(Type *Ty, 403263508Sdim bool IsComplex = false) const; 404249259Sdim 405249259Sdim /// @} 406249259Sdim 407249259Sdim /// Analysis group identification. 408249259Sdim static char ID; 409249259Sdim}; 410249259Sdim 411249259Sdim/// \brief Create the base case instance of a pass in the TTI analysis group. 412249259Sdim/// 413249259Sdim/// This class provides the base case for the stack of TTI analyzes. It doesn't 414249259Sdim/// delegate to anything and uses the STTI and VTTI objects passed in to 415249259Sdim/// satisfy the queries. 416249259SdimImmutablePass *createNoTargetTransformInfoPass(); 417249259Sdim 418249259Sdim} // End llvm namespace 419249259Sdim 420249259Sdim#endif 421