1249259Sdim//===- llvm/Analysis/TargetTransformInfo.h ----------------------*- C++ -*-===//
2249259Sdim//
3249259Sdim//                     The LLVM Compiler Infrastructure
4249259Sdim//
5249259Sdim// This file is distributed under the University of Illinois Open Source
6249259Sdim// License. See LICENSE.TXT for details.
7249259Sdim//
8249259Sdim//===----------------------------------------------------------------------===//
9249259Sdim//
10249259Sdim// This pass exposes codegen information to IR-level passes. Every
11249259Sdim// transformation that uses codegen information is broken into three parts:
12249259Sdim// 1. The IR-level analysis pass.
13249259Sdim// 2. The IR-level transformation interface which provides the needed
14249259Sdim//    information.
15249259Sdim// 3. Codegen-level implementation which uses target-specific hooks.
16249259Sdim//
17249259Sdim// This file defines #2, which is the interface that IR-level transformations
18249259Sdim// use for querying the codegen.
19249259Sdim//
20249259Sdim//===----------------------------------------------------------------------===//
21249259Sdim
22249259Sdim#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23249259Sdim#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
24249259Sdim
25249259Sdim#include "llvm/IR/Intrinsics.h"
26249259Sdim#include "llvm/Pass.h"
27249259Sdim#include "llvm/Support/DataTypes.h"
28249259Sdim
29249259Sdimnamespace llvm {
30249259Sdim
31249259Sdimclass GlobalValue;
32263508Sdimclass Loop;
33249259Sdimclass Type;
34249259Sdimclass User;
35249259Sdimclass Value;
36249259Sdim
37249259Sdim/// TargetTransformInfo - This pass provides access to the codegen
38249259Sdim/// interfaces that are needed for IR-level transformations.
39249259Sdimclass TargetTransformInfo {
40249259Sdimprotected:
41249259Sdim  /// \brief The TTI instance one level down the stack.
42249259Sdim  ///
43249259Sdim  /// This is used to implement the default behavior all of the methods which
44249259Sdim  /// is to delegate up through the stack of TTIs until one can answer the
45249259Sdim  /// query.
46249259Sdim  TargetTransformInfo *PrevTTI;
47249259Sdim
48249259Sdim  /// \brief The top of the stack of TTI analyses available.
49249259Sdim  ///
50249259Sdim  /// This is a convenience routine maintained as TTI analyses become available
51249259Sdim  /// that complements the PrevTTI delegation chain. When one part of an
52249259Sdim  /// analysis pass wants to query another part of the analysis pass it can use
53249259Sdim  /// this to start back at the top of the stack.
54249259Sdim  TargetTransformInfo *TopTTI;
55249259Sdim
56249259Sdim  /// All pass subclasses must in their initializePass routine call
57249259Sdim  /// pushTTIStack with themselves to update the pointers tracking the previous
58249259Sdim  /// TTI instance in the analysis group's stack, and the top of the analysis
59249259Sdim  /// group's stack.
60249259Sdim  void pushTTIStack(Pass *P);
61249259Sdim
62249259Sdim  /// All pass subclasses must in their finalizePass routine call popTTIStack
63249259Sdim  /// to update the pointers tracking the previous TTI instance in the analysis
64249259Sdim  /// group's stack, and the top of the analysis group's stack.
65249259Sdim  void popTTIStack();
66249259Sdim
67249259Sdim  /// All pass subclasses must call TargetTransformInfo::getAnalysisUsage.
68249259Sdim  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
69249259Sdim
70249259Sdimpublic:
71249259Sdim  /// This class is intended to be subclassed by real implementations.
72249259Sdim  virtual ~TargetTransformInfo() = 0;
73249259Sdim
74249259Sdim  /// \name Generic Target Information
75249259Sdim  /// @{
76249259Sdim
77249259Sdim  /// \brief Underlying constants for 'cost' values in this interface.
78249259Sdim  ///
79249259Sdim  /// Many APIs in this interface return a cost. This enum defines the
80249259Sdim  /// fundamental values that should be used to interpret (and produce) those
81249259Sdim  /// costs. The costs are returned as an unsigned rather than a member of this
82249259Sdim  /// enumeration because it is expected that the cost of one IR instruction
83249259Sdim  /// may have a multiplicative factor to it or otherwise won't fit directly
84249259Sdim  /// into the enum. Moreover, it is common to sum or average costs which works
85249259Sdim  /// better as simple integral values. Thus this enum only provides constants.
86249259Sdim  ///
87249259Sdim  /// Note that these costs should usually reflect the intersection of code-size
88249259Sdim  /// cost and execution cost. A free instruction is typically one that folds
89249259Sdim  /// into another instruction. For example, reg-to-reg moves can often be
90249259Sdim  /// skipped by renaming the registers in the CPU, but they still are encoded
91249259Sdim  /// and thus wouldn't be considered 'free' here.
92249259Sdim  enum TargetCostConstants {
93249259Sdim    TCC_Free = 0,       ///< Expected to fold away in lowering.
94249259Sdim    TCC_Basic = 1,      ///< The cost of a typical 'add' instruction.
95249259Sdim    TCC_Expensive = 4   ///< The cost of a 'div' instruction on x86.
96249259Sdim  };
97249259Sdim
98249259Sdim  /// \brief Estimate the cost of a specific operation when lowered.
99249259Sdim  ///
100249259Sdim  /// Note that this is designed to work on an arbitrary synthetic opcode, and
101249259Sdim  /// thus work for hypothetical queries before an instruction has even been
102249259Sdim  /// formed. However, this does *not* work for GEPs, and must not be called
103249259Sdim  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
104249259Sdim  /// analyzing a GEP's cost required more information.
105249259Sdim  ///
106249259Sdim  /// Typically only the result type is required, and the operand type can be
107249259Sdim  /// omitted. However, if the opcode is one of the cast instructions, the
108249259Sdim  /// operand type is required.
109249259Sdim  ///
110249259Sdim  /// The returned cost is defined in terms of \c TargetCostConstants, see its
111249259Sdim  /// comments for a detailed explanation of the cost values.
112249259Sdim  virtual unsigned getOperationCost(unsigned Opcode, Type *Ty,
113249259Sdim                                    Type *OpTy = 0) const;
114249259Sdim
115249259Sdim  /// \brief Estimate the cost of a GEP operation when lowered.
116249259Sdim  ///
117249259Sdim  /// The contract for this function is the same as \c getOperationCost except
118249259Sdim  /// that it supports an interface that provides extra information specific to
119249259Sdim  /// the GEP operation.
120249259Sdim  virtual unsigned getGEPCost(const Value *Ptr,
121249259Sdim                              ArrayRef<const Value *> Operands) const;
122249259Sdim
123249259Sdim  /// \brief Estimate the cost of a function call when lowered.
124249259Sdim  ///
125249259Sdim  /// The contract for this is the same as \c getOperationCost except that it
126249259Sdim  /// supports an interface that provides extra information specific to call
127249259Sdim  /// instructions.
128249259Sdim  ///
129249259Sdim  /// This is the most basic query for estimating call cost: it only knows the
130249259Sdim  /// function type and (potentially) the number of arguments at the call site.
131249259Sdim  /// The latter is only interesting for varargs function types.
132249259Sdim  virtual unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const;
133249259Sdim
134249259Sdim  /// \brief Estimate the cost of calling a specific function when lowered.
135249259Sdim  ///
136249259Sdim  /// This overload adds the ability to reason about the particular function
137249259Sdim  /// being called in the event it is a library call with special lowering.
138249259Sdim  virtual unsigned getCallCost(const Function *F, int NumArgs = -1) const;
139249259Sdim
140249259Sdim  /// \brief Estimate the cost of calling a specific function when lowered.
141249259Sdim  ///
142249259Sdim  /// This overload allows specifying a set of candidate argument values.
143249259Sdim  virtual unsigned getCallCost(const Function *F,
144249259Sdim                               ArrayRef<const Value *> Arguments) const;
145249259Sdim
146249259Sdim  /// \brief Estimate the cost of an intrinsic when lowered.
147249259Sdim  ///
148249259Sdim  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
149249259Sdim  virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
150249259Sdim                                    ArrayRef<Type *> ParamTys) const;
151249259Sdim
152249259Sdim  /// \brief Estimate the cost of an intrinsic when lowered.
153249259Sdim  ///
154249259Sdim  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
155249259Sdim  virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
156249259Sdim                                    ArrayRef<const Value *> Arguments) const;
157249259Sdim
158249259Sdim  /// \brief Estimate the cost of a given IR user when lowered.
159249259Sdim  ///
160249259Sdim  /// This can estimate the cost of either a ConstantExpr or Instruction when
161249259Sdim  /// lowered. It has two primary advantages over the \c getOperationCost and
162249259Sdim  /// \c getGEPCost above, and one significant disadvantage: it can only be
163249259Sdim  /// used when the IR construct has already been formed.
164249259Sdim  ///
165249259Sdim  /// The advantages are that it can inspect the SSA use graph to reason more
166249259Sdim  /// accurately about the cost. For example, all-constant-GEPs can often be
167249259Sdim  /// folded into a load or other instruction, but if they are used in some
168249259Sdim  /// other context they may not be folded. This routine can distinguish such
169249259Sdim  /// cases.
170249259Sdim  ///
171249259Sdim  /// The returned cost is defined in terms of \c TargetCostConstants, see its
172249259Sdim  /// comments for a detailed explanation of the cost values.
173249259Sdim  virtual unsigned getUserCost(const User *U) const;
174249259Sdim
175263508Sdim  /// \brief hasBranchDivergence - Return true if branch divergence exists.
176263508Sdim  /// Branch divergence has a significantly negative impact on GPU performance
177263508Sdim  /// when threads in the same wavefront take different paths due to conditional
178263508Sdim  /// branches.
179263508Sdim  virtual bool hasBranchDivergence() const;
180263508Sdim
181249259Sdim  /// \brief Test whether calls to a function lower to actual program function
182249259Sdim  /// calls.
183249259Sdim  ///
184249259Sdim  /// The idea is to test whether the program is likely to require a 'call'
185249259Sdim  /// instruction or equivalent in order to call the given function.
186249259Sdim  ///
187249259Sdim  /// FIXME: It's not clear that this is a good or useful query API. Client's
188249259Sdim  /// should probably move to simpler cost metrics using the above.
189249259Sdim  /// Alternatively, we could split the cost interface into distinct code-size
190249259Sdim  /// and execution-speed costs. This would allow modelling the core of this
191249259Sdim  /// query more accurately as the a call is a single small instruction, but
192249259Sdim  /// incurs significant execution cost.
193249259Sdim  virtual bool isLoweredToCall(const Function *F) const;
194249259Sdim
195263508Sdim  /// Parameters that control the generic loop unrolling transformation.
196263508Sdim  struct UnrollingPreferences {
197263508Sdim    /// The cost threshold for the unrolled loop, compared to
198263508Sdim    /// CodeMetrics.NumInsts aggregated over all basic blocks in the loop body.
199263508Sdim    /// The unrolling factor is set such that the unrolled loop body does not
200263508Sdim    /// exceed this cost. Set this to UINT_MAX to disable the loop body cost
201263508Sdim    /// restriction.
202263508Sdim    unsigned Threshold;
203263508Sdim    /// The cost threshold for the unrolled loop when optimizing for size (set
204263508Sdim    /// to UINT_MAX to disable).
205263508Sdim    unsigned OptSizeThreshold;
206263508Sdim    /// A forced unrolling factor (the number of concatenated bodies of the
207263508Sdim    /// original loop in the unrolled loop body). When set to 0, the unrolling
208263508Sdim    /// transformation will select an unrolling factor based on the current cost
209263508Sdim    /// threshold and other factors.
210263508Sdim    unsigned Count;
211263508Sdim    /// Allow partial unrolling (unrolling of loops to expand the size of the
212263508Sdim    /// loop body, not only to eliminate small constant-trip-count loops).
213263508Sdim    bool     Partial;
214263508Sdim    /// Allow runtime unrolling (unrolling of loops to expand the size of the
215263508Sdim    /// loop body even when the number of loop iterations is not known at compile
216263508Sdim    /// time).
217263508Sdim    bool     Runtime;
218263508Sdim  };
219263508Sdim
220263508Sdim  /// \brief Get target-customized preferences for the generic loop unrolling
221263508Sdim  /// transformation. The caller will initialize UP with the current
222263508Sdim  /// target-independent defaults.
223263508Sdim  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
224263508Sdim
225249259Sdim  /// @}
226249259Sdim
227249259Sdim  /// \name Scalar Target Information
228249259Sdim  /// @{
229249259Sdim
230249259Sdim  /// \brief Flags indicating the kind of support for population count.
231249259Sdim  ///
232249259Sdim  /// Compared to the SW implementation, HW support is supposed to
233249259Sdim  /// significantly boost the performance when the population is dense, and it
234249259Sdim  /// may or may not degrade performance if the population is sparse. A HW
235249259Sdim  /// support is considered as "Fast" if it can outperform, or is on a par
236249259Sdim  /// with, SW implementation when the population is sparse; otherwise, it is
237249259Sdim  /// considered as "Slow".
238249259Sdim  enum PopcntSupportKind {
239249259Sdim    PSK_Software,
240249259Sdim    PSK_SlowHardware,
241249259Sdim    PSK_FastHardware
242249259Sdim  };
243249259Sdim
244249259Sdim  /// isLegalAddImmediate - Return true if the specified immediate is legal
245249259Sdim  /// add immediate, that is the target has add instructions which can add
246249259Sdim  /// a register with the immediate without having to materialize the
247249259Sdim  /// immediate into a register.
248249259Sdim  virtual bool isLegalAddImmediate(int64_t Imm) const;
249249259Sdim
250249259Sdim  /// isLegalICmpImmediate - Return true if the specified immediate is legal
251249259Sdim  /// icmp immediate, that is the target has icmp instructions which can compare
252249259Sdim  /// a register against the immediate without having to materialize the
253249259Sdim  /// immediate into a register.
254249259Sdim  virtual bool isLegalICmpImmediate(int64_t Imm) const;
255249259Sdim
256249259Sdim  /// isLegalAddressingMode - Return true if the addressing mode represented by
257249259Sdim  /// AM is legal for this target, for a load/store of the specified type.
258249259Sdim  /// The type may be VoidTy, in which case only return true if the addressing
259249259Sdim  /// mode is legal for a load/store of any legal type.
260249259Sdim  /// TODO: Handle pre/postinc as well.
261249259Sdim  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
262249259Sdim                                     int64_t BaseOffset, bool HasBaseReg,
263249259Sdim                                     int64_t Scale) const;
264249259Sdim
265263508Sdim  /// \brief Return the cost of the scaling factor used in the addressing
266263508Sdim  /// mode represented by AM for this target, for a load/store
267263508Sdim  /// of the specified type.
268263508Sdim  /// If the AM is supported, the return value must be >= 0.
269263508Sdim  /// If the AM is not supported, it returns a negative value.
270263508Sdim  /// TODO: Handle pre/postinc as well.
271263508Sdim  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
272263508Sdim                                   int64_t BaseOffset, bool HasBaseReg,
273263508Sdim                                   int64_t Scale) const;
274263508Sdim
275249259Sdim  /// isTruncateFree - Return true if it's free to truncate a value of
276249259Sdim  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
277249259Sdim  /// register EAX to i16 by referencing its sub-register AX.
278249259Sdim  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
279249259Sdim
280249259Sdim  /// Is this type legal.
281249259Sdim  virtual bool isTypeLegal(Type *Ty) const;
282249259Sdim
283249259Sdim  /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes
284249259Sdim  virtual unsigned getJumpBufAlignment() const;
285249259Sdim
286249259Sdim  /// getJumpBufSize - returns the target's jmp_buf size in bytes.
287249259Sdim  virtual unsigned getJumpBufSize() const;
288249259Sdim
289249259Sdim  /// shouldBuildLookupTables - Return true if switches should be turned into
290249259Sdim  /// lookup tables for the target.
291249259Sdim  virtual bool shouldBuildLookupTables() const;
292249259Sdim
293249259Sdim  /// getPopcntSupport - Return hardware support for population count.
294249259Sdim  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
295249259Sdim
296263508Sdim  /// haveFastSqrt -- Return true if the hardware has a fast square-root
297263508Sdim  /// instruction.
298263508Sdim  virtual bool haveFastSqrt(Type *Ty) const;
299263508Sdim
300249259Sdim  /// getIntImmCost - Return the expected cost of materializing the given
301249259Sdim  /// integer immediate of the specified type.
302249259Sdim  virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
303249259Sdim
304249259Sdim  /// @}
305249259Sdim
306249259Sdim  /// \name Vector Target Information
307249259Sdim  /// @{
308249259Sdim
309249259Sdim  /// \brief The various kinds of shuffle patterns for vector queries.
310249259Sdim  enum ShuffleKind {
311249259Sdim    SK_Broadcast,       ///< Broadcast element 0 to all other elements.
312249259Sdim    SK_Reverse,         ///< Reverse the order of the vector.
313249259Sdim    SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
314249259Sdim    SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset.
315249259Sdim  };
316249259Sdim
317263508Sdim  /// \brief Additional information about an operand's possible values.
318249259Sdim  enum OperandValueKind {
319249259Sdim    OK_AnyValue,            // Operand can have any value.
320249259Sdim    OK_UniformValue,        // Operand is uniform (splat of a value).
321249259Sdim    OK_UniformConstantValue // Operand is uniform constant.
322249259Sdim  };
323249259Sdim
324249259Sdim  /// \return The number of scalar or vector registers that the target has.
325249259Sdim  /// If 'Vectors' is true, it returns the number of vector registers. If it is
326249259Sdim  /// set to false, it returns the number of scalar registers.
327249259Sdim  virtual unsigned getNumberOfRegisters(bool Vector) const;
328249259Sdim
329249259Sdim  /// \return The width of the largest scalar or vector register type.
330249259Sdim  virtual unsigned getRegisterBitWidth(bool Vector) const;
331249259Sdim
332249259Sdim  /// \return The maximum unroll factor that the vectorizer should try to
333249259Sdim  /// perform for this target. This number depends on the level of parallelism
334249259Sdim  /// and the number of execution units in the CPU.
335249259Sdim  virtual unsigned getMaximumUnrollFactor() const;
336249259Sdim
337249259Sdim  /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
338249259Sdim  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
339249259Sdim                                  OperandValueKind Opd1Info = OK_AnyValue,
340249259Sdim                                  OperandValueKind Opd2Info = OK_AnyValue) const;
341249259Sdim
342249259Sdim  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
343249259Sdim  /// The index and subtype parameters are used by the subvector insertion and
344249259Sdim  /// extraction shuffle kinds.
345249259Sdim  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
346249259Sdim                                  Type *SubTp = 0) const;
347249259Sdim
348249259Sdim  /// \return The expected cost of cast instructions, such as bitcast, trunc,
349249259Sdim  /// zext, etc.
350249259Sdim  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
351249259Sdim                                    Type *Src) const;
352249259Sdim
353249259Sdim  /// \return The expected cost of control-flow related instructions such as
354249259Sdim  /// Phi, Ret, Br.
355249259Sdim  virtual unsigned getCFInstrCost(unsigned Opcode) const;
356249259Sdim
357249259Sdim  /// \returns The expected cost of compare and select instructions.
358249259Sdim  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
359249259Sdim                                      Type *CondTy = 0) const;
360249259Sdim
361249259Sdim  /// \return The expected cost of vector Insert and Extract.
362249259Sdim  /// Use -1 to indicate that there is no information on the index value.
363249259Sdim  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
364249259Sdim                                      unsigned Index = -1) const;
365249259Sdim
366249259Sdim  /// \return The cost of Load and Store instructions.
367249259Sdim  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
368249259Sdim                                   unsigned Alignment,
369249259Sdim                                   unsigned AddressSpace) const;
370249259Sdim
371263508Sdim  /// \brief Calculate the cost of performing a vector reduction.
372263508Sdim  ///
373263508Sdim  /// This is the cost of reducing the vector value of type \p Ty to a scalar
374263508Sdim  /// value using the operation denoted by \p Opcode. The form of the reduction
375263508Sdim  /// can either be a pairwise reduction or a reduction that splits the vector
376263508Sdim  /// at every reduction level.
377263508Sdim  ///
378263508Sdim  /// Pairwise:
379263508Sdim  ///  (v0, v1, v2, v3)
380263508Sdim  ///  ((v0+v1), (v2, v3), undef, undef)
381263508Sdim  /// Split:
382263508Sdim  ///  (v0, v1, v2, v3)
383263508Sdim  ///  ((v0+v2), (v1+v3), undef, undef)
384263508Sdim  virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
385263508Sdim                                    bool IsPairwiseForm) const;
386263508Sdim
387249259Sdim  /// \returns The cost of Intrinsic instructions.
388249259Sdim  virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
389249259Sdim                                         ArrayRef<Type *> Tys) const;
390249259Sdim
391249259Sdim  /// \returns The number of pieces into which the provided type must be
392249259Sdim  /// split during legalization. Zero is returned when the answer is unknown.
393249259Sdim  virtual unsigned getNumberOfParts(Type *Tp) const;
394249259Sdim
395249259Sdim  /// \returns The cost of the address computation. For most targets this can be
396249259Sdim  /// merged into the instruction indexing mode. Some targets might want to
397249259Sdim  /// distinguish between address computation for memory operations on vector
398249259Sdim  /// types and scalar types. Such targets should override this function.
399263508Sdim  /// The 'IsComplex' parameter is a hint that the address computation is likely
400263508Sdim  /// to involve multiple instructions and as such unlikely to be merged into
401263508Sdim  /// the address indexing mode.
402263508Sdim  virtual unsigned getAddressComputationCost(Type *Ty,
403263508Sdim                                             bool IsComplex = false) const;
404249259Sdim
405249259Sdim  /// @}
406249259Sdim
407249259Sdim  /// Analysis group identification.
408249259Sdim  static char ID;
409249259Sdim};
410249259Sdim
411249259Sdim/// \brief Create the base case instance of a pass in the TTI analysis group.
412249259Sdim///
413249259Sdim/// This class provides the base case for the stack of TTI analyzes. It doesn't
414249259Sdim/// delegate to anything and uses the STTI and VTTI objects passed in to
415249259Sdim/// satisfy the queries.
416249259SdimImmutablePass *createNoTargetTransformInfoPass();
417249259Sdim
418249259Sdim} // End llvm namespace
419249259Sdim
420249259Sdim#endif
421