Deleted Added
full compact
TargetTransformInfo.h (256281) TargetTransformInfo.h (263508)
1//===- llvm/Analysis/TargetTransformInfo.h ----------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//

--- 15 unchanged lines hidden (view full) ---

24
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/Pass.h"
27#include "llvm/Support/DataTypes.h"
28
29namespace llvm {
30
31class GlobalValue;
1//===- llvm/Analysis/TargetTransformInfo.h ----------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//

--- 15 unchanged lines hidden (view full) ---

24
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/Pass.h"
27#include "llvm/Support/DataTypes.h"
28
29namespace llvm {
30
31class GlobalValue;
32class Loop;
32class Type;
33class User;
34class Value;
35
36/// TargetTransformInfo - This pass provides access to the codegen
37/// interfaces that are needed for IR-level transformations.
38class TargetTransformInfo {
39protected:

--- 126 unchanged lines hidden (view full) ---

166 /// folded into a load or other instruction, but if they are used in some
167 /// other context they may not be folded. This routine can distinguish such
168 /// cases.
169 ///
170 /// The returned cost is defined in terms of \c TargetCostConstants, see its
171 /// comments for a detailed explanation of the cost values.
172 virtual unsigned getUserCost(const User *U) const;
173
33class Type;
34class User;
35class Value;
36
37/// TargetTransformInfo - This pass provides access to the codegen
38/// interfaces that are needed for IR-level transformations.
39class TargetTransformInfo {
40protected:

--- 126 unchanged lines hidden (view full) ---

167 /// folded into a load or other instruction, but if they are used in some
168 /// other context they may not be folded. This routine can distinguish such
169 /// cases.
170 ///
171 /// The returned cost is defined in terms of \c TargetCostConstants, see its
172 /// comments for a detailed explanation of the cost values.
173 virtual unsigned getUserCost(const User *U) const;
174
175 /// \brief hasBranchDivergence - Return true if branch divergence exists.
176 /// Branch divergence has a significantly negative impact on GPU performance
177 /// when threads in the same wavefront take different paths due to conditional
178 /// branches.
179 virtual bool hasBranchDivergence() const;
180
174 /// \brief Test whether calls to a function lower to actual program function
175 /// calls.
176 ///
177 /// The idea is to test whether the program is likely to require a 'call'
178 /// instruction or equivalent in order to call the given function.
179 ///
180 /// FIXME: It's not clear that this is a good or useful query API. Clients
181 /// should probably move to simpler cost metrics using the above.
182 /// Alternatively, we could split the cost interface into distinct code-size
183 /// and execution-speed costs. This would allow modelling the core of this
184 /// query more accurately as a call is a single small instruction, but
185 /// incurs significant execution cost.
186 virtual bool isLoweredToCall(const Function *F) const;
187
181 /// \brief Test whether calls to a function lower to actual program function
182 /// calls.
183 ///
184 /// The idea is to test whether the program is likely to require a 'call'
185 /// instruction or equivalent in order to call the given function.
186 ///
187 /// FIXME: It's not clear that this is a good or useful query API. Clients
188 /// should probably move to simpler cost metrics using the above.
189 /// Alternatively, we could split the cost interface into distinct code-size
190 /// and execution-speed costs. This would allow modelling the core of this
191 /// query more accurately as a call is a single small instruction, but
192 /// incurs significant execution cost.
193 virtual bool isLoweredToCall(const Function *F) const;
194
195 /// Parameters that control the generic loop unrolling transformation.
196 struct UnrollingPreferences {
197 /// The cost threshold for the unrolled loop, compared to
198 /// CodeMetrics.NumInsts aggregated over all basic blocks in the loop body.
199 /// The unrolling factor is set such that the unrolled loop body does not
200 /// exceed this cost. Set this to UINT_MAX to disable the loop body cost
201 /// restriction.
202 unsigned Threshold;
203 /// The cost threshold for the unrolled loop when optimizing for size (set
204 /// to UINT_MAX to disable).
205 unsigned OptSizeThreshold;
206 /// A forced unrolling factor (the number of concatenated bodies of the
207 /// original loop in the unrolled loop body). When set to 0, the unrolling
208 /// transformation will select an unrolling factor based on the current cost
209 /// threshold and other factors.
210 unsigned Count;
211 /// Allow partial unrolling (unrolling of loops to expand the size of the
212 /// loop body, not only to eliminate small constant-trip-count loops).
213 bool Partial;
214 /// Allow runtime unrolling (unrolling of loops to expand the size of the
215 /// loop body even when the number of loop iterations is not known at compile
216 /// time).
217 bool Runtime;
218 };
219
220 /// \brief Get target-customized preferences for the generic loop unrolling
221 /// transformation. The caller will initialize UP with the current
222 /// target-independent defaults.
223 virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
224
188 /// @}
189
190 /// \name Scalar Target Information
191 /// @{
192
193 /// \brief Flags indicating the kind of support for population count.
194 ///
195 /// Compared to the SW implementation, HW support is supposed to

--- 24 unchanged lines hidden (view full) ---

220 /// AM is legal for this target, for a load/store of the specified type.
221 /// The type may be VoidTy, in which case only return true if the addressing
222 /// mode is legal for a load/store of any legal type.
223 /// TODO: Handle pre/postinc as well.
224 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
225 int64_t BaseOffset, bool HasBaseReg,
226 int64_t Scale) const;
227
225 /// @}
226
227 /// \name Scalar Target Information
228 /// @{
229
230 /// \brief Flags indicating the kind of support for population count.
231 ///
232 /// Compared to the SW implementation, HW support is supposed to

--- 24 unchanged lines hidden (view full) ---

257 /// AM is legal for this target, for a load/store of the specified type.
258 /// The type may be VoidTy, in which case only return true if the addressing
259 /// mode is legal for a load/store of any legal type.
260 /// TODO: Handle pre/postinc as well.
261 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
262 int64_t BaseOffset, bool HasBaseReg,
263 int64_t Scale) const;
264
265 /// \brief Return the cost of the scaling factor used in the addressing
266 /// mode represented by AM for this target, for a load/store
267 /// of the specified type.
268 /// If the AM is supported, the return value must be >= 0.
269 /// If the AM is not supported, it returns a negative value.
270 /// TODO: Handle pre/postinc as well.
271 virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
272 int64_t BaseOffset, bool HasBaseReg,
273 int64_t Scale) const;
274
228 /// isTruncateFree - Return true if it's free to truncate a value of
229 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
230 /// register EAX to i16 by referencing its sub-register AX.
231 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
232
233 /// Is this type legal.
234 virtual bool isTypeLegal(Type *Ty) const;
235

--- 5 unchanged lines hidden (view full) ---

241
242 /// shouldBuildLookupTables - Return true if switches should be turned into
243 /// lookup tables for the target.
244 virtual bool shouldBuildLookupTables() const;
245
246 /// getPopcntSupport - Return hardware support for population count.
247 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
248
275 /// isTruncateFree - Return true if it's free to truncate a value of
276 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
277 /// register EAX to i16 by referencing its sub-register AX.
278 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
279
280 /// Is this type legal.
281 virtual bool isTypeLegal(Type *Ty) const;
282

--- 5 unchanged lines hidden (view full) ---

288
289 /// shouldBuildLookupTables - Return true if switches should be turned into
290 /// lookup tables for the target.
291 virtual bool shouldBuildLookupTables() const;
292
293 /// getPopcntSupport - Return hardware support for population count.
294 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
295
296 /// haveFastSqrt -- Return true if the hardware has a fast square-root
297 /// instruction.
298 virtual bool haveFastSqrt(Type *Ty) const;
299
249 /// getIntImmCost - Return the expected cost of materializing the given
250 /// integer immediate of the specified type.
251 virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
252
253 /// @}
254
255 /// \name Vector Target Information
256 /// @{
257
258 /// \brief The various kinds of shuffle patterns for vector queries.
259 enum ShuffleKind {
260 SK_Broadcast, ///< Broadcast element 0 to all other elements.
261 SK_Reverse, ///< Reverse the order of the vector.
262 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
263 SK_ExtractSubvector ///< ExtractSubvector. Index indicates start offset.
264 };
265
300 /// getIntImmCost - Return the expected cost of materializing the given
301 /// integer immediate of the specified type.
302 virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
303
304 /// @}
305
306 /// \name Vector Target Information
307 /// @{
308
309 /// \brief The various kinds of shuffle patterns for vector queries.
310 enum ShuffleKind {
311 SK_Broadcast, ///< Broadcast element 0 to all other elements.
312 SK_Reverse, ///< Reverse the order of the vector.
313 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
314 SK_ExtractSubvector ///< ExtractSubvector. Index indicates start offset.
315 };
316
266 /// \brief Additonal information about an operand's possible values.
317 /// \brief Additional information about an operand's possible values.
267 enum OperandValueKind {
268 OK_AnyValue, // Operand can have any value.
269 OK_UniformValue, // Operand is uniform (splat of a value).
270 OK_UniformConstantValue // Operand is uniform constant.
271 };
272
273 /// \return The number of scalar or vector registers that the target has.
274 /// If 'Vectors' is true, it returns the number of vector registers. If it is

--- 37 unchanged lines hidden (view full) ---

312 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
313 unsigned Index = -1) const;
314
315 /// \return The cost of Load and Store instructions.
316 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
317 unsigned Alignment,
318 unsigned AddressSpace) const;
319
318 enum OperandValueKind {
319 OK_AnyValue, // Operand can have any value.
320 OK_UniformValue, // Operand is uniform (splat of a value).
321 OK_UniformConstantValue // Operand is uniform constant.
322 };
323
324 /// \return The number of scalar or vector registers that the target has.
325 /// If 'Vectors' is true, it returns the number of vector registers. If it is

--- 37 unchanged lines hidden (view full) ---

363 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
364 unsigned Index = -1) const;
365
366 /// \return The cost of Load and Store instructions.
367 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
368 unsigned Alignment,
369 unsigned AddressSpace) const;
370
371 /// \brief Calculate the cost of performing a vector reduction.
372 ///
373 /// This is the cost of reducing the vector value of type \p Ty to a scalar
374 /// value using the operation denoted by \p Opcode. The form of the reduction
375 /// can either be a pairwise reduction or a reduction that splits the vector
376 /// at every reduction level.
377 ///
378 /// Pairwise:
379 /// (v0, v1, v2, v3)
380 /// ((v0+v1), (v2+v3), undef, undef)
381 /// Split:
382 /// (v0, v1, v2, v3)
383 /// ((v0+v2), (v1+v3), undef, undef)
384 virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
385 bool IsPairwiseForm) const;
386
320 /// \returns The cost of Intrinsic instructions.
321 virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
322 ArrayRef<Type *> Tys) const;
323
324 /// \returns The number of pieces into which the provided type must be
325 /// split during legalization. Zero is returned when the answer is unknown.
326 virtual unsigned getNumberOfParts(Type *Tp) const;
327
328 /// \returns The cost of the address computation. For most targets this can be
329 /// merged into the instruction indexing mode. Some targets might want to
330 /// distinguish between address computation for memory operations on vector
331 /// types and scalar types. Such targets should override this function.
387 /// \returns The cost of Intrinsic instructions.
388 virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
389 ArrayRef<Type *> Tys) const;
390
391 /// \returns The number of pieces into which the provided type must be
392 /// split during legalization. Zero is returned when the answer is unknown.
393 virtual unsigned getNumberOfParts(Type *Tp) const;
394
395 /// \returns The cost of the address computation. For most targets this can be
396 /// merged into the instruction indexing mode. Some targets might want to
397 /// distinguish between address computation for memory operations on vector
398 /// types and scalar types. Such targets should override this function.
332 virtual unsigned getAddressComputationCost(Type *Ty) const;
399 /// The 'IsComplex' parameter is a hint that the address computation is likely
400 /// to involve multiple instructions and as such unlikely to be merged into
401 /// the address indexing mode.
402 virtual unsigned getAddressComputationCost(Type *Ty,
403 bool IsComplex = false) const;
333
334 /// @}
335
336 /// Analysis group identification.
337 static char ID;
338};
339
340/// \brief Create the base case instance of a pass in the TTI analysis group.
341///
342/// This class provides the base case for the stack of TTI analyses. It doesn't
343/// delegate to anything and uses the STTI and VTTI objects passed in to
344/// satisfy the queries.
345ImmutablePass *createNoTargetTransformInfoPass();
346
347} // End llvm namespace
348
349#endif
404
405 /// @}
406
407 /// Analysis group identification.
408 static char ID;
409};
410
411/// \brief Create the base case instance of a pass in the TTI analysis group.
412///
413/// This class provides the base case for the stack of TTI analyses. It doesn't
414/// delegate to anything and uses the STTI and VTTI objects passed in to
415/// satisfy the queries.
416ImmutablePass *createNoTargetTransformInfoPass();
417
418} // End llvm namespace
419
420#endif