LoopVectorizationLegality.h revision 360784
1//===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file defines the LoopVectorizationLegality class. Original code
11/// in Loop Vectorizer has been moved out to its own file for modularity
12/// and reusability.
13///
14/// Currently, it works for innermost loop vectorization. Extending this to
15/// outer loop vectorization is a TODO item.
16///
17/// Also provides:
18/// 1) LoopVectorizeHints class which keeps a number of loop annotations
19/// locally for easy look up. It has the ability to write them back as
20/// loop metadata, upon request.
21/// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22/// of reporting useful failure to vectorize message.
23//
24//===----------------------------------------------------------------------===//
25
26#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28
29#include "llvm/ADT/MapVector.h"
30#include "llvm/Analysis/LoopAccessAnalysis.h"
31#include "llvm/Analysis/OptimizationRemarkEmitter.h"
32#include "llvm/Transforms/Utils/LoopUtils.h"
33
34namespace llvm {
35
36/// Utility class for getting and setting loop vectorizer hints in the form
37/// of loop metadata.
38/// This class keeps a number of loop annotations locally (as member variables)
39/// and can, upon request, write them back as metadata on the loop. It will
40/// initially scan the loop for existing metadata, and will update the local
41/// values based on information in the loop.
42/// We cannot write all values to metadata, as the mere presence of some info,
43/// for example 'force', means a decision has been made. So, we need to be
44/// careful NOT to add them if the user hasn't specifically asked so.
45class LoopVectorizeHints {
46  enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
47                  HK_PREDICATE };
48
49  /// Hint - associates name and validation with the hint value.
50  struct Hint {
51    const char *Name;
52    unsigned Value; // This may have to change for non-numeric values.
53    HintKind Kind;
54
55    Hint(const char *Name, unsigned Value, HintKind Kind)
56        : Name(Name), Value(Value), Kind(Kind) {}
57
58    bool validate(unsigned Val);
59  };
60
61  /// Vectorization width.
62  Hint Width;
63
64  /// Vectorization interleave factor.
65  Hint Interleave;
66
67  /// Vectorization forced
68  Hint Force;
69
70  /// Already Vectorized
71  Hint IsVectorized;
72
73  /// Vector Predicate
74  Hint Predicate;
75
76  /// Return the loop metadata prefix.
77  static StringRef Prefix() { return "llvm.loop."; }
78
79  /// True if there is any unsafe math in the loop.
80  bool PotentiallyUnsafe = false;
81
82public:
83  enum ForceKind {
84    FK_Undefined = -1, ///< Not selected.
85    FK_Disabled = 0,   ///< Forcing disabled.
86    FK_Enabled = 1,    ///< Forcing enabled.
87  };
88
89  LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
90                     OptimizationRemarkEmitter &ORE);
91
92  /// Mark the loop L as already vectorized by setting the width to 1.
93  void setAlreadyVectorized();
94
95  bool allowVectorization(Function *F, Loop *L,
96                          bool VectorizeOnlyWhenForced) const;
97
98  /// Dumps all the hint information.
99  void emitRemarkWithHints() const;
100
101  unsigned getWidth() const { return Width.Value; }
102  unsigned getInterleave() const { return Interleave.Value; }
103  unsigned getIsVectorized() const { return IsVectorized.Value; }
104  unsigned getPredicate() const { return Predicate.Value; }
105  enum ForceKind getForce() const {
106    if ((ForceKind)Force.Value == FK_Undefined &&
107        hasDisableAllTransformsHint(TheLoop))
108      return FK_Disabled;
109    return (ForceKind)Force.Value;
110  }
111
112  /// If hints are provided that force vectorization, use the AlwaysPrint
113  /// pass name to force the frontend to print the diagnostic.
114  const char *vectorizeAnalysisPassName() const;
115
116  bool allowReordering() const {
117    // When enabling loop hints are provided we allow the vectorizer to change
118    // the order of operations that is given by the scalar loop. This is not
119    // enabled by default because can be unsafe or inefficient. For example,
120    // reordering floating-point operations will change the way round-off
121    // error accumulates in the loop.
122    return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
123  }
124
125  bool isPotentiallyUnsafe() const {
126    // Avoid FP vectorization if the target is unsure about proper support.
127    // This may be related to the SIMD unit in the target not handling
128    // IEEE 754 FP ops properly, or bad single-to-double promotions.
129    // Otherwise, a sequence of vectorized loops, even without reduction,
130    // could lead to different end results on the destination vectors.
131    return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
132  }
133
134  void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
135
136private:
137  /// Find hints specified in the loop metadata and update local values.
138  void getHintsFromMetadata();
139
140  /// Checks string hint with one operand and set value if valid.
141  void setHint(StringRef Name, Metadata *Arg);
142
143  /// The loop these hints belong to.
144  const Loop *TheLoop;
145
146  /// Interface to emit optimization remarks.
147  OptimizationRemarkEmitter &ORE;
148};
149
150/// This holds vectorization requirements that must be verified late in
151/// the process. The requirements are set by legalize and costmodel. Once
152/// vectorization has been determined to be possible and profitable the
153/// requirements can be verified by looking for metadata or compiler options.
154/// For example, some loops require FP commutativity which is only allowed if
155/// vectorization is explicitly specified or if the fast-math compiler option
156/// has been provided.
157/// Late evaluation of these requirements allows helpful diagnostics to be
158/// composed that tells the user what need to be done to vectorize the loop. For
159/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
160/// evaluation should be used only when diagnostics can generated that can be
161/// followed by a non-expert user.
162class LoopVectorizationRequirements {
163public:
164  LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
165
166  void addUnsafeAlgebraInst(Instruction *I) {
167    // First unsafe algebra instruction.
168    if (!UnsafeAlgebraInst)
169      UnsafeAlgebraInst = I;
170  }
171
172  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
173
174  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
175
176private:
177  unsigned NumRuntimePointerChecks = 0;
178  Instruction *UnsafeAlgebraInst = nullptr;
179
180  /// Interface to emit optimization remarks.
181  OptimizationRemarkEmitter &ORE;
182};
183
184/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
185/// to what vectorization factor.
186/// This class does not look at the profitability of vectorization, only the
187/// legality. This class has two main kinds of checks:
188/// * Memory checks - The code in canVectorizeMemory checks if vectorization
189///   will change the order of memory accesses in a way that will change the
190///   correctness of the program.
191/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
192/// checks for a number of different conditions, such as the availability of a
193/// single induction variable, that all types are supported and vectorize-able,
194/// etc. This code reflects the capabilities of InnerLoopVectorizer.
195/// This class is also used by InnerLoopVectorizer for identifying
196/// induction variable and the different reduction variables.
197class LoopVectorizationLegality {
198public:
199  LoopVectorizationLegality(
200      Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
201      TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA,
202      Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
203      LoopInfo *LI, OptimizationRemarkEmitter *ORE,
204      LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
205      AssumptionCache *AC)
206      : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
207        GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
208
209  /// ReductionList contains the reduction descriptors for all
210  /// of the reductions that were found in the loop.
211  using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
212
213  /// InductionList saves induction variables and maps them to the
214  /// induction descriptor.
215  using InductionList = MapVector<PHINode *, InductionDescriptor>;
216
217  /// RecurrenceSet contains the phi nodes that are recurrences other than
218  /// inductions and reductions.
219  using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
220
221  /// Returns true if it is legal to vectorize this loop.
222  /// This does not mean that it is profitable to vectorize this
223  /// loop, only that it is legal to do so.
224  /// Temporarily taking UseVPlanNativePath parameter. If true, take
225  /// the new code path being implemented for outer loop vectorization
226  /// (should be functional for inner loop vectorization) based on VPlan.
227  /// If false, good old LV code.
228  bool canVectorize(bool UseVPlanNativePath);
229
230  /// Return true if we can vectorize this loop while folding its tail by
231  /// masking, and mark all respective loads/stores for masking.
232  bool prepareToFoldTailByMasking();
233
234  /// Returns the primary induction variable.
235  PHINode *getPrimaryInduction() { return PrimaryInduction; }
236
237  /// Returns the reduction variables found in the loop.
238  ReductionList *getReductionVars() { return &Reductions; }
239
240  /// Returns the induction variables found in the loop.
241  InductionList *getInductionVars() { return &Inductions; }
242
243  /// Return the first-order recurrences found in the loop.
244  RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
245
246  /// Return the set of instructions to sink to handle first-order recurrences.
247  DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
248
249  /// Returns the widest induction type.
250  Type *getWidestInductionType() { return WidestIndTy; }
251
252  /// Returns True if V is a Phi node of an induction variable in this loop.
253  bool isInductionPhi(const Value *V);
254
255  /// Returns True if V is a cast that is part of an induction def-use chain,
256  /// and had been proven to be redundant under a runtime guard (in other
257  /// words, the cast has the same SCEV expression as the induction phi).
258  bool isCastedInductionVariable(const Value *V);
259
260  /// Returns True if V can be considered as an induction variable in this
261  /// loop. V can be the induction phi, or some redundant cast in the def-use
262  /// chain of the inducion phi.
263  bool isInductionVariable(const Value *V);
264
265  /// Returns True if PN is a reduction variable in this loop.
266  bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
267
268  /// Returns True if Phi is a first-order recurrence in this loop.
269  bool isFirstOrderRecurrence(const PHINode *Phi);
270
271  /// Return true if the block BB needs to be predicated in order for the loop
272  /// to be vectorized.
273  bool blockNeedsPredication(BasicBlock *BB);
274
275  /// Check if this pointer is consecutive when vectorizing. This happens
276  /// when the last index of the GEP is the induction variable, or that the
277  /// pointer itself is an induction variable.
278  /// This check allows us to vectorize A[idx] into a wide load/store.
279  /// Returns:
280  /// 0 - Stride is unknown or non-consecutive.
281  /// 1 - Address is consecutive.
282  /// -1 - Address is consecutive, and decreasing.
283  /// NOTE: This method must only be used before modifying the original scalar
284  /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
285  int isConsecutivePtr(Value *Ptr);
286
287  /// Returns true if the value V is uniform within the loop.
288  bool isUniform(Value *V);
289
290  /// Returns the information that we collected about runtime memory check.
291  const RuntimePointerChecking *getRuntimePointerChecking() const {
292    return LAI->getRuntimePointerChecking();
293  }
294
295  const LoopAccessInfo *getLAI() const { return LAI; }
296
297  unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
298
299  uint64_t getMaxSafeRegisterWidth() const {
300    return LAI->getDepChecker().getMaxSafeRegisterWidth();
301  }
302
303  bool hasStride(Value *V) { return LAI->hasStride(V); }
304
305  /// Returns true if vector representation of the instruction \p I
306  /// requires mask.
307  bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
308
309  unsigned getNumStores() const { return LAI->getNumStores(); }
310  unsigned getNumLoads() const { return LAI->getNumLoads(); }
311
312  // Returns true if the NoNaN attribute is set on the function.
313  bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
314
315private:
316  /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
317  /// its nested loops are considered legal for vectorization. These legal
318  /// checks are common for inner and outer loop vectorization.
319  /// Temporarily taking UseVPlanNativePath parameter. If true, take
320  /// the new code path being implemented for outer loop vectorization
321  /// (should be functional for inner loop vectorization) based on VPlan.
322  /// If false, good old LV code.
323  bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
324
325  /// Set up outer loop inductions by checking Phis in outer loop header for
326  /// supported inductions (int inductions). Return false if any of these Phis
327  /// is not a supported induction or if we fail to find an induction.
328  bool setupOuterLoopInductions();
329
330  /// Return true if the pre-header, exiting and latch blocks of \p Lp
331  /// (non-recursive) are considered legal for vectorization.
332  /// Temporarily taking UseVPlanNativePath parameter. If true, take
333  /// the new code path being implemented for outer loop vectorization
334  /// (should be functional for inner loop vectorization) based on VPlan.
335  /// If false, good old LV code.
336  bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
337
338  /// Check if a single basic block loop is vectorizable.
339  /// At this point we know that this is a loop with a constant trip count
340  /// and we only need to check individual instructions.
341  bool canVectorizeInstrs();
342
343  /// When we vectorize loops we may change the order in which
344  /// we read and write from memory. This method checks if it is
345  /// legal to vectorize the code, considering only memory constrains.
346  /// Returns true if the loop is vectorizable
347  bool canVectorizeMemory();
348
349  /// Return true if we can vectorize this loop using the IF-conversion
350  /// transformation.
351  bool canVectorizeWithIfConvert();
352
353  /// Return true if we can vectorize this outer loop. The method performs
354  /// specific checks for outer loop vectorization.
355  bool canVectorizeOuterLoop();
356
357  /// Return true if all of the instructions in the block can be speculatively
358  /// executed, and record the loads/stores that require masking. If's that
359  /// guard loads can be ignored under "assume safety" unless \p PreserveGuards
360  /// is true. This can happen when we introduces guards for which the original
361  /// "unguarded-loads are safe" assumption does not hold. For example, the
362  /// vectorizer's fold-tail transformation changes the loop to execute beyond
363  /// its original trip-count, under a proper guard, which should be preserved.
364  /// \p SafePtrs is a list of addresses that are known to be legal and we know
365  /// that we can read from them without segfault.
366  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
367                            bool PreserveGuards = false);
368
369  /// Updates the vectorization state by adding \p Phi to the inductions list.
370  /// This can set \p Phi as the main induction of the loop if \p Phi is a
371  /// better choice for the main induction than the existing one.
372  void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
373                       SmallPtrSetImpl<Value *> &AllowedExit);
374
375  /// If an access has a symbolic strides, this maps the pointer value to
376  /// the stride symbol.
377  const ValueToValueMap *getSymbolicStrides() {
378    // FIXME: Currently, the set of symbolic strides is sometimes queried before
379    // it's collected.  This happens from canVectorizeWithIfConvert, when the
380    // pointer is checked to reference consecutive elements suitable for a
381    // masked access.
382    return LAI ? &LAI->getSymbolicStrides() : nullptr;
383  }
384
385  /// The loop that we evaluate.
386  Loop *TheLoop;
387
388  /// Loop Info analysis.
389  LoopInfo *LI;
390
391  /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
392  /// Applies dynamic knowledge to simplify SCEV expressions in the context
393  /// of existing SCEV assumptions. The analysis will also add a minimal set
394  /// of new predicates if this is required to enable vectorization and
395  /// unrolling.
396  PredicatedScalarEvolution &PSE;
397
398  /// Target Transform Info.
399  TargetTransformInfo *TTI;
400
401  /// Target Library Info.
402  TargetLibraryInfo *TLI;
403
404  /// Dominator Tree.
405  DominatorTree *DT;
406
407  // LoopAccess analysis.
408  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
409
410  // And the loop-accesses info corresponding to this loop.  This pointer is
411  // null until canVectorizeMemory sets it up.
412  const LoopAccessInfo *LAI = nullptr;
413
414  /// Interface to emit optimization remarks.
415  OptimizationRemarkEmitter *ORE;
416
417  //  ---  vectorization state --- //
418
419  /// Holds the primary induction variable. This is the counter of the
420  /// loop.
421  PHINode *PrimaryInduction = nullptr;
422
423  /// Holds the reduction variables.
424  ReductionList Reductions;
425
426  /// Holds all of the induction variables that we found in the loop.
427  /// Notice that inductions don't need to start at zero and that induction
428  /// variables can be pointers.
429  InductionList Inductions;
430
431  /// Holds all the casts that participate in the update chain of the induction
432  /// variables, and that have been proven to be redundant (possibly under a
433  /// runtime guard). These casts can be ignored when creating the vectorized
434  /// loop body.
435  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
436
437  /// Holds the phi nodes that are first-order recurrences.
438  RecurrenceSet FirstOrderRecurrences;
439
440  /// Holds instructions that need to sink past other instructions to handle
441  /// first-order recurrences.
442  DenseMap<Instruction *, Instruction *> SinkAfter;
443
444  /// Holds the widest induction type encountered.
445  Type *WidestIndTy = nullptr;
446
447  /// Allowed outside users. This holds the variables that can be accessed from
448  /// outside the loop.
449  SmallPtrSet<Value *, 4> AllowedExit;
450
451  /// Can we assume the absence of NaNs.
452  bool HasFunNoNaNAttr = false;
453
454  /// Vectorization requirements that will go through late-evaluation.
455  LoopVectorizationRequirements *Requirements;
456
457  /// Used to emit an analysis of any legality issues.
458  LoopVectorizeHints *Hints;
459
460  /// The demanded bits analysis is used to compute the minimum type size in
461  /// which a reduction can be computed.
462  DemandedBits *DB;
463
464  /// The assumption cache analysis is used to compute the minimum type size in
465  /// which a reduction can be computed.
466  AssumptionCache *AC;
467
468  /// While vectorizing these instructions we have to generate a
469  /// call to the appropriate masked intrinsic
470  SmallPtrSet<const Instruction *, 8> MaskedOp;
471};
472
473} // namespace llvm
474
475#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
476