LoopVectorizationLegality.cpp revision 360784
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16#include "llvm/Transforms/Vectorize/LoopVectorize.h"
17#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
18#include "llvm/Analysis/Loads.h"
19#include "llvm/Analysis/ValueTracking.h"
20#include "llvm/Analysis/VectorUtils.h"
21#include "llvm/IR/IntrinsicInst.h"
22
23using namespace llvm;
24
25#define LV_NAME "loop-vectorize"
26#define DEBUG_TYPE LV_NAME
27
28extern cl::opt<bool> EnableVPlanPredication;
29
30static cl::opt<bool>
31    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
32                       cl::desc("Enable if-conversion during vectorization."));
33
34static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
35    "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
36    cl::desc("The maximum allowed number of runtime memory checks with a "
37             "vectorize(enable) pragma."));
38
39static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
40    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
41    cl::desc("The maximum number of SCEV checks allowed."));
42
43static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
44    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
45    cl::desc("The maximum number of SCEV checks allowed with a "
46             "vectorize(enable) pragma"));
47
48/// Maximum vectorization interleave count.
49static const unsigned MaxInterleaveFactor = 16;
50
51namespace llvm {
52
53bool LoopVectorizeHints::Hint::validate(unsigned Val) {
54  switch (Kind) {
55  case HK_WIDTH:
56    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
57  case HK_UNROLL:
58    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
59  case HK_FORCE:
60    return (Val <= 1);
61  case HK_ISVECTORIZED:
62  case HK_PREDICATE:
63    return (Val == 0 || Val == 1);
64  }
65  return false;
66}
67
68LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
69                                       bool InterleaveOnlyWhenForced,
70                                       OptimizationRemarkEmitter &ORE)
71    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
72      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
73      Force("vectorize.enable", FK_Undefined, HK_FORCE),
74      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
75      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
76      ORE(ORE) {
77  // Populate values with existing loop metadata.
78  getHintsFromMetadata();
79
80  // force-vector-interleave overrides DisableInterleaving.
81  if (VectorizerParams::isInterleaveForced())
82    Interleave.Value = VectorizerParams::VectorizationInterleave;
83
84  if (IsVectorized.Value != 1)
85    // If the vectorization width and interleaving count are both 1 then
86    // consider the loop to have been already vectorized because there's
87    // nothing more that we can do.
88    IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
89  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
90             << "LV: Interleaving disabled by the pass manager\n");
91}
92
93void LoopVectorizeHints::setAlreadyVectorized() {
94  LLVMContext &Context = TheLoop->getHeader()->getContext();
95
96  MDNode *IsVectorizedMD = MDNode::get(
97      Context,
98      {MDString::get(Context, "llvm.loop.isvectorized"),
99       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
100  MDNode *LoopID = TheLoop->getLoopID();
101  MDNode *NewLoopID =
102      makePostTransformationMetadata(Context, LoopID,
103                                     {Twine(Prefix(), "vectorize.").str(),
104                                      Twine(Prefix(), "interleave.").str()},
105                                     {IsVectorizedMD});
106  TheLoop->setLoopID(NewLoopID);
107
108  // Update internal cache.
109  IsVectorized.Value = 1;
110}
111
112bool LoopVectorizeHints::allowVectorization(
113    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
114  if (getForce() == LoopVectorizeHints::FK_Disabled) {
115    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
116    emitRemarkWithHints();
117    return false;
118  }
119
120  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
121    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
122    emitRemarkWithHints();
123    return false;
124  }
125
126  if (getIsVectorized() == 1) {
127    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
128    // FIXME: Add interleave.disable metadata. This will allow
129    // vectorize.disable to be used without disabling the pass and errors
130    // to differentiate between disabled vectorization and a width of 1.
131    ORE.emit([&]() {
132      return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
133                                        "AllDisabled", L->getStartLoc(),
134                                        L->getHeader())
135             << "loop not vectorized: vectorization and interleaving are "
136                "explicitly disabled, or the loop has already been "
137                "vectorized";
138    });
139    return false;
140  }
141
142  return true;
143}
144
145void LoopVectorizeHints::emitRemarkWithHints() const {
146  using namespace ore;
147
148  ORE.emit([&]() {
149    if (Force.Value == LoopVectorizeHints::FK_Disabled)
150      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
151                                      TheLoop->getStartLoc(),
152                                      TheLoop->getHeader())
153             << "loop not vectorized: vectorization is explicitly disabled";
154    else {
155      OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
156                                 TheLoop->getStartLoc(), TheLoop->getHeader());
157      R << "loop not vectorized";
158      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
159        R << " (Force=" << NV("Force", true);
160        if (Width.Value != 0)
161          R << ", Vector Width=" << NV("VectorWidth", Width.Value);
162        if (Interleave.Value != 0)
163          R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
164        R << ")";
165      }
166      return R;
167    }
168  });
169}
170
171const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
172  if (getWidth() == 1)
173    return LV_NAME;
174  if (getForce() == LoopVectorizeHints::FK_Disabled)
175    return LV_NAME;
176  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
177    return LV_NAME;
178  return OptimizationRemarkAnalysis::AlwaysPrint;
179}
180
181void LoopVectorizeHints::getHintsFromMetadata() {
182  MDNode *LoopID = TheLoop->getLoopID();
183  if (!LoopID)
184    return;
185
186  // First operand should refer to the loop id itself.
187  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
188  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
189
190  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
191    const MDString *S = nullptr;
192    SmallVector<Metadata *, 4> Args;
193
194    // The expected hint is either a MDString or a MDNode with the first
195    // operand a MDString.
196    if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
197      if (!MD || MD->getNumOperands() == 0)
198        continue;
199      S = dyn_cast<MDString>(MD->getOperand(0));
200      for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
201        Args.push_back(MD->getOperand(i));
202    } else {
203      S = dyn_cast<MDString>(LoopID->getOperand(i));
204      assert(Args.size() == 0 && "too many arguments for MDString");
205    }
206
207    if (!S)
208      continue;
209
210    // Check if the hint starts with the loop metadata prefix.
211    StringRef Name = S->getString();
212    if (Args.size() == 1)
213      setHint(Name, Args[0]);
214  }
215}
216
217void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
218  if (!Name.startswith(Prefix()))
219    return;
220  Name = Name.substr(Prefix().size(), StringRef::npos);
221
222  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
223  if (!C)
224    return;
225  unsigned Val = C->getZExtValue();
226
227  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
228  for (auto H : Hints) {
229    if (Name == H->Name) {
230      if (H->validate(Val))
231        H->Value = Val;
232      else
233        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
234      break;
235    }
236  }
237}
238
239bool LoopVectorizationRequirements::doesNotMeet(
240    Function *F, Loop *L, const LoopVectorizeHints &Hints) {
241  const char *PassName = Hints.vectorizeAnalysisPassName();
242  bool Failed = false;
243  if (UnsafeAlgebraInst && !Hints.allowReordering()) {
244    ORE.emit([&]() {
245      return OptimizationRemarkAnalysisFPCommute(
246                 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
247                 UnsafeAlgebraInst->getParent())
248             << "loop not vectorized: cannot prove it is safe to reorder "
249                "floating-point operations";
250    });
251    Failed = true;
252  }
253
254  // Test if runtime memcheck thresholds are exceeded.
255  bool PragmaThresholdReached =
256      NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
257  bool ThresholdReached =
258      NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
259  if ((ThresholdReached && !Hints.allowReordering()) ||
260      PragmaThresholdReached) {
261    ORE.emit([&]() {
262      return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
263                                                L->getStartLoc(),
264                                                L->getHeader())
265             << "loop not vectorized: cannot prove it is safe to reorder "
266                "memory operations";
267    });
268    LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
269    Failed = true;
270  }
271
272  return Failed;
273}
274
275// Return true if the inner loop \p Lp is uniform with regard to the outer loop
276// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
277// executing the inner loop will execute the same iterations). This check is
278// very constrained for now but it will be relaxed in the future. \p Lp is
279// considered uniform if it meets all the following conditions:
280//   1) it has a canonical IV (starting from 0 and with stride 1),
281//   2) its latch terminator is a conditional branch and,
282//   3) its latch condition is a compare instruction whose operands are the
283//      canonical IV and an OuterLp invariant.
284// This check doesn't take into account the uniformity of other conditions not
285// related to the loop latch because they don't affect the loop uniformity.
286//
287// NOTE: We decided to keep all these checks and its associated documentation
288// together so that we can easily have a picture of the current supported loop
289// nests. However, some of the current checks don't depend on \p OuterLp and
290// would be redundantly executed for each \p Lp if we invoked this function for
291// different candidate outer loops. This is not the case for now because we
292// don't currently have the infrastructure to evaluate multiple candidate outer
293// loops and \p OuterLp will be a fixed parameter while we only support explicit
294// outer loop vectorization. It's also very likely that these checks go away
295// before introducing the aforementioned infrastructure. However, if this is not
296// the case, we should move the \p OuterLp independent checks to a separate
297// function that is only executed once for each \p Lp.
298static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
299  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
300
301  // If Lp is the outer loop, it's uniform by definition.
302  if (Lp == OuterLp)
303    return true;
304  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
305
306  // 1.
307  PHINode *IV = Lp->getCanonicalInductionVariable();
308  if (!IV) {
309    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
310    return false;
311  }
312
313  // 2.
314  BasicBlock *Latch = Lp->getLoopLatch();
315  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
316  if (!LatchBr || LatchBr->isUnconditional()) {
317    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
318    return false;
319  }
320
321  // 3.
322  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
323  if (!LatchCmp) {
324    LLVM_DEBUG(
325        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
326    return false;
327  }
328
329  Value *CondOp0 = LatchCmp->getOperand(0);
330  Value *CondOp1 = LatchCmp->getOperand(1);
331  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
332  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
333      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
334    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
335    return false;
336  }
337
338  return true;
339}
340
341// Return true if \p Lp and all its nested loops are uniform with regard to \p
342// OuterLp.
343static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
344  if (!isUniformLoop(Lp, OuterLp))
345    return false;
346
347  // Check if nested loops are uniform.
348  for (Loop *SubLp : *Lp)
349    if (!isUniformLoopNest(SubLp, OuterLp))
350      return false;
351
352  return true;
353}
354
355/// Check whether it is safe to if-convert this phi node.
356///
357/// Phi nodes with constant expressions that can trap are not safe to if
358/// convert.
359static bool canIfConvertPHINodes(BasicBlock *BB) {
360  for (PHINode &Phi : BB->phis()) {
361    for (Value *V : Phi.incoming_values())
362      if (auto *C = dyn_cast<Constant>(V))
363        if (C->canTrap())
364          return false;
365  }
366  return true;
367}
368
369static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
370  if (Ty->isPointerTy())
371    return DL.getIntPtrType(Ty);
372
373  // It is possible that char's or short's overflow when we ask for the loop's
374  // trip count, work around this by changing the type size.
375  if (Ty->getScalarSizeInBits() < 32)
376    return Type::getInt32Ty(Ty->getContext());
377
378  return Ty;
379}
380
381static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
382  Ty0 = convertPointerToIntegerType(DL, Ty0);
383  Ty1 = convertPointerToIntegerType(DL, Ty1);
384  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
385    return Ty0;
386  return Ty1;
387}
388
389/// Check that the instruction has outside loop users and is not an
390/// identified reduction variable.
391static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
392                               SmallPtrSetImpl<Value *> &AllowedExit) {
393  // Reductions, Inductions and non-header phis are allowed to have exit users. All
394  // other instructions must not have external users.
395  if (!AllowedExit.count(Inst))
396    // Check that all of the users of the loop are inside the BB.
397    for (User *U : Inst->users()) {
398      Instruction *UI = cast<Instruction>(U);
399      // This user may be a reduction exit value.
400      if (!TheLoop->contains(UI)) {
401        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
402        return true;
403      }
404    }
405  return false;
406}
407
408int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
409  const ValueToValueMap &Strides =
410      getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
411
412  bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
413  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
414  if (Stride == 1 || Stride == -1)
415    return Stride;
416  return 0;
417}
418
419bool LoopVectorizationLegality::isUniform(Value *V) {
420  return LAI->isUniform(V);
421}
422
423bool LoopVectorizationLegality::canVectorizeOuterLoop() {
424  assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
425  // Store the result and return it at the end instead of exiting early, in case
426  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
427  bool Result = true;
428  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
429
430  for (BasicBlock *BB : TheLoop->blocks()) {
431    // Check whether the BB terminator is a BranchInst. Any other terminator is
432    // not supported yet.
433    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
434    if (!Br) {
435      reportVectorizationFailure("Unsupported basic block terminator",
436          "loop control flow is not understood by vectorizer",
437          "CFGNotUnderstood", ORE, TheLoop);
438      if (DoExtraAnalysis)
439        Result = false;
440      else
441        return false;
442    }
443
444    // Check whether the BranchInst is a supported one. Only unconditional
445    // branches, conditional branches with an outer loop invariant condition or
446    // backedges are supported.
447    // FIXME: We skip these checks when VPlan predication is enabled as we
448    // want to allow divergent branches. This whole check will be removed
449    // once VPlan predication is on by default.
450    if (!EnableVPlanPredication && Br && Br->isConditional() &&
451        !TheLoop->isLoopInvariant(Br->getCondition()) &&
452        !LI->isLoopHeader(Br->getSuccessor(0)) &&
453        !LI->isLoopHeader(Br->getSuccessor(1))) {
454      reportVectorizationFailure("Unsupported conditional branch",
455          "loop control flow is not understood by vectorizer",
456          "CFGNotUnderstood", ORE, TheLoop);
457      if (DoExtraAnalysis)
458        Result = false;
459      else
460        return false;
461    }
462  }
463
464  // Check whether inner loops are uniform. At this point, we only support
465  // simple outer loops scenarios with uniform nested loops.
466  if (!isUniformLoopNest(TheLoop /*loop nest*/,
467                         TheLoop /*context outer loop*/)) {
468    reportVectorizationFailure("Outer loop contains divergent loops",
469        "loop control flow is not understood by vectorizer",
470        "CFGNotUnderstood", ORE, TheLoop);
471    if (DoExtraAnalysis)
472      Result = false;
473    else
474      return false;
475  }
476
477  // Check whether we are able to set up outer loop induction.
478  if (!setupOuterLoopInductions()) {
479    reportVectorizationFailure("Unsupported outer loop Phi(s)",
480                               "Unsupported outer loop Phi(s)",
481                               "UnsupportedPhi", ORE, TheLoop);
482    if (DoExtraAnalysis)
483      Result = false;
484    else
485      return false;
486  }
487
488  return Result;
489}
490
491void LoopVectorizationLegality::addInductionPhi(
492    PHINode *Phi, const InductionDescriptor &ID,
493    SmallPtrSetImpl<Value *> &AllowedExit) {
494  Inductions[Phi] = ID;
495
496  // In case this induction also comes with casts that we know we can ignore
497  // in the vectorized loop body, record them here. All casts could be recorded
498  // here for ignoring, but suffices to record only the first (as it is the
499  // only one that may bw used outside the cast sequence).
500  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
501  if (!Casts.empty())
502    InductionCastsToIgnore.insert(*Casts.begin());
503
504  Type *PhiTy = Phi->getType();
505  const DataLayout &DL = Phi->getModule()->getDataLayout();
506
507  // Get the widest type.
508  if (!PhiTy->isFloatingPointTy()) {
509    if (!WidestIndTy)
510      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
511    else
512      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
513  }
514
515  // Int inductions are special because we only allow one IV.
516  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
517      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
518      isa<Constant>(ID.getStartValue()) &&
519      cast<Constant>(ID.getStartValue())->isNullValue()) {
520
521    // Use the phi node with the widest type as induction. Use the last
522    // one if there are multiple (no good reason for doing this other
523    // than it is expedient). We've checked that it begins at zero and
524    // steps by one, so this is a canonical induction variable.
525    if (!PrimaryInduction || PhiTy == WidestIndTy)
526      PrimaryInduction = Phi;
527  }
528
529  // Both the PHI node itself, and the "post-increment" value feeding
530  // back into the PHI node may have external users.
531  // We can allow those uses, except if the SCEVs we have for them rely
532  // on predicates that only hold within the loop, since allowing the exit
533  // currently means re-using this SCEV outside the loop (see PR33706 for more
534  // details).
535  if (PSE.getUnionPredicate().isAlwaysTrue()) {
536    AllowedExit.insert(Phi);
537    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
538  }
539
540  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
541}
542
543bool LoopVectorizationLegality::setupOuterLoopInductions() {
544  BasicBlock *Header = TheLoop->getHeader();
545
546  // Returns true if a given Phi is a supported induction.
547  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
548    InductionDescriptor ID;
549    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
550        ID.getKind() == InductionDescriptor::IK_IntInduction) {
551      addInductionPhi(&Phi, ID, AllowedExit);
552      return true;
553    } else {
554      // Bail out for any Phi in the outer loop header that is not a supported
555      // induction.
556      LLVM_DEBUG(
557          dbgs()
558          << "LV: Found unsupported PHI for outer loop vectorization.\n");
559      return false;
560    }
561  };
562
563  if (llvm::all_of(Header->phis(), isSupportedPhi))
564    return true;
565  else
566    return false;
567}
568
569bool LoopVectorizationLegality::canVectorizeInstrs() {
570  BasicBlock *Header = TheLoop->getHeader();
571
572  // Look for the attribute signaling the absence of NaNs.
573  Function &F = *Header->getParent();
574  HasFunNoNaNAttr =
575      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
576
577  // For each block in the loop.
578  for (BasicBlock *BB : TheLoop->blocks()) {
579    // Scan the instructions in the block and look for hazards.
580    for (Instruction &I : *BB) {
581      if (auto *Phi = dyn_cast<PHINode>(&I)) {
582        Type *PhiTy = Phi->getType();
583        // Check that this PHI type is allowed.
584        if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
585            !PhiTy->isPointerTy()) {
586          reportVectorizationFailure("Found a non-int non-pointer PHI",
587                                     "loop control flow is not understood by vectorizer",
588                                     "CFGNotUnderstood", ORE, TheLoop);
589          return false;
590        }
591
592        // If this PHINode is not in the header block, then we know that we
593        // can convert it to select during if-conversion. No need to check if
594        // the PHIs in this block are induction or reduction variables.
595        if (BB != Header) {
596          // Non-header phi nodes that have outside uses can be vectorized. Add
597          // them to the list of allowed exits.
598          // Unsafe cyclic dependencies with header phis are identified during
599          // legalization for reduction, induction and first order
600          // recurrences.
601          AllowedExit.insert(&I);
602          continue;
603        }
604
605        // We only allow if-converted PHIs with exactly two incoming values.
606        if (Phi->getNumIncomingValues() != 2) {
607          reportVectorizationFailure("Found an invalid PHI",
608              "loop control flow is not understood by vectorizer",
609              "CFGNotUnderstood", ORE, TheLoop, Phi);
610          return false;
611        }
612
613        RecurrenceDescriptor RedDes;
614        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
615                                                 DT)) {
616          if (RedDes.hasUnsafeAlgebra())
617            Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
618          AllowedExit.insert(RedDes.getLoopExitInstr());
619          Reductions[Phi] = RedDes;
620          continue;
621        }
622
623        // TODO: Instead of recording the AllowedExit, it would be good to record the
624        // complementary set: NotAllowedExit. These include (but may not be
625        // limited to):
626        // 1. Reduction phis as they represent the one-before-last value, which
627        // is not available when vectorized
628        // 2. Induction phis and increment when SCEV predicates cannot be used
629        // outside the loop - see addInductionPhi
630        // 3. Non-Phis with outside uses when SCEV predicates cannot be used
631        // outside the loop - see call to hasOutsideLoopUser in the non-phi
632        // handling below
633        // 4. FirstOrderRecurrence phis that can possibly be handled by
634        // extraction.
635        // By recording these, we can then reason about ways to vectorize each
636        // of these NotAllowedExit.
637        InductionDescriptor ID;
638        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
639          addInductionPhi(Phi, ID, AllowedExit);
640          if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
641            Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
642          continue;
643        }
644
645        if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
646                                                         SinkAfter, DT)) {
647          FirstOrderRecurrences.insert(Phi);
648          continue;
649        }
650
651        // As a last resort, coerce the PHI to a AddRec expression
652        // and re-try classifying it a an induction PHI.
653        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
654          addInductionPhi(Phi, ID, AllowedExit);
655          continue;
656        }
657
658        reportVectorizationFailure("Found an unidentified PHI",
659            "value that could not be identified as "
660            "reduction is used outside the loop",
661            "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
662        return false;
663      } // end of PHI handling
664
665      // We handle calls that:
666      //   * Are debug info intrinsics.
667      //   * Have a mapping to an IR intrinsic.
668      //   * Have a vector version available.
669      auto *CI = dyn_cast<CallInst>(&I);
670      if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
671          !isa<DbgInfoIntrinsic>(CI) &&
672          !(CI->getCalledFunction() && TLI &&
673            TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
674        // If the call is a recognized math libary call, it is likely that
675        // we can vectorize it given loosened floating-point constraints.
676        LibFunc Func;
677        bool IsMathLibCall =
678            TLI && CI->getCalledFunction() &&
679            CI->getType()->isFloatingPointTy() &&
680            TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
681            TLI->hasOptimizedCodeGen(Func);
682
683        if (IsMathLibCall) {
684          // TODO: Ideally, we should not use clang-specific language here,
685          // but it's hard to provide meaningful yet generic advice.
686          // Also, should this be guarded by allowExtraAnalysis() and/or be part
687          // of the returned info from isFunctionVectorizable()?
688          reportVectorizationFailure("Found a non-intrinsic callsite",
689              "library call cannot be vectorized. "
690              "Try compiling with -fno-math-errno, -ffast-math, "
691              "or similar flags",
692              "CantVectorizeLibcall", ORE, TheLoop, CI);
693        } else {
694          reportVectorizationFailure("Found a non-intrinsic callsite",
695                                     "call instruction cannot be vectorized",
696                                     "CantVectorizeLibcall", ORE, TheLoop, CI);
697        }
698        return false;
699      }
700
701      // Some intrinsics have scalar arguments and should be same in order for
702      // them to be vectorized (i.e. loop invariant).
703      if (CI) {
704        auto *SE = PSE.getSE();
705        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
706        for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
707          if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
708            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
709              reportVectorizationFailure("Found unvectorizable intrinsic",
710                  "intrinsic instruction cannot be vectorized",
711                  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
712              return false;
713            }
714          }
715      }
716
717      // Check that the instruction return type is vectorizable.
718      // Also, we can't vectorize extractelement instructions.
719      if ((!VectorType::isValidElementType(I.getType()) &&
720           !I.getType()->isVoidTy()) ||
721          isa<ExtractElementInst>(I)) {
722        reportVectorizationFailure("Found unvectorizable type",
723            "instruction return type cannot be vectorized",
724            "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
725        return false;
726      }
727
728      // Check that the stored type is vectorizable.
729      if (auto *ST = dyn_cast<StoreInst>(&I)) {
730        Type *T = ST->getValueOperand()->getType();
731        if (!VectorType::isValidElementType(T)) {
732          reportVectorizationFailure("Store instruction cannot be vectorized",
733                                     "store instruction cannot be vectorized",
734                                     "CantVectorizeStore", ORE, TheLoop, ST);
735          return false;
736        }
737
738        // For nontemporal stores, check that a nontemporal vector version is
739        // supported on the target.
740        if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
741          // Arbitrarily try a vector of 2 elements.
742          Type *VecTy = VectorType::get(T, /*NumElements=*/2);
743          assert(VecTy && "did not find vectorized version of stored type");
744          const MaybeAlign Alignment = getLoadStoreAlignment(ST);
745          assert(Alignment && "Alignment should be set");
746          if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
747            reportVectorizationFailure(
748                "nontemporal store instruction cannot be vectorized",
749                "nontemporal store instruction cannot be vectorized",
750                "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
751            return false;
752          }
753        }
754
755      } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
756        if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
757          // For nontemporal loads, check that a nontemporal vector version is
758          // supported on the target (arbitrarily try a vector of 2 elements).
759          Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
760          assert(VecTy && "did not find vectorized version of load type");
761          const MaybeAlign Alignment = getLoadStoreAlignment(LD);
762          assert(Alignment && "Alignment should be set");
763          if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
764            reportVectorizationFailure(
765                "nontemporal load instruction cannot be vectorized",
766                "nontemporal load instruction cannot be vectorized",
767                "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
768            return false;
769          }
770        }
771
772        // FP instructions can allow unsafe algebra, thus vectorizable by
773        // non-IEEE-754 compliant SIMD units.
774        // This applies to floating-point math operations and calls, not memory
775        // operations, shuffles, or casts, as they don't change precision or
776        // semantics.
777      } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
778                 !I.isFast()) {
779        LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
780        Hints->setPotentiallyUnsafe();
781      }
782
783      // Reduction instructions are allowed to have exit users.
784      // All other instructions must not have external users.
785      if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
786        // We can safely vectorize loops where instructions within the loop are
787        // used outside the loop only if the SCEV predicates within the loop is
788        // same as outside the loop. Allowing the exit means reusing the SCEV
789        // outside the loop.
790        if (PSE.getUnionPredicate().isAlwaysTrue()) {
791          AllowedExit.insert(&I);
792          continue;
793        }
794        reportVectorizationFailure("Value cannot be used outside the loop",
795                                   "value cannot be used outside the loop",
796                                   "ValueUsedOutsideLoop", ORE, TheLoop, &I);
797        return false;
798      }
799    } // next instr.
800  }
801
802  if (!PrimaryInduction) {
803    if (Inductions.empty()) {
804      reportVectorizationFailure("Did not find one integer induction var",
805          "loop induction variable could not be identified",
806          "NoInductionVariable", ORE, TheLoop);
807      return false;
808    } else if (!WidestIndTy) {
809      reportVectorizationFailure("Did not find one integer induction var",
810          "integer loop induction variable could not be identified",
811          "NoIntegerInductionVariable", ORE, TheLoop);
812      return false;
813    } else {
814      LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
815    }
816  }
817
818  // For first order recurrences, we use the previous value (incoming value from
819  // the latch) to check if it dominates all users of the recurrence. Bail out
820  // if we have to sink such an instruction for another recurrence, as the
821  // dominance requirement may not hold after sinking.
822  BasicBlock *LoopLatch = TheLoop->getLoopLatch();
823  if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
824        Instruction *V =
825            cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
826        return SinkAfter.find(V) != SinkAfter.end();
827      }))
828    return false;
829
830  // Now we know the widest induction type, check if our found induction
831  // is the same size. If it's not, unset it here and InnerLoopVectorizer
832  // will create another.
833  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
834    PrimaryInduction = nullptr;
835
836  return true;
837}
838
839bool LoopVectorizationLegality::canVectorizeMemory() {
840  LAI = &(*GetLAA)(*TheLoop);
841  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
842  if (LAR) {
843    ORE->emit([&]() {
844      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
845                                        "loop not vectorized: ", *LAR);
846    });
847  }
848  if (!LAI->canVectorizeMemory())
849    return false;
850
851  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
852    reportVectorizationFailure("Stores to a uniform address",
853        "write to a loop invariant address could not be vectorized",
854        "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
855    return false;
856  }
857  Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
858  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
859
860  return true;
861}
862
863bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
864  Value *In0 = const_cast<Value *>(V);
865  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
866  if (!PN)
867    return false;
868
869  return Inductions.count(PN);
870}
871
872bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
873  auto *Inst = dyn_cast<Instruction>(V);
874  return (Inst && InductionCastsToIgnore.count(Inst));
875}
876
877bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
878  return isInductionPhi(V) || isCastedInductionVariable(V);
879}
880
881bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
882  return FirstOrderRecurrences.count(Phi);
883}
884
885bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
886  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
887}
888
889bool LoopVectorizationLegality::blockCanBePredicated(
890    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
891  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
892
893  for (Instruction &I : *BB) {
894    // Check that we don't have a constant expression that can trap as operand.
895    for (Value *Operand : I.operands()) {
896      if (auto *C = dyn_cast<Constant>(Operand))
897        if (C->canTrap())
898          return false;
899    }
900    // We might be able to hoist the load.
901    if (I.mayReadFromMemory()) {
902      auto *LI = dyn_cast<LoadInst>(&I);
903      if (!LI)
904        return false;
905      if (!SafePtrs.count(LI->getPointerOperand())) {
906        // !llvm.mem.parallel_loop_access implies if-conversion safety.
907        // Otherwise, record that the load needs (real or emulated) masking
908        // and let the cost model decide.
909        if (!IsAnnotatedParallel || PreserveGuards)
910          MaskedOp.insert(LI);
911        continue;
912      }
913    }
914
915    if (I.mayWriteToMemory()) {
916      auto *SI = dyn_cast<StoreInst>(&I);
917      if (!SI)
918        return false;
919      // Predicated store requires some form of masking:
920      // 1) masked store HW instruction,
921      // 2) emulation via load-blend-store (only if safe and legal to do so,
922      //    be aware on the race conditions), or
923      // 3) element-by-element predicate check and scalar store.
924      MaskedOp.insert(SI);
925      continue;
926    }
927    if (I.mayThrow())
928      return false;
929  }
930
931  return true;
932}
933
934bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
935  if (!EnableIfConversion) {
936    reportVectorizationFailure("If-conversion is disabled",
937                               "if-conversion is disabled",
938                               "IfConversionDisabled",
939                               ORE, TheLoop);
940    return false;
941  }
942
943  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
944
945  // A list of pointers which are known to be dereferenceable within scope of
946  // the loop body for each iteration of the loop which executes.  That is,
947  // the memory pointed to can be dereferenced (with the access size implied by
948  // the value's type) unconditionally within the loop header without
949  // introducing a new fault.
950  SmallPtrSet<Value *, 8> SafePointes;
951
952  // Collect safe addresses.
953  for (BasicBlock *BB : TheLoop->blocks()) {
954    if (!blockNeedsPredication(BB)) {
955      for (Instruction &I : *BB)
956        if (auto *Ptr = getLoadStorePointerOperand(&I))
957          SafePointes.insert(Ptr);
958      continue;
959    }
960
961    // For a block which requires predication, a address may be safe to access
962    // in the loop w/o predication if we can prove dereferenceability facts
963    // sufficient to ensure it'll never fault within the loop. For the moment,
964    // we restrict this to loads; stores are more complicated due to
965    // concurrency restrictions.
966    ScalarEvolution &SE = *PSE.getSE();
967    for (Instruction &I : *BB) {
968      LoadInst *LI = dyn_cast<LoadInst>(&I);
969      if (LI && !mustSuppressSpeculation(*LI) &&
970          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
971        SafePointes.insert(LI->getPointerOperand());
972    }
973  }
974
975  // Collect the blocks that need predication.
976  BasicBlock *Header = TheLoop->getHeader();
977  for (BasicBlock *BB : TheLoop->blocks()) {
978    // We don't support switch statements inside loops.
979    if (!isa<BranchInst>(BB->getTerminator())) {
980      reportVectorizationFailure("Loop contains a switch statement",
981                                 "loop contains a switch statement",
982                                 "LoopContainsSwitch", ORE, TheLoop,
983                                 BB->getTerminator());
984      return false;
985    }
986
987    // We must be able to predicate all blocks that need to be predicated.
988    if (blockNeedsPredication(BB)) {
989      if (!blockCanBePredicated(BB, SafePointes)) {
990        reportVectorizationFailure(
991            "Control flow cannot be substituted for a select",
992            "control flow cannot be substituted for a select",
993            "NoCFGForSelect", ORE, TheLoop,
994            BB->getTerminator());
995        return false;
996      }
997    } else if (BB != Header && !canIfConvertPHINodes(BB)) {
998      reportVectorizationFailure(
999          "Control flow cannot be substituted for a select",
1000          "control flow cannot be substituted for a select",
1001          "NoCFGForSelect", ORE, TheLoop,
1002          BB->getTerminator());
1003      return false;
1004    }
1005  }
1006
1007  // We can if-convert this loop.
1008  return true;
1009}
1010
1011// Helper function to canVectorizeLoopNestCFG.
1012bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1013                                                    bool UseVPlanNativePath) {
1014  assert((UseVPlanNativePath || Lp->empty()) &&
1015         "VPlan-native path is not enabled.");
1016
1017  // TODO: ORE should be improved to show more accurate information when an
1018  // outer loop can't be vectorized because a nested loop is not understood or
1019  // legal. Something like: "outer_loop_location: loop not vectorized:
1020  // (inner_loop_location) loop control flow is not understood by vectorizer".
1021
1022  // Store the result and return it at the end instead of exiting early, in case
1023  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1024  bool Result = true;
1025  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1026
1027  // We must have a loop in canonical form. Loops with indirectbr in them cannot
1028  // be canonicalized.
1029  if (!Lp->getLoopPreheader()) {
1030    reportVectorizationFailure("Loop doesn't have a legal pre-header",
1031        "loop control flow is not understood by vectorizer",
1032        "CFGNotUnderstood", ORE, TheLoop);
1033    if (DoExtraAnalysis)
1034      Result = false;
1035    else
1036      return false;
1037  }
1038
1039  // We must have a single backedge.
1040  if (Lp->getNumBackEdges() != 1) {
1041    reportVectorizationFailure("The loop must have a single backedge",
1042        "loop control flow is not understood by vectorizer",
1043        "CFGNotUnderstood", ORE, TheLoop);
1044    if (DoExtraAnalysis)
1045      Result = false;
1046    else
1047      return false;
1048  }
1049
1050  // We must have a single exiting block.
1051  if (!Lp->getExitingBlock()) {
1052    reportVectorizationFailure("The loop must have an exiting block",
1053        "loop control flow is not understood by vectorizer",
1054        "CFGNotUnderstood", ORE, TheLoop);
1055    if (DoExtraAnalysis)
1056      Result = false;
1057    else
1058      return false;
1059  }
1060
1061  // We only handle bottom-tested loops, i.e. loop in which the condition is
1062  // checked at the end of each iteration. With that we can assume that all
1063  // instructions in the loop are executed the same number of times.
1064  if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1065    reportVectorizationFailure("The exiting block is not the loop latch",
1066        "loop control flow is not understood by vectorizer",
1067        "CFGNotUnderstood", ORE, TheLoop);
1068    if (DoExtraAnalysis)
1069      Result = false;
1070    else
1071      return false;
1072  }
1073
1074  return Result;
1075}
1076
1077bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1078    Loop *Lp, bool UseVPlanNativePath) {
1079  // Store the result and return it at the end instead of exiting early, in case
1080  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1081  bool Result = true;
1082  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1083  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1084    if (DoExtraAnalysis)
1085      Result = false;
1086    else
1087      return false;
1088  }
1089
1090  // Recursively check whether the loop control flow of nested loops is
1091  // understood.
1092  for (Loop *SubLp : *Lp)
1093    if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1094      if (DoExtraAnalysis)
1095        Result = false;
1096      else
1097        return false;
1098    }
1099
1100  return Result;
1101}
1102
1103bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1104  // Store the result and return it at the end instead of exiting early, in case
1105  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1106  bool Result = true;
1107
1108  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1109  // Check whether the loop-related control flow in the loop nest is expected by
1110  // vectorizer.
1111  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1112    if (DoExtraAnalysis)
1113      Result = false;
1114    else
1115      return false;
1116  }
1117
1118  // We need to have a loop header.
1119  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1120                    << '\n');
1121
1122  // Specific checks for outer loops. We skip the remaining legal checks at this
1123  // point because they don't support outer loops.
1124  if (!TheLoop->empty()) {
1125    assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1126
1127    if (!canVectorizeOuterLoop()) {
1128      reportVectorizationFailure("Unsupported outer loop",
1129                                 "unsupported outer loop",
1130                                 "UnsupportedOuterLoop",
1131                                 ORE, TheLoop);
1132      // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1133      // outer loops.
1134      return false;
1135    }
1136
1137    LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1138    return Result;
1139  }
1140
1141  assert(TheLoop->empty() && "Inner loop expected.");
1142  // Check if we can if-convert non-single-bb loops.
1143  unsigned NumBlocks = TheLoop->getNumBlocks();
1144  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1145    LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1146    if (DoExtraAnalysis)
1147      Result = false;
1148    else
1149      return false;
1150  }
1151
1152  // Check if we can vectorize the instructions and CFG in this loop.
1153  if (!canVectorizeInstrs()) {
1154    LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1155    if (DoExtraAnalysis)
1156      Result = false;
1157    else
1158      return false;
1159  }
1160
1161  // Go over each instruction and look at memory deps.
1162  if (!canVectorizeMemory()) {
1163    LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1164    if (DoExtraAnalysis)
1165      Result = false;
1166    else
1167      return false;
1168  }
1169
1170  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1171                    << (LAI->getRuntimePointerChecking()->Need
1172                            ? " (with a runtime bound check)"
1173                            : "")
1174                    << "!\n");
1175
1176  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1177  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1178    SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1179
1180  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1181    reportVectorizationFailure("Too many SCEV checks needed",
1182        "Too many SCEV assumptions need to be made and checked at runtime",
1183        "TooManySCEVRunTimeChecks", ORE, TheLoop);
1184    if (DoExtraAnalysis)
1185      Result = false;
1186    else
1187      return false;
1188  }
1189
1190  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1191  // we can vectorize, and at this point we don't have any other mem analysis
1192  // which may limit our maximum vectorization factor, so just return true with
1193  // no restrictions.
1194  return Result;
1195}
1196
1197bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1198
1199  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1200
1201  if (!PrimaryInduction) {
1202    reportVectorizationFailure(
1203        "No primary induction, cannot fold tail by masking",
1204        "Missing a primary induction variable in the loop, which is "
1205        "needed in order to fold tail by masking as required.",
1206        "NoPrimaryInduction", ORE, TheLoop);
1207    return false;
1208  }
1209
1210  SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1211
1212  for (auto &Reduction : *getReductionVars())
1213    ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1214
1215  // TODO: handle non-reduction outside users when tail is folded by masking.
1216  for (auto *AE : AllowedExit) {
1217    // Check that all users of allowed exit values are inside the loop or
1218    // are the live-out of a reduction.
1219    if (ReductionLiveOuts.count(AE))
1220      continue;
1221    for (User *U : AE->users()) {
1222      Instruction *UI = cast<Instruction>(U);
1223      if (TheLoop->contains(UI))
1224        continue;
1225      reportVectorizationFailure(
1226          "Cannot fold tail by masking, loop has an outside user for",
1227          "Cannot fold tail by masking in the presence of live outs.",
1228          "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1229      return false;
1230    }
1231  }
1232
1233  // The list of pointers that we can safely read and write to remains empty.
1234  SmallPtrSet<Value *, 8> SafePointers;
1235
1236  // Check and mark all blocks for predication, including those that ordinarily
1237  // do not need predication such as the header block.
1238  for (BasicBlock *BB : TheLoop->blocks()) {
1239    if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1240      reportVectorizationFailure(
1241          "Cannot fold tail by masking as required",
1242          "control flow cannot be substituted for a select",
1243          "NoCFGForSelect", ORE, TheLoop,
1244          BB->getTerminator());
1245      return false;
1246    }
1247  }
1248
1249  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1250  return true;
1251}
1252
1253} // namespace llvm
1254