diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 190 |
1 file changed, 99 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 2ab0848193f6..3c484fb0d28a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -37,11 +37,15 @@ static cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); -static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( - "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, - cl::desc("The maximum allowed number of runtime memory checks with a " - "vectorize(enable) pragma.")); +namespace llvm { +cl::opt<bool> + HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, + cl::desc("Allow enabling loop hints to reorder " + "FP operations during vectorization.")); +} +// TODO: Move size-based thresholds out of legality checking, make cost based +// decisions instead of hard thresholds. static cl::opt<unsigned> VectorizeSCEVCheckThreshold( "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed.")); @@ -51,6 +55,23 @@ static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma")); +// FIXME: When scalable vectorization is stable enough, change the default +// to SK_PreferFixedWidth. 
+static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization( + "scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly), + cl::Hidden, + cl::desc("Control whether the compiler can use scalable vectors to " + "vectorize a loop"), + cl::values( + clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", + "Scalable vectorization is disabled."), + clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on", + "Scalable vectorization is available, but favor fixed-width " + "vectorization when the cost is inconclusive."), + clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", + "Scalable vectorization is available and favored when the " + "cost is inconclusive."))); + /// Maximum vectorization interleave count. static const unsigned MaxInterleaveFactor = 16; @@ -60,7 +81,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) { switch (Kind) { case HK_WIDTH: return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; - case HK_UNROLL: + case HK_INTERLEAVE: return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; case HK_FORCE: return (Val <= 1); @@ -76,12 +97,12 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE) : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), - Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), + Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), - Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L), - ORE(ORE) { + Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE), + TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. 
getHintsFromMetadata(); @@ -89,13 +110,23 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, if (VectorizerParams::isInterleaveForced()) Interleave.Value = VectorizerParams::VectorizationInterleave; + if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) + // If the width is set, but the metadata says nothing about the scalable + // property, then assume it concerns only a fixed-width UserVF. + // If width is not set, the flag takes precedence. + Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization; + else if (ScalableVectorization == SK_FixedWidthOnly) + // If the flag is set to disable any use of scalable vectors, override the + // loop hint. + Scalable.Value = SK_FixedWidthOnly; + if (IsVectorized.Value != 1) // If the vectorization width and interleaving count are both 1 then // consider the loop to have been already vectorized because there's // nothing more that we can do. IsVectorized.Value = - getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1; - LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() + getWidth() == ElementCount::getFixed(1) && getInterleave() == 1; + LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); } @@ -168,8 +199,8 @@ void LoopVectorizeHints::emitRemarkWithHints() const { R << " (Force=" << NV("Force", true); if (Width.Value != 0) R << ", Vector Width=" << NV("VectorWidth", getWidth()); - if (Interleave.Value != 0) - R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); + if (getInterleave() != 0) + R << ", Interleave Count=" << NV("InterleaveCount", getInterleave()); R << ")"; } return R; @@ -187,6 +218,15 @@ const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { return OptimizationRemarkAnalysis::AlwaysPrint; } +bool LoopVectorizeHints::allowReordering() const { + // Allow the vectorizer to change the order of operations if enabling + // loop hints 
are provided + ElementCount EC = getWidth(); + return HintsAllowReordering && + (getForce() == LoopVectorizeHints::FK_Enabled || + EC.getKnownMinValue() > 1); +} + void LoopVectorizeHints::getHintsFromMetadata() { MDNode *LoopID = TheLoop->getLoopID(); if (!LoopID) @@ -246,42 +286,6 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { } } -bool LoopVectorizationRequirements::doesNotMeet( - Function *F, Loop *L, const LoopVectorizeHints &Hints) { - const char *PassName = Hints.vectorizeAnalysisPassName(); - bool Failed = false; - if (UnsafeAlgebraInst && !Hints.allowReordering()) { - ORE.emit([&]() { - return OptimizationRemarkAnalysisFPCommute( - PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), - UnsafeAlgebraInst->getParent()) - << "loop not vectorized: cannot prove it is safe to reorder " - "floating-point operations"; - }); - Failed = true; - } - - // Test if runtime memcheck thresholds are exceeded. - bool PragmaThresholdReached = - NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; - bool ThresholdReached = - NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; - if ((ThresholdReached && !Hints.allowReordering()) || - PragmaThresholdReached) { - ORE.emit([&]() { - return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", - L->getStartLoc(), - L->getHeader()) - << "loop not vectorized: cannot prove it is safe to reorder " - "memory operations"; - }); - LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); - Failed = true; - } - - return Failed; -} - // Return true if the inner loop \p Lp is uniform with regard to the outer loop // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes // executing the inner loop will execute the same iterations). 
This check is @@ -415,7 +419,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, return false; } -int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { +int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const { const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); @@ -582,7 +586,7 @@ bool LoopVectorizationLegality::setupOuterLoopInductions() { /// Checks if a function is scalarizable according to the TLI, in /// the sense that it should be vectorized and then expanded in -/// multiple scalarcalls. This is represented in the +/// multiple scalar calls. This is represented in the /// TLI via mappings that do not specify a vector name, as in the /// following example: /// @@ -594,22 +598,24 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { bool Scalarize = TLI.isFunctionVectorizable(ScalarName); // Check that all known VFs are not associated to a vector // function, i.e. the vector name is emty. - if (Scalarize) - for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); - VF <= WidestVF; VF *= 2) { + if (Scalarize) { + ElementCount WidestFixedVF, WidestScalableVF; + TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF); + for (ElementCount VF = ElementCount::getFixed(2); + ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2) Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF); - } + for (ElementCount VF = ElementCount::getScalable(1); + ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2) + Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF); + assert((WidestScalableVF.isZero() || !Scalarize) && + "Caller may decide to scalarize a variant using a scalable VF"); + } return Scalarize; } bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *Header = TheLoop->getHeader(); - // Look for the attribute signaling the absence of NaNs. 
- Function &F = *Header->getParent(); - HasFunNoNaNAttr = - F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; - // For each block in the loop. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the instructions in the block and look for hazards. @@ -649,8 +655,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { RecurrenceDescriptor RedDes; if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT)) { - if (RedDes.hasUnsafeAlgebra()) - Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); + Requirements->addExactFPMathInst(RedDes.getExactFPMathInst()); AllowedExit.insert(RedDes.getLoopExitInstr()); Reductions[Phi] = RedDes; continue; @@ -673,8 +678,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { addInductionPhi(Phi, ID, AllowedExit); - if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) - Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); + Requirements->addExactFPMathInst(ID.getExactFPMathInst()); continue; } @@ -881,6 +885,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { "loop not vectorized: ", *LAR); }); } + if (!LAI->canVectorizeMemory()) return false; @@ -890,12 +895,38 @@ bool LoopVectorizationLegality::canVectorizeMemory() { "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); return false; } + Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); PSE.addPredicate(LAI->getPSE().getUnionPredicate()); - return true; } +bool LoopVectorizationLegality::canVectorizeFPMath( + bool EnableStrictReductions) { + + // First check if there is any ExactFP math or if we allow reassociations + if (!Requirements->getExactFPInst() || Hints->allowReordering()) + return true; + + // If the above is false, we have ExactFPMath & do not allow reordering. + // If the EnableStrictReductions flag is set, first check if we have any + // Exact FP induction vars, which we cannot vectorize. 
+ if (!EnableStrictReductions || + any_of(getInductionVars(), [&](auto &Induction) -> bool { + InductionDescriptor IndDesc = Induction.second; + return IndDesc.getExactFPMathInst(); + })) + return false; + + // We can now only vectorize if all reductions with Exact FP math also + // have the isOrdered flag set, which indicates that we can move the + // reduction operations in-loop. + return (all_of(getReductionVars(), [&](auto &Reduction) -> bool { + const RecurrenceDescriptor &RdxDesc = Reduction.second; + return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered(); + })); +} + bool LoopVectorizationLegality::isInductionPhi(const Value *V) { Value *In0 = const_cast<Value *>(V); PHINode *PN = dyn_cast_or_null<PHINode>(In0); @@ -918,17 +949,14 @@ bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { return FirstOrderRecurrences.count(Phi); } -bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { +bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } bool LoopVectorizationLegality::blockCanBePredicated( BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, SmallPtrSetImpl<const Instruction *> &MaskedOp, - SmallPtrSetImpl<Instruction *> &ConditionalAssumes, - bool PreserveGuards) const { - const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); - + SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const { for (Instruction &I : *BB) { // Check that we don't have a constant expression that can trap as operand. for (Value *Operand : I.operands()) { @@ -956,11 +984,7 @@ bool LoopVectorizationLegality::blockCanBePredicated( if (!LI) return false; if (!SafePtrs.count(LI->getPointerOperand())) { - // !llvm.mem.parallel_loop_access implies if-conversion safety. - // Otherwise, record that the load needs (real or emulated) masking - // and let the cost model decide. 
- if (!IsAnnotatedParallel || PreserveGuards) - MaskedOp.insert(LI); + MaskedOp.insert(LI); continue; } } @@ -1101,21 +1125,6 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, return false; } - // We currently must have a single "exit block" after the loop. Note that - // multiple "exiting blocks" inside the loop are allowed, provided they all - // reach the single exit block. - // TODO: This restriction can be relaxed in the near future, it's here solely - // to allow separation of changes for review. We need to generalize the phi - // update logic in a number of places. - if (!Lp->getUniqueExitBlock()) { - reportVectorizationFailure("The loop must have a unique exit block", - "loop control flow is not understood by vectorizer", - "CFGNotUnderstood", ORE, TheLoop); - if (DoExtraAnalysis) - Result = false; - else - return false; - } return Result; } @@ -1276,8 +1285,7 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() { // do not need predication such as the header block. for (BasicBlock *BB : TheLoop->blocks()) { if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp, - TmpConditionalAssumes, - /* MaskAllLoads= */ true)) { + TmpConditionalAssumes)) { LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n"); return false; } |
