aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
commit145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Transforms/Scalar
parentecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--llvm/lib/Transforms/Scalar/ADCE.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/ConstantHoisting.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/ConstraintElimination.cpp752
-rw-r--r--llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp110
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp157
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp136
-rw-r--r--llvm/lib/Transforms/Scalar/EarlyCSE.cpp25
-rw-r--r--llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/Float2Int.cpp205
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp231
-rw-r--r--llvm/lib/Transforms/Scalar/GVNHoist.cpp16
-rw-r--r--llvm/lib/Transforms/Scalar/GVNSink.cpp30
-rw-r--r--llvm/lib/Transforms/Scalar/GuardWidening.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp34
-rw-r--r--llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp72
-rw-r--r--llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp142
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp480
-rw-r--r--llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp26
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp18
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDeletion.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDistribute.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFlatten.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFuse.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp64
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp196
-rw-r--r--llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPassManager.cpp10
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPredication.cpp5
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRerollPass.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRotation.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp26
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSink.cpp91
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp662
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp30
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp57
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnswitch.cpp1774
-rw-r--r--llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp (renamed from llvm/lib/Transforms/Scalar/LowerAtomic.cpp)84
-rw-r--r--llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp18
-rw-r--r--llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp57
-rw-r--r--llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp185
-rw-r--r--llvm/lib/Transforms/Scalar/MergeICmps.cpp59
-rw-r--r--llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/NewGVN.cpp46
-rw-r--r--llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp5
-rw-r--r--llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/Reassociate.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/Reg2Mem.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp489
-rw-r--r--llvm/lib/Transforms/Scalar/SCCP.cpp105
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp75
-rw-r--r--llvm/lib/Transforms/Scalar/Scalar.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/Scalarizer.cpp103
-rw-r--r--llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp121
-rw-r--r--llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/Sink.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp19
-rw-r--r--llvm/lib/Transforms/Scalar/StructurizeCFG.cpp67
-rw-r--r--llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp306
-rw-r--r--llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp2
76 files changed, 3405 insertions, 3964 deletions
diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 1cda206a7e14..cdf9de8d78d5 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -35,7 +35,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index e4ec5f266eb8..9571e99dfb19 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -15,8 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Instructions.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -26,12 +24,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp b/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
index a5e65ffc45fe..155f47b49357 100644
--- a/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
+++ b/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
@@ -16,11 +16,8 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/MemoryOpRemark.h"
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 95de59fa8262..cc12033fb677 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -57,6 +57,7 @@
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -65,7 +66,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -123,8 +123,8 @@ static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallBase &CB) {
return false;
}
-typedef std::pair<ICmpInst *, unsigned> ConditionTy;
-typedef SmallVector<ConditionTy, 2> ConditionsTy;
+using ConditionTy = std::pair<ICmpInst *, unsigned>;
+using ConditionsTy = SmallVector<ConditionTy, 2>;
/// If From has a conditional jump to To, add the condition to Conditions,
/// if it is relevant to any argument at CB.
@@ -301,10 +301,9 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
/// Note that in case any arguments at the call-site are constrained by its
/// predecessors, new call-sites with more constrained arguments will be
/// created in createCallSitesOnPredicatedArgument().
-static void splitCallSite(
- CallBase &CB,
- const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds,
- DomTreeUpdater &DTU) {
+static void splitCallSite(CallBase &CB,
+ ArrayRef<std::pair<BasicBlock *, ConditionsTy>> Preds,
+ DomTreeUpdater &DTU) {
BasicBlock *TailBB = CB.getParent();
bool IsMustTailCall = CB.isMustTailCall();
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 25e8c3ef3b48..8a1761505d59 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 13963657d183..6dfa2440023f 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -19,15 +19,16 @@
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Scalar.h"
#include <string>
@@ -42,48 +43,129 @@ DEBUG_COUNTER(EliminatedCounter, "conds-eliminated",
"Controls which conditions are eliminated");
static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max();
+static int64_t MinSignedConstraintValue = std::numeric_limits<int64_t>::min();
namespace {
-struct ConstraintTy {
- SmallVector<int64_t, 8> Coefficients;
- ConstraintTy(SmallVector<int64_t, 8> Coefficients)
- : Coefficients(Coefficients) {}
+class ConstraintInfo;
- unsigned size() const { return Coefficients.size(); }
+struct StackEntry {
+ unsigned NumIn;
+ unsigned NumOut;
+ bool IsNot;
+ bool IsSigned = false;
+ /// Variables that can be removed from the system once the stack entry gets
+ /// removed.
+ SmallVector<Value *, 2> ValuesToRelease;
+
+ StackEntry(unsigned NumIn, unsigned NumOut, bool IsNot, bool IsSigned,
+ SmallVector<Value *, 2> ValuesToRelease)
+ : NumIn(NumIn), NumOut(NumOut), IsNot(IsNot), IsSigned(IsSigned),
+ ValuesToRelease(ValuesToRelease) {}
};
-/// Struct to manage a list of constraints.
-struct ConstraintListTy {
- SmallVector<ConstraintTy, 4> Constraints;
+/// Struct to express a pre-condition of the form %Op0 Pred %Op1.
+struct PreconditionTy {
+ CmpInst::Predicate Pred;
+ Value *Op0;
+ Value *Op1;
- ConstraintListTy() {}
+ PreconditionTy(CmpInst::Predicate Pred, Value *Op0, Value *Op1)
+ : Pred(Pred), Op0(Op0), Op1(Op1) {}
+};
- ConstraintListTy(const SmallVector<ConstraintTy, 4> &Constraints)
- : Constraints(Constraints) {}
+struct ConstraintTy {
+ SmallVector<int64_t, 8> Coefficients;
+ SmallVector<PreconditionTy, 2> Preconditions;
- void mergeIn(const ConstraintListTy &Other) {
- append_range(Constraints, Other.Constraints);
- }
+ bool IsSigned = false;
+ bool IsEq = false;
+
+ ConstraintTy() = default;
- unsigned size() const { return Constraints.size(); }
+ ConstraintTy(SmallVector<int64_t, 8> Coefficients, bool IsSigned)
+ : Coefficients(Coefficients), IsSigned(IsSigned) {}
- unsigned empty() const { return Constraints.empty(); }
+ unsigned size() const { return Coefficients.size(); }
+
+ unsigned empty() const { return Coefficients.empty(); }
/// Returns true if any constraint has a non-zero coefficient for any of the
/// newly added indices. Zero coefficients for new indices are removed. If it
/// returns true, no new variable need to be added to the system.
bool needsNewIndices(const DenseMap<Value *, unsigned> &NewIndices) {
- assert(size() == 1);
for (unsigned I = 0; I < NewIndices.size(); ++I) {
- int64_t Last = get(0).Coefficients.pop_back_val();
+ int64_t Last = Coefficients.pop_back_val();
if (Last != 0)
return true;
}
return false;
}
- ConstraintTy &get(unsigned I) { return Constraints[I]; }
+ /// Returns true if all preconditions for this list of constraints are
+ /// satisfied given \p CS and the corresponding \p Value2Index mapping.
+ bool isValid(const ConstraintInfo &Info) const;
+};
+
+/// Wrapper encapsulating separate constraint systems and corresponding value
+/// mappings for both unsigned and signed information. Facts are added to and
+/// conditions are checked against the corresponding system depending on the
+/// signed-ness of their predicates. While the information is kept separate
+/// based on signed-ness, certain conditions can be transferred between the two
+/// systems.
+class ConstraintInfo {
+ DenseMap<Value *, unsigned> UnsignedValue2Index;
+ DenseMap<Value *, unsigned> SignedValue2Index;
+
+ ConstraintSystem UnsignedCS;
+ ConstraintSystem SignedCS;
+
+public:
+ DenseMap<Value *, unsigned> &getValue2Index(bool Signed) {
+ return Signed ? SignedValue2Index : UnsignedValue2Index;
+ }
+ const DenseMap<Value *, unsigned> &getValue2Index(bool Signed) const {
+ return Signed ? SignedValue2Index : UnsignedValue2Index;
+ }
+
+ ConstraintSystem &getCS(bool Signed) {
+ return Signed ? SignedCS : UnsignedCS;
+ }
+ const ConstraintSystem &getCS(bool Signed) const {
+ return Signed ? SignedCS : UnsignedCS;
+ }
+
+ void popLastConstraint(bool Signed) { getCS(Signed).popLastConstraint(); }
+ void popLastNVariables(bool Signed, unsigned N) {
+ getCS(Signed).popLastNVariables(N);
+ }
+
+ bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const;
+
+ void addFact(CmpInst::Predicate Pred, Value *A, Value *B, bool IsNegated,
+ unsigned NumIn, unsigned NumOut,
+ SmallVectorImpl<StackEntry> &DFSInStack);
+
+ /// Turn a comparison of the form \p Op0 \p Pred \p Op1 into a vector of
+ /// constraints, using indices from the corresponding constraint system.
+ /// Additional indices for newly discovered values are added to \p NewIndices.
+ ConstraintTy getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
+ DenseMap<Value *, unsigned> &NewIndices) const;
+
+ /// Turn a condition \p CmpI into a vector of constraints, using indices from
+ /// the corresponding constraint system. Additional indices for newly
+ /// discovered values are added to \p NewIndices.
+ ConstraintTy getConstraint(CmpInst *Cmp,
+ DenseMap<Value *, unsigned> &NewIndices) const {
+ return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
+ Cmp->getOperand(1), NewIndices);
+ }
+
+ /// Try to add information from \p A \p Pred \p B to the unsigned/signed
+ /// system if \p Pred is signed/unsigned.
+ void transferToOtherSystem(CmpInst::Predicate Pred, Value *A, Value *B,
+ bool IsNegated, unsigned NumIn, unsigned NumOut,
+ SmallVectorImpl<StackEntry> &DFSInStack);
};
} // namespace
@@ -92,11 +174,28 @@ struct ConstraintListTy {
// sum of the pairs equals \p V. The first pair is the constant-factor and X
// must be nullptr. If the expression cannot be decomposed, returns an empty
// vector.
-static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
+static SmallVector<std::pair<int64_t, Value *>, 4>
+decompose(Value *V, SmallVector<PreconditionTy, 4> &Preconditions,
+ bool IsSigned) {
+
+ auto CanUseSExt = [](ConstantInt *CI) {
+ const APInt &Val = CI->getValue();
+ return Val.sgt(MinSignedConstraintValue) && Val.slt(MaxConstraintValue);
+ };
+ // Decompose \p V used with a signed predicate.
+ if (IsSigned) {
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CanUseSExt(CI))
+ return {{CI->getSExtValue(), nullptr}};
+ }
+
+ return {{0, nullptr}, {1, V}};
+ }
+
if (auto *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->isNegative() || CI->uge(MaxConstraintValue))
+ if (CI->uge(MaxConstraintValue))
return {};
- return {{CI->getSExtValue(), nullptr}};
+ return {{CI->getZExtValue(), nullptr}};
}
auto *GEP = dyn_cast<GetElementPtrInst>(V);
if (GEP && GEP->getNumOperands() == 2 && GEP->isInBounds()) {
@@ -106,11 +205,13 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
// If the index is zero-extended, it is guaranteed to be positive.
if (match(GEP->getOperand(GEP->getNumOperands() - 1),
m_ZExt(m_Value(Op0)))) {
- if (match(Op0, m_NUWShl(m_Value(Op1), m_ConstantInt(CI))))
+ if (match(Op0, m_NUWShl(m_Value(Op1), m_ConstantInt(CI))) &&
+ CanUseSExt(CI))
return {{0, nullptr},
{1, GEP->getPointerOperand()},
{std::pow(int64_t(2), CI->getSExtValue()), Op1}};
- if (match(Op0, m_NSWAdd(m_Value(Op1), m_ConstantInt(CI))))
+ if (match(Op0, m_NSWAdd(m_Value(Op1), m_ConstantInt(CI))) &&
+ CanUseSExt(CI))
return {{CI->getSExtValue(), nullptr},
{1, GEP->getPointerOperand()},
{1, Op1}};
@@ -118,17 +219,19 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
}
if (match(GEP->getOperand(GEP->getNumOperands() - 1), m_ConstantInt(CI)) &&
- !CI->isNegative())
+ !CI->isNegative() && CanUseSExt(CI))
return {{CI->getSExtValue(), nullptr}, {1, GEP->getPointerOperand()}};
SmallVector<std::pair<int64_t, Value *>, 4> Result;
if (match(GEP->getOperand(GEP->getNumOperands() - 1),
- m_NUWShl(m_Value(Op0), m_ConstantInt(CI))))
+ m_NUWShl(m_Value(Op0), m_ConstantInt(CI))) &&
+ CanUseSExt(CI))
Result = {{0, nullptr},
{1, GEP->getPointerOperand()},
{std::pow(int64_t(2), CI->getSExtValue()), Op0}};
else if (match(GEP->getOperand(GEP->getNumOperands() - 1),
- m_NSWAdd(m_Value(Op0), m_ConstantInt(CI))))
+ m_NSWAdd(m_Value(Op0), m_ConstantInt(CI))) &&
+ CanUseSExt(CI))
Result = {{CI->getSExtValue(), nullptr},
{1, GEP->getPointerOperand()},
{1, Op0}};
@@ -136,6 +239,10 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
Op0 = GEP->getOperand(GEP->getNumOperands() - 1);
Result = {{0, nullptr}, {1, GEP->getPointerOperand()}, {1, Op0}};
}
+ // If Op0 is signed non-negative, the GEP is increasing monotonically and
+ // can be de-composed.
+ Preconditions.emplace_back(CmpInst::ICMP_SGE, Op0,
+ ConstantInt::get(Op0->getType(), 0));
return Result;
}
@@ -145,12 +252,20 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
Value *Op1;
ConstantInt *CI;
- if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))))
+ if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))) &&
+ !CI->uge(MaxConstraintValue))
+ return {{CI->getZExtValue(), nullptr}, {1, Op0}};
+ if (match(V, m_Add(m_Value(Op0), m_ConstantInt(CI))) && CI->isNegative() &&
+ CanUseSExt(CI)) {
+ Preconditions.emplace_back(
+ CmpInst::ICMP_UGE, Op0,
+ ConstantInt::get(Op0->getType(), CI->getSExtValue() * -1));
return {{CI->getSExtValue(), nullptr}, {1, Op0}};
+ }
if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1))))
return {{0, nullptr}, {1, Op0}, {1, Op1}};
- if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))))
+ if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && CanUseSExt(CI))
return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}};
if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
return {{0, nullptr}, {1, Op0}, {-1, Op1}};
@@ -158,73 +273,73 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
return {{0, nullptr}, {1, V}};
}
-/// Turn a condition \p CmpI into a vector of constraints, using indices from \p
-/// Value2Index. Additional indices for newly discovered values are added to \p
-/// NewIndices.
-static ConstraintListTy
-getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
- const DenseMap<Value *, unsigned> &Value2Index,
- DenseMap<Value *, unsigned> &NewIndices) {
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
-
- // First try to look up \p V in Value2Index and NewIndices. Otherwise add a
- // new entry to NewIndices.
- auto GetOrAddIndex = [&Value2Index, &NewIndices](Value *V) -> unsigned {
- auto V2I = Value2Index.find(V);
- if (V2I != Value2Index.end())
- return V2I->second;
- auto NewI = NewIndices.find(V);
- if (NewI != NewIndices.end())
- return NewI->second;
- auto Insert =
- NewIndices.insert({V, Value2Index.size() + NewIndices.size() + 1});
- return Insert.first->second;
- };
-
- if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE)
- return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0,
- Value2Index, NewIndices);
-
- if (Pred == CmpInst::ICMP_EQ) {
- if (match(Op1, m_Zero()))
- return getConstraint(CmpInst::ICMP_ULE, Op0, Op1, Value2Index,
- NewIndices);
-
- auto A =
- getConstraint(CmpInst::ICMP_UGE, Op0, Op1, Value2Index, NewIndices);
- auto B =
- getConstraint(CmpInst::ICMP_ULE, Op0, Op1, Value2Index, NewIndices);
- A.mergeIn(B);
- return A;
+ConstraintTy
+ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
+ DenseMap<Value *, unsigned> &NewIndices) const {
+ bool IsEq = false;
+ // Try to convert Pred to one of ULE/SLT/SLE/SLT.
+ switch (Pred) {
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGE: {
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ std::swap(Op0, Op1);
+ break;
}
-
- if (Pred == CmpInst::ICMP_NE && match(Op1, m_Zero())) {
- return getConstraint(CmpInst::ICMP_UGT, Op0, Op1, Value2Index, NewIndices);
+ case CmpInst::ICMP_EQ:
+ if (match(Op1, m_Zero())) {
+ Pred = CmpInst::ICMP_ULE;
+ } else {
+ IsEq = true;
+ Pred = CmpInst::ICMP_ULE;
+ }
+ break;
+ case CmpInst::ICMP_NE:
+ if (!match(Op1, m_Zero()))
+ return {};
+ Pred = CmpInst::getSwappedPredicate(CmpInst::ICMP_UGT);
+ std::swap(Op0, Op1);
+ break;
+ default:
+ break;
}
// Only ULE and ULT predicates are supported at the moment.
- if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT)
+ if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT &&
+ Pred != CmpInst::ICMP_SLE && Pred != CmpInst::ICMP_SLT)
return {};
- auto ADec = decompose(Op0->stripPointerCastsSameRepresentation());
- auto BDec = decompose(Op1->stripPointerCastsSameRepresentation());
+ SmallVector<PreconditionTy, 4> Preconditions;
+ bool IsSigned = CmpInst::isSigned(Pred);
+ auto &Value2Index = getValue2Index(IsSigned);
+ auto ADec = decompose(Op0->stripPointerCastsSameRepresentation(),
+ Preconditions, IsSigned);
+ auto BDec = decompose(Op1->stripPointerCastsSameRepresentation(),
+ Preconditions, IsSigned);
// Skip if decomposing either of the values failed.
if (ADec.empty() || BDec.empty())
return {};
- // Skip trivial constraints without any variables.
- if (ADec.size() == 1 && BDec.size() == 1)
- return {};
-
- Offset1 = ADec[0].first;
- Offset2 = BDec[0].first;
+ int64_t Offset1 = ADec[0].first;
+ int64_t Offset2 = BDec[0].first;
Offset1 *= -1;
// Create iterator ranges that skip the constant-factor.
auto VariablesA = llvm::drop_begin(ADec);
auto VariablesB = llvm::drop_begin(BDec);
+ // First try to look up \p V in Value2Index and NewIndices. Otherwise add a
+ // new entry to NewIndices.
+ auto GetOrAddIndex = [&Value2Index, &NewIndices](Value *V) -> unsigned {
+ auto V2I = Value2Index.find(V);
+ if (V2I != Value2Index.end())
+ return V2I->second;
+ auto Insert =
+ NewIndices.insert({V, Value2Index.size() + NewIndices.size() + 1});
+ return Insert.first->second;
+ };
+
// Make sure all variables have entries in Value2Index or NewIndices.
for (const auto &KV :
concat<std::pair<int64_t, Value *>>(VariablesA, VariablesB))
@@ -232,22 +347,85 @@ getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
// Build result constraint, by first adding all coefficients from A and then
// subtracting all coefficients from B.
- SmallVector<int64_t, 8> R(Value2Index.size() + NewIndices.size() + 1, 0);
+ ConstraintTy Res(
+ SmallVector<int64_t, 8>(Value2Index.size() + NewIndices.size() + 1, 0),
+ IsSigned);
+ Res.IsEq = IsEq;
+ auto &R = Res.Coefficients;
for (const auto &KV : VariablesA)
R[GetOrAddIndex(KV.second)] += KV.first;
for (const auto &KV : VariablesB)
R[GetOrAddIndex(KV.second)] -= KV.first;
- R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0);
- return {{R}};
+ int64_t OffsetSum;
+ if (AddOverflow(Offset1, Offset2, OffsetSum))
+ return {};
+ if (Pred == (IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT))
+ if (AddOverflow(OffsetSum, int64_t(-1), OffsetSum))
+ return {};
+ R[0] = OffsetSum;
+ Res.Preconditions = std::move(Preconditions);
+ return Res;
+}
+
+bool ConstraintTy::isValid(const ConstraintInfo &Info) const {
+ return Coefficients.size() > 0 &&
+ all_of(Preconditions, [&Info](const PreconditionTy &C) {
+ return Info.doesHold(C.Pred, C.Op0, C.Op1);
+ });
+}
+
+bool ConstraintInfo::doesHold(CmpInst::Predicate Pred, Value *A,
+ Value *B) const {
+ DenseMap<Value *, unsigned> NewIndices;
+ auto R = getConstraint(Pred, A, B, NewIndices);
+
+ if (!NewIndices.empty())
+ return false;
+
+ // TODO: properly check NewIndices.
+ return NewIndices.empty() && R.Preconditions.empty() && !R.IsEq &&
+ !R.empty() &&
+ getCS(CmpInst::isSigned(Pred)).isConditionImplied(R.Coefficients);
}
-static ConstraintListTy
-getConstraint(CmpInst *Cmp, const DenseMap<Value *, unsigned> &Value2Index,
- DenseMap<Value *, unsigned> &NewIndices) {
- return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
- Cmp->getOperand(1), Value2Index, NewIndices);
+void ConstraintInfo::transferToOtherSystem(
+ CmpInst::Predicate Pred, Value *A, Value *B, bool IsNegated, unsigned NumIn,
+ unsigned NumOut, SmallVectorImpl<StackEntry> &DFSInStack) {
+ // Check if we can combine facts from the signed and unsigned systems to
+ // derive additional facts.
+ if (!A->getType()->isIntegerTy())
+ return;
+ // FIXME: This currently depends on the order we add facts. Ideally we
+ // would first add all known facts and only then try to add additional
+ // facts.
+ switch (Pred) {
+ default:
+ break;
+ case CmpInst::ICMP_ULT:
+ // If B is a signed positive constant, A >=s 0 and A <s B.
+ if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0))) {
+ addFact(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0),
+ IsNegated, NumIn, NumOut, DFSInStack);
+ addFact(CmpInst::ICMP_SLT, A, B, IsNegated, NumIn, NumOut, DFSInStack);
+ }
+ break;
+ case CmpInst::ICMP_SLT:
+ if (doesHold(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0)))
+ addFact(CmpInst::ICMP_ULT, A, B, IsNegated, NumIn, NumOut, DFSInStack);
+ break;
+ case CmpInst::ICMP_SGT:
+ if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
+ addFact(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0),
+ IsNegated, NumIn, NumOut, DFSInStack);
+ break;
+ case CmpInst::ICMP_SGE:
+ if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0))) {
+ addFact(CmpInst::ICMP_UGE, A, B, IsNegated, NumIn, NumOut, DFSInStack);
+ }
+ break;
+ }
}
namespace {
@@ -271,134 +449,253 @@ struct ConstraintOrBlock {
Not(Not), Condition(Condition) {}
};
-struct StackEntry {
- unsigned NumIn;
- unsigned NumOut;
- CmpInst *Condition;
- bool IsNot;
+/// Keep state required to build worklist.
+struct State {
+ DominatorTree &DT;
+ SmallVector<ConstraintOrBlock, 64> WorkList;
- StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot)
- : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {}
+ State(DominatorTree &DT) : DT(DT) {}
+
+ /// Process block \p BB and add known facts to work-list.
+ void addInfoFor(BasicBlock &BB);
+
+ /// Returns true if we can add a known condition from BB to its successor
+ /// block Succ. Each predecessor of Succ can either be BB or be dominated
+ /// by Succ (e.g. the case when adding a condition from a pre-header to a
+ /// loop header).
+ bool canAddSuccessor(BasicBlock &BB, BasicBlock *Succ) const {
+ if (BB.getSingleSuccessor()) {
+ assert(BB.getSingleSuccessor() == Succ);
+ return DT.properlyDominates(&BB, Succ);
+ }
+ return any_of(successors(&BB),
+ [Succ](const BasicBlock *S) { return S != Succ; }) &&
+ all_of(predecessors(Succ), [&BB, Succ, this](BasicBlock *Pred) {
+ return Pred == &BB || DT.dominates(Succ, Pred);
+ });
+ }
};
+
} // namespace
#ifndef NDEBUG
-static void dumpWithNames(ConstraintTy &C,
+static void dumpWithNames(const ConstraintSystem &CS,
DenseMap<Value *, unsigned> &Value2Index) {
SmallVector<std::string> Names(Value2Index.size(), "");
for (auto &KV : Value2Index) {
Names[KV.second - 1] = std::string("%") + KV.first->getName().str();
}
- ConstraintSystem CS;
- CS.addVariableRowFill(C.Coefficients);
CS.dump(Names);
}
-#endif
-static bool eliminateConstraints(Function &F, DominatorTree &DT) {
- bool Changed = false;
- DT.updateDFSNumbers();
+static void dumpWithNames(ArrayRef<int64_t> C,
+ DenseMap<Value *, unsigned> &Value2Index) {
ConstraintSystem CS;
+ CS.addVariableRowFill(C);
+ dumpWithNames(CS, Value2Index);
+}
+#endif
- SmallVector<ConstraintOrBlock, 64> WorkList;
+void State::addInfoFor(BasicBlock &BB) {
+ WorkList.emplace_back(DT.getNode(&BB));
- // First, collect conditions implied by branches and blocks with their
- // Dominator DFS in and out numbers.
- for (BasicBlock &BB : F) {
- if (!DT.getNode(&BB))
- continue;
- WorkList.emplace_back(DT.getNode(&BB));
-
- // True as long as long as the current instruction is guaranteed to execute.
- bool GuaranteedToExecute = true;
- // Scan BB for assume calls.
- // TODO: also use this scan to queue conditions to simplify, so we can
- // interleave facts from assumes and conditions to simplify in a single
- // basic block. And to skip another traversal of each basic block when
- // simplifying.
- for (Instruction &I : BB) {
- Value *Cond;
- // For now, just handle assumes with a single compare as condition.
- if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) &&
- isa<CmpInst>(Cond)) {
- if (GuaranteedToExecute) {
- // The assume is guaranteed to execute when BB is entered, hence Cond
- // holds on entry to BB.
- WorkList.emplace_back(DT.getNode(&BB), cast<CmpInst>(Cond), false);
- } else {
- // Otherwise the condition only holds in the successors.
- for (BasicBlock *Succ : successors(&BB))
- WorkList.emplace_back(DT.getNode(Succ), cast<CmpInst>(Cond), false);
+ // True as long as long as the current instruction is guaranteed to execute.
+ bool GuaranteedToExecute = true;
+ // Scan BB for assume calls.
+ // TODO: also use this scan to queue conditions to simplify, so we can
+ // interleave facts from assumes and conditions to simplify in a single
+ // basic block. And to skip another traversal of each basic block when
+ // simplifying.
+ for (Instruction &I : BB) {
+ Value *Cond;
+ // For now, just handle assumes with a single compare as condition.
+ if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) &&
+ isa<ICmpInst>(Cond)) {
+ if (GuaranteedToExecute) {
+ // The assume is guaranteed to execute when BB is entered, hence Cond
+ // holds on entry to BB.
+ WorkList.emplace_back(DT.getNode(&BB), cast<ICmpInst>(Cond), false);
+ } else {
+ // Otherwise the condition only holds in the successors.
+ for (BasicBlock *Succ : successors(&BB)) {
+ if (!canAddSuccessor(BB, Succ))
+ continue;
+ WorkList.emplace_back(DT.getNode(Succ), cast<ICmpInst>(Cond), false);
}
}
- GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
}
+ GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
+ }
- auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
- if (!Br || !Br->isConditional())
- continue;
+ auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
+ if (!Br || !Br->isConditional())
+ return;
- // Returns true if we can add a known condition from BB to its successor
- // block Succ. Each predecessor of Succ can either be BB or be dominated by
- // Succ (e.g. the case when adding a condition from a pre-header to a loop
- // header).
- auto CanAdd = [&BB, &DT](BasicBlock *Succ) {
- return all_of(predecessors(Succ), [&BB, &DT, Succ](BasicBlock *Pred) {
- return Pred == &BB || DT.dominates(Succ, Pred);
- });
- };
- // If the condition is an OR of 2 compares and the false successor only has
- // the current block as predecessor, queue both negated conditions for the
- // false successor.
- Value *Op0, *Op1;
- if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) &&
- match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
- BasicBlock *FalseSuccessor = Br->getSuccessor(1);
- if (CanAdd(FalseSuccessor)) {
- WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op0),
- true);
- WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op1),
- true);
- }
- continue;
+ // If the condition is an OR of 2 compares and the false successor only has
+ // the current block as predecessor, queue both negated conditions for the
+ // false successor.
+ Value *Op0, *Op1;
+ if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) &&
+ isa<ICmpInst>(Op0) && isa<ICmpInst>(Op1)) {
+ BasicBlock *FalseSuccessor = Br->getSuccessor(1);
+ if (canAddSuccessor(BB, FalseSuccessor)) {
+ WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<ICmpInst>(Op0),
+ true);
+ WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<ICmpInst>(Op1),
+ true);
+ }
+ return;
+ }
+
+ // If the condition is an AND of 2 compares and the true successor only has
+ // the current block as predecessor, queue both conditions for the true
+ // successor.
+ if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) &&
+ isa<ICmpInst>(Op0) && isa<ICmpInst>(Op1)) {
+ BasicBlock *TrueSuccessor = Br->getSuccessor(0);
+ if (canAddSuccessor(BB, TrueSuccessor)) {
+ WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<ICmpInst>(Op0),
+ false);
+ WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<ICmpInst>(Op1),
+ false);
+ }
+ return;
+ }
+
+ auto *CmpI = dyn_cast<ICmpInst>(Br->getCondition());
+ if (!CmpI)
+ return;
+ if (canAddSuccessor(BB, Br->getSuccessor(0)))
+ WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false);
+ if (canAddSuccessor(BB, Br->getSuccessor(1)))
+ WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true);
+}
+
+void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
+ bool IsNegated, unsigned NumIn, unsigned NumOut,
+ SmallVectorImpl<StackEntry> &DFSInStack) {
+ // If the constraint has a pre-condition, skip the constraint if it does not
+ // hold.
+ DenseMap<Value *, unsigned> NewIndices;
+ auto R = getConstraint(Pred, A, B, NewIndices);
+ if (!R.isValid(*this))
+ return;
+
+ //LLVM_DEBUG(dbgs() << "Adding " << *Condition << " " << IsNegated << "\n");
+ bool Added = false;
+ assert(CmpInst::isSigned(Pred) == R.IsSigned &&
+ "condition and constraint signs must match");
+ auto &CSToUse = getCS(R.IsSigned);
+ if (R.Coefficients.empty())
+ return;
+
+ Added |= CSToUse.addVariableRowFill(R.Coefficients);
+
+ // If R has been added to the system, queue it for removal once it goes
+ // out-of-scope.
+ if (Added) {
+ SmallVector<Value *, 2> ValuesToRelease;
+ for (auto &KV : NewIndices) {
+ getValue2Index(R.IsSigned).insert(KV);
+ ValuesToRelease.push_back(KV.first);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << " constraint: ";
+ dumpWithNames(R.Coefficients, getValue2Index(R.IsSigned));
+ });
+
+ DFSInStack.emplace_back(NumIn, NumOut, IsNegated, R.IsSigned,
+ ValuesToRelease);
+
+ if (R.IsEq) {
+ // Also add the inverted constraint for equality constraints.
+ for (auto &Coeff : R.Coefficients)
+ Coeff *= -1;
+ CSToUse.addVariableRowFill(R.Coefficients);
+
+ DFSInStack.emplace_back(NumIn, NumOut, IsNegated, R.IsSigned,
+ SmallVector<Value *, 2>());
}
+ }
+}
+
+static void
+tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
+ SmallVectorImpl<Instruction *> &ToRemove) {
+ auto DoesConditionHold = [](CmpInst::Predicate Pred, Value *A, Value *B,
+ ConstraintInfo &Info) {
+ DenseMap<Value *, unsigned> NewIndices;
+ auto R = Info.getConstraint(Pred, A, B, NewIndices);
+ if (R.size() < 2 || R.needsNewIndices(NewIndices) || !R.isValid(Info))
+ return false;
+
+ auto &CSToUse = Info.getCS(CmpInst::isSigned(Pred));
+ return CSToUse.isConditionImplied(R.Coefficients);
+ };
+
+ if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
+ // If A s>= B && B s>= 0, ssub.with.overflow(a, b) should not overflow and
+ // can be simplified to a regular sub.
+ Value *A = II->getArgOperand(0);
+ Value *B = II->getArgOperand(1);
+ if (!DoesConditionHold(CmpInst::ICMP_SGE, A, B, Info) ||
+ !DoesConditionHold(CmpInst::ICMP_SGE, B,
+ ConstantInt::get(A->getType(), 0), Info))
+ return;
+
+ IRBuilder<> Builder(II->getParent(), II->getIterator());
+ Value *Sub = nullptr;
+ for (User *U : make_early_inc_range(II->users())) {
+ if (match(U, m_ExtractValue<0>(m_Value()))) {
+ if (!Sub)
+ Sub = Builder.CreateSub(A, B);
+ U->replaceAllUsesWith(Sub);
+ } else if (match(U, m_ExtractValue<1>(m_Value())))
+ U->replaceAllUsesWith(Builder.getFalse());
+ else
+ continue;
- // If the condition is an AND of 2 compares and the true successor only has
- // the current block as predecessor, queue both conditions for the true
- // successor.
- if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) &&
- match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
- BasicBlock *TrueSuccessor = Br->getSuccessor(0);
- if (CanAdd(TrueSuccessor)) {
- WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op0),
- false);
- WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op1),
- false);
+ if (U->use_empty()) {
+ auto *I = cast<Instruction>(U);
+ ToRemove.push_back(I);
+ I->setOperand(0, PoisonValue::get(II->getType()));
}
- continue;
}
- auto *CmpI = dyn_cast<CmpInst>(Br->getCondition());
- if (!CmpI)
+ if (II->use_empty())
+ II->eraseFromParent();
+ }
+}
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT) {
+ bool Changed = false;
+ DT.updateDFSNumbers();
+
+ ConstraintInfo Info;
+ State S(DT);
+
+ // First, collect conditions implied by branches and blocks with their
+ // Dominator DFS in and out numbers.
+ for (BasicBlock &BB : F) {
+ if (!DT.getNode(&BB))
continue;
- if (CanAdd(Br->getSuccessor(0)))
- WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false);
- if (CanAdd(Br->getSuccessor(1)))
- WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true);
+ S.addInfoFor(BB);
}
// Next, sort worklist by dominance, so that dominating blocks and conditions
// come before blocks and conditions dominated by them. If a block and a
// condition have the same numbers, the condition comes before the block, as
// it holds on entry to the block.
- sort(WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) {
+ stable_sort(S.WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) {
return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock);
});
+ SmallVector<Instruction *> ToRemove;
+
// Finally, process ordered worklist and eliminate implied conditions.
SmallVector<StackEntry, 16> DFSInStack;
- DenseMap<Value *, unsigned> Value2Index;
- for (ConstraintOrBlock &CB : WorkList) {
+ for (ConstraintOrBlock &CB : S.WorkList) {
// First, pop entries from the stack that are out-of-scope for CB. Remove
// the corresponding entry from the constraint system.
while (!DFSInStack.empty()) {
@@ -409,10 +706,20 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
assert(E.NumIn <= CB.NumIn);
if (CB.NumOut <= E.NumOut)
break;
- LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot
- << "\n");
+ LLVM_DEBUG({
+ dbgs() << "Removing ";
+ dumpWithNames(Info.getCS(E.IsSigned).getLastConstraint(),
+ Info.getValue2Index(E.IsSigned));
+ dbgs() << "\n";
+ });
+
+ Info.popLastConstraint(E.IsSigned);
+ // Remove variables in the system that went out of scope.
+ auto &Mapping = Info.getValue2Index(E.IsSigned);
+ for (Value *V : E.ValuesToRelease)
+ Mapping.erase(V);
+ Info.popLastNVariables(E.IsSigned, E.ValuesToRelease.size());
DFSInStack.pop_back();
- CS.popLastConstraint();
}
LLVM_DEBUG({
@@ -427,28 +734,30 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
// For a block, check if any CmpInsts become known based on the current set
// of constraints.
if (CB.IsBlock) {
- for (Instruction &I : *CB.BB) {
- auto *Cmp = dyn_cast<CmpInst>(&I);
+ for (Instruction &I : make_early_inc_range(*CB.BB)) {
+ if (auto *II = dyn_cast<WithOverflowInst>(&I)) {
+ tryToSimplifyOverflowMath(II, Info, ToRemove);
+ continue;
+ }
+ auto *Cmp = dyn_cast<ICmpInst>(&I);
if (!Cmp)
continue;
DenseMap<Value *, unsigned> NewIndices;
- auto R = getConstraint(Cmp, Value2Index, NewIndices);
- if (R.size() != 1)
- continue;
-
- if (R.needsNewIndices(NewIndices))
+ auto R = Info.getConstraint(Cmp, NewIndices);
+ if (R.IsEq || R.empty() || R.needsNewIndices(NewIndices) ||
+ !R.isValid(Info))
continue;
- if (CS.isConditionImplied(R.get(0).Coefficients)) {
+ auto &CSToUse = Info.getCS(R.IsSigned);
+ if (CSToUse.isConditionImplied(R.Coefficients)) {
if (!DebugCounter::shouldExecute(EliminatedCounter))
continue;
- LLVM_DEBUG(dbgs() << "Condition " << *Cmp
- << " implied by dominating constraints\n");
LLVM_DEBUG({
- for (auto &E : reverse(DFSInStack))
- dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
+ dbgs() << "Condition " << *Cmp
+ << " implied by dominating constraints\n";
+ dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned));
});
Cmp->replaceUsesWithIf(
ConstantInt::getTrue(F.getParent()->getContext()), [](Use &U) {
@@ -460,16 +769,15 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
NumCondsRemoved++;
Changed = true;
}
- if (CS.isConditionImplied(
- ConstraintSystem::negate(R.get(0).Coefficients))) {
+ if (CSToUse.isConditionImplied(
+ ConstraintSystem::negate(R.Coefficients))) {
if (!DebugCounter::shouldExecute(EliminatedCounter))
continue;
- LLVM_DEBUG(dbgs() << "Condition !" << *Cmp
- << " implied by dominating constraints\n");
LLVM_DEBUG({
- for (auto &E : reverse(DFSInStack))
- dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
+ dbgs() << "Condition !" << *Cmp
+ << " implied by dominating constraints\n";
+ dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned));
});
Cmp->replaceAllUsesWith(
ConstantInt::getFalse(F.getParent()->getContext()));
@@ -482,7 +790,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
// Set up a function to restore the predicate at the end of the scope if it
// has been negated. Negate the predicate in-place, if required.
- auto *CI = dyn_cast<CmpInst>(CB.Condition);
+ auto *CI = dyn_cast<ICmpInst>(CB.Condition);
auto PredicateRestorer = make_scope_exit([CI, &CB]() {
if (CB.Not && CI)
CI->setPredicate(CI->getInversePredicate());
@@ -496,34 +804,28 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
}
}
- // Otherwise, add the condition to the system and stack, if we can transform
- // it into a constraint.
- DenseMap<Value *, unsigned> NewIndices;
- auto R = getConstraint(CB.Condition, Value2Index, NewIndices);
- if (R.empty())
- continue;
-
- for (auto &KV : NewIndices)
- Value2Index.insert(KV);
-
- LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n");
- bool Added = false;
- for (auto &C : R.Constraints) {
- auto Coeffs = C.Coefficients;
- LLVM_DEBUG({
- dbgs() << " constraint: ";
- dumpWithNames(C, Value2Index);
- });
- Added |= CS.addVariableRowFill(Coeffs);
- // If R has been added to the system, queue it for removal once it goes
- // out-of-scope.
- if (Added)
- DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not);
+ ICmpInst::Predicate Pred;
+ Value *A, *B;
+ if (match(CB.Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+ // Otherwise, add the condition to the system and stack, if we can
+ // transform it into a constraint.
+ Info.addFact(Pred, A, B, CB.Not, CB.NumIn, CB.NumOut, DFSInStack);
+ Info.transferToOtherSystem(Pred, A, B, CB.Not, CB.NumIn, CB.NumOut,
+ DFSInStack);
}
}
- assert(CS.size() == DFSInStack.size() &&
+#ifndef NDEBUG
+ unsigned SignedEntries =
+ count_if(DFSInStack, [](const StackEntry &E) { return E.IsSigned; });
+ assert(Info.getCS(false).size() == DFSInStack.size() - SignedEntries &&
+ "updates to CS and DFSInStack are out of sync");
+ assert(Info.getCS(true).size() == SignedEntries &&
"updates to CS and DFSInStack are out of sync");
+#endif
+
+ for (Instruction *I : ToRemove)
+ I->eraseFromParent();
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index a3fd97079b1d..64bd4241f37c 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -41,8 +41,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
@@ -215,6 +213,53 @@ static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
return true;
}
+static Value *getValueOnEdge(LazyValueInfo *LVI, Value *Incoming,
+ BasicBlock *From, BasicBlock *To,
+ Instruction *CxtI) {
+ if (Constant *C = LVI->getConstantOnEdge(Incoming, From, To, CxtI))
+ return C;
+
+ // Look if the incoming value is a select with a scalar condition for which
+ // LVI can tells us the value. In that case replace the incoming value with
+ // the appropriate value of the select. This often allows us to remove the
+ // select later.
+ auto *SI = dyn_cast<SelectInst>(Incoming);
+ if (!SI)
+ return nullptr;
+
+ // Once LVI learns to handle vector types, we could also add support
+ // for vector type constants that are not all zeroes or all ones.
+ Value *Condition = SI->getCondition();
+ if (!Condition->getType()->isVectorTy()) {
+ if (Constant *C = LVI->getConstantOnEdge(Condition, From, To, CxtI)) {
+ if (C->isOneValue())
+ return SI->getTrueValue();
+ if (C->isZeroValue())
+ return SI->getFalseValue();
+ }
+ }
+
+ // Look if the select has a constant but LVI tells us that the incoming
+ // value can never be that constant. In that case replace the incoming
+ // value with the other value of the select. This often allows us to
+ // remove the select later.
+
+ // The "false" case
+ if (auto *C = dyn_cast<Constant>(SI->getFalseValue()))
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) ==
+ LazyValueInfo::False)
+ return SI->getTrueValue();
+
+ // The "true" case,
+ // similar to the select "false" case, but try the select "true" value
+ if (auto *C = dyn_cast<Constant>(SI->getTrueValue()))
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) ==
+ LazyValueInfo::False)
+ return SI->getFalseValue();
+
+ return nullptr;
+}
+
static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
const SimplifyQuery &SQ) {
bool Changed = false;
@@ -224,53 +269,14 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
Value *Incoming = P->getIncomingValue(i);
if (isa<Constant>(Incoming)) continue;
- Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P);
-
- // Look if the incoming value is a select with a scalar condition for which
- // LVI can tells us the value. In that case replace the incoming value with
- // the appropriate value of the select. This often allows us to remove the
- // select later.
- if (!V) {
- SelectInst *SI = dyn_cast<SelectInst>(Incoming);
- if (!SI) continue;
-
- Value *Condition = SI->getCondition();
- if (!Condition->getType()->isVectorTy()) {
- if (Constant *C = LVI->getConstantOnEdge(
- Condition, P->getIncomingBlock(i), BB, P)) {
- if (C->isOneValue()) {
- V = SI->getTrueValue();
- } else if (C->isZeroValue()) {
- V = SI->getFalseValue();
- }
- // Once LVI learns to handle vector types, we could also add support
- // for vector type constants that are not all zeroes or all ones.
- }
- }
-
- // Look if the select has a constant but LVI tells us that the incoming
- // value can never be that constant. In that case replace the incoming
- // value with the other value of the select. This often allows us to
- // remove the select later.
- if (!V) {
- Constant *C = dyn_cast<Constant>(SI->getFalseValue());
- if (!C) continue;
-
- if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
- P->getIncomingBlock(i), BB, P) !=
- LazyValueInfo::False)
- continue;
- V = SI->getTrueValue();
- }
-
- LLVM_DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
+ Value *V = getValueOnEdge(LVI, Incoming, P->getIncomingBlock(i), BB, P);
+ if (V) {
+ P->setIncomingValue(i, V);
+ Changed = true;
}
-
- P->setIncomingValue(i, V);
- Changed = true;
}
- if (Value *V = SimplifyInstruction(P, SQ)) {
+ if (Value *V = simplifyInstruction(P, SQ)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
@@ -575,7 +581,7 @@ static bool processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) {
StructType *ST = cast<StructType>(WO->getType());
Constant *Struct = ConstantStruct::get(ST,
- { UndefValue::get(ST->getElementType(0)),
+ { PoisonValue::get(ST->getElementType(0)),
ConstantInt::getFalse(ST->getElementType(1)) });
Value *NewI = B.CreateInsertValue(Struct, NewOp, 0);
WO->replaceAllUsesWith(NewI);
@@ -735,8 +741,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can
// prove that such a combination is impossible, we need to bump the bitwidth.
if (CRs[1]->contains(APInt::getAllOnes(OrigWidth)) &&
- CRs[0]->contains(
- APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
+ CRs[0]->contains(APInt::getSignedMinValue(MinSignedBits).sext(OrigWidth)))
++MinSignedBits;
// Don't shrink below 8 bits wide.
@@ -955,7 +960,8 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumAShrsConverted;
auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
- SDI->getName(), SDI);
+ "", SDI);
+ BO->takeName(SDI);
BO->setDebugLoc(SDI->getDebugLoc());
BO->setIsExact(SDI->isExact());
SDI->replaceAllUsesWith(BO);
@@ -974,8 +980,8 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
return false;
++NumSExt;
- auto *ZExt =
- CastInst::CreateZExtOrBitCast(Base, SDI->getType(), SDI->getName(), SDI);
+ auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "", SDI);
+ ZExt->takeName(SDI);
ZExt->setDebugLoc(SDI->getDebugLoc());
SDI->replaceAllUsesWith(ZExt);
SDI->eraseFromParent();
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 143a78f604fc..5667eefabad5 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -60,30 +60,31 @@
#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <deque>
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/Verifier.h"
+#endif
+
using namespace llvm;
#define DEBUG_TYPE "dfa-jump-threading"
@@ -102,6 +103,11 @@ static cl::opt<unsigned> MaxPathLength(
cl::desc("Max number of blocks searched to find a threading path"),
cl::Hidden, cl::init(20));
+static cl::opt<unsigned> MaxNumPaths(
+ "dfa-max-num-paths",
+ cl::desc("Max number of paths enumerated around a switch"),
+ cl::Hidden, cl::init(200));
+
static cl::opt<unsigned>
CostThreshold("dfa-cost-threshold",
cl::desc("Maximum cost accepted for the transformation"),
@@ -414,7 +420,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
struct MainSwitch {
MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
- if (isPredictable(SI)) {
+ if (isCandidate(SI)) {
Instr = SI;
} else {
ORE->emit([&]() {
@@ -432,83 +438,60 @@ struct MainSwitch {
}
private:
- /// Do a use-def chain traversal. Make sure the value of the switch variable
- /// is always a known constant. This means that all conditional jumps based on
- /// switch variable can be converted to unconditional jumps.
- bool isPredictable(const SwitchInst *SI) {
- std::deque<Instruction *> Q;
+ /// Do a use-def chain traversal starting from the switch condition to see if
+ /// \p SI is a potential condidate.
+ ///
+ /// Also, collect select instructions to unfold.
+ bool isCandidate(const SwitchInst *SI) {
+ std::deque<Value *> Q;
SmallSet<Value *, 16> SeenValues;
SelectInsts.clear();
- Value *FirstDef = SI->getOperand(0);
- auto *Inst = dyn_cast<Instruction>(FirstDef);
-
- // If this is a function argument or another non-instruction, then give up.
- // We are interested in loop local variables.
- if (!Inst)
- return false;
-
- // Require the first definition to be a PHINode
- if (!isa<PHINode>(Inst))
+ Value *SICond = SI->getCondition();
+ LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
+ if (!isa<PHINode>(SICond))
return false;
- LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n");
-
- Q.push_back(Inst);
- SeenValues.insert(FirstDef);
+ addToQueue(SICond, Q, SeenValues);
while (!Q.empty()) {
- Instruction *Current = Q.front();
+ Value *Current = Q.front();
Q.pop_front();
if (auto *Phi = dyn_cast<PHINode>(Current)) {
for (Value *Incoming : Phi->incoming_values()) {
- if (!isPredictableValue(Incoming, SeenValues))
- return false;
- addInstToQueue(Incoming, Q, SeenValues);
+ addToQueue(Incoming, Q, SeenValues);
}
- LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
} else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
if (!isValidSelectInst(SelI))
return false;
- if (!isPredictableValue(SelI->getTrueValue(), SeenValues) ||
- !isPredictableValue(SelI->getFalseValue(), SeenValues)) {
- return false;
- }
- addInstToQueue(SelI->getTrueValue(), Q, SeenValues);
- addInstToQueue(SelI->getFalseValue(), Q, SeenValues);
- LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n");
+ addToQueue(SelI->getTrueValue(), Q, SeenValues);
+ addToQueue(SelI->getFalseValue(), Q, SeenValues);
+ LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
+ } else if (isa<Constant>(Current)) {
+ LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
+ continue;
} else {
- // If it is neither a phi nor a select, then we give up.
- return false;
+ LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
+ // Allow unpredictable values. The hope is that those will be the
+ // initial switch values that can be ignored (they will hit the
+ // unthreaded switch) but this assumption will get checked later after
+ // paths have been enumerated (in function getStateDefMap).
+ continue;
}
}
return true;
}
- bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) {
- if (SeenValues.contains(InpVal))
- return true;
-
- if (isa<ConstantInt>(InpVal))
- return true;
-
- // If this is a function argument or another non-instruction, then give up.
- if (!isa<Instruction>(InpVal))
- return false;
-
- return true;
- }
-
- void addInstToQueue(Value *Val, std::deque<Instruction *> &Q,
- SmallSet<Value *, 16> &SeenValues) {
+ void addToQueue(Value *Val, std::deque<Value *> &Q,
+ SmallSet<Value *, 16> &SeenValues) {
if (SeenValues.contains(Val))
return;
- if (Instruction *I = dyn_cast<Instruction>(Val))
- Q.push_back(I);
+ Q.push_back(Val);
SeenValues.insert(Val);
}
@@ -562,7 +545,16 @@ struct AllSwitchPaths {
void run() {
VisitedBlocks Visited;
PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1);
- StateDefMap StateDef = getStateDefMap();
+ StateDefMap StateDef = getStateDefMap(LoopPaths);
+
+ if (StateDef.empty()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
+ Switch)
+ << "Switch instruction is not predictable.";
+ });
+ return;
+ }
for (PathType Path : LoopPaths) {
ThreadingPath TPath;
@@ -637,6 +629,9 @@ private:
PathType NewPath(Path);
NewPath.push_front(BB);
Res.push_back(NewPath);
+ if (Res.size() >= MaxNumPaths) {
+ return Res;
+ }
}
}
// This block could now be visited again from a different predecessor. Note
@@ -647,14 +642,22 @@ private:
}
/// Walk the use-def chain and collect all the state-defining instructions.
- StateDefMap getStateDefMap() const {
+ ///
+ /// Return an empty map if unpredictable values encountered inside the basic
+ /// blocks of \p LoopPaths.
+ StateDefMap getStateDefMap(const PathsType &LoopPaths) const {
StateDefMap Res;
+ // Basic blocks belonging to any of the loops around the switch statement.
+ SmallPtrSet<BasicBlock *, 16> LoopBBs;
+ for (const PathType &Path : LoopPaths) {
+ for (BasicBlock *BB : Path)
+ LoopBBs.insert(BB);
+ }
+
Value *FirstDef = Switch->getOperand(0);
- assert(isa<PHINode>(FirstDef) && "After select unfolding, all state "
- "definitions are expected to be phi "
- "nodes.");
+ assert(isa<PHINode>(FirstDef) && "The first definition must be a phi.");
SmallVector<PHINode *, 8> Stack;
Stack.push_back(dyn_cast<PHINode>(FirstDef));
@@ -666,15 +669,17 @@ private:
Res[CurPhi->getParent()] = CurPhi;
SeenValues.insert(CurPhi);
- for (Value *Incoming : CurPhi->incoming_values()) {
+ for (BasicBlock *IncomingBB : CurPhi->blocks()) {
+ Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB);
+ bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0;
if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
- SeenValues.contains(Incoming)) {
+ SeenValues.contains(Incoming) || IsOutsideLoops) {
continue;
}
- assert(isa<PHINode>(Incoming) && "After select unfolding, all state "
- "definitions are expected to be phi "
- "nodes.");
+ // Any unpredictable value inside the loops means we must bail out.
+ if (!isa<PHINode>(Incoming))
+ return StateDefMap();
Stack.push_back(cast<PHINode>(Incoming));
}
@@ -823,6 +828,16 @@ private:
});
return false;
}
+
+ if (!Metrics.NumInsts.isValid()) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
+ << "instructions with invalid cost.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
+ << "Contains instructions with invalid cost.";
+ });
+ return false;
+ }
}
unsigned DuplicationCost = 0;
@@ -836,7 +851,7 @@ private:
// using binary search, hence the LogBase2().
unsigned CondBranches =
APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
- DuplicationCost = Metrics.NumInsts / CondBranches;
+ DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
} else {
// Compared with jump tables, the DFA optimizer removes an indirect branch
// on each loop iteration, thus making branch prediction more precise. The
@@ -844,7 +859,7 @@ private:
// predictor to make a mistake, and the more benefit there is in the DFA
// optimizer. Thus, the more branch targets there are, the lower is the
// cost of the DFA opt.
- DuplicationCost = Metrics.NumInsts / JumpTableSize;
+ DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
}
LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
@@ -1197,7 +1212,7 @@ private:
PhiToRemove.push_back(Phi);
}
for (PHINode *PN : PhiToRemove) {
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
PN->eraseFromParent();
}
return;
@@ -1246,7 +1261,7 @@ private:
/// Returns true if IncomingBB is a predecessor of BB.
bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
- return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB);
+ return llvm::is_contained(predecessors(BB), IncomingBB);
}
AllSwitchPaths *SwitchPaths;
@@ -1278,7 +1293,7 @@ bool DFAJumpThreading::run(Function &F) {
continue;
LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
- << " is predictable\n");
+ << " is a candidate\n");
MainSwitch Switch(SI, ORE);
if (!Switch.getInstr())
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index ae636e7b61f7..4c42869dbd58 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -38,7 +38,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -62,8 +64,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -75,7 +75,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
@@ -83,7 +82,6 @@
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
@@ -766,20 +764,27 @@ struct DSEState {
// Post-order numbers for each basic block. Used to figure out if memory
// accesses are executed before another access.
DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
+ // Values that are only used with assumes. Used to refine pointer escape
+ // analysis.
+ SmallPtrSet<const Value *, 32> EphValues;
/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
/// basic block.
MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ // Check if there are root nodes that are terminated by UnreachableInst.
+ // Those roots pessimize post-dominance queries. If there are such roots,
+ // fall back to CFG scan starting from all non-unreachable roots.
+ bool AnyUnreachableExit;
// Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete;
DSEState &operator=(const DSEState &) = delete;
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
- PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
- const LoopInfo &LI)
- : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
- PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
+ PostDominatorTree &PDT, AssumptionCache &AC,
+ const TargetLibraryInfo &TLI, const LoopInfo &LI)
+ : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA),
+ DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
@@ -805,6 +810,12 @@ struct DSEState {
// Collect whether there is any irreducible control flow in the function.
ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
+
+ AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
+ return isa<UnreachableInst>(E->getTerminator());
+ });
+
+ CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
}
/// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
@@ -951,7 +962,7 @@ struct DSEState {
if (!isInvisibleToCallerOnUnwind(V)) {
I.first->second = false;
} else if (isNoAliasCall(V)) {
- I.first->second = !PointerMayBeCaptured(V, true, false);
+ I.first->second = !PointerMayBeCaptured(V, true, false, EphValues);
}
}
return I.first->second;
@@ -970,7 +981,7 @@ struct DSEState {
// with the killing MemoryDef. But we refrain from doing so for now to
// limit compile-time and this does not cause any changes to the number
// of stores removed on a large test set in practice.
- I.first->second = PointerMayBeCaptured(V, false, true);
+ I.first->second = PointerMayBeCaptured(V, false, true, EphValues);
return !I.first->second;
}
@@ -1003,7 +1014,8 @@ struct DSEState {
if (CB->isLifetimeStartOrEnd())
return false;
- return CB->use_empty() && CB->willReturn() && CB->doesNotThrow();
+ return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
+ !CB->isTerminator();
}
return false;
@@ -1233,6 +1245,9 @@ struct DSEState {
// Reached TOP.
if (MSSA.isLiveOnEntryDef(Current)) {
LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
+ if (CanOptimize && Current != KillingDef->getDefiningAccess())
+ // The first clobbering def is... none.
+ KillingDef->setOptimized(Current);
return None;
}
@@ -1309,7 +1324,6 @@ struct DSEState {
// memory location and not located in different loops.
if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
- WalkerStepLimit -= 1;
CanOptimize = false;
continue;
}
@@ -1508,54 +1522,56 @@ struct DSEState {
CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
}
- // If CommonPred is in the set of killing blocks, just check if it
- // post-dominates MaybeDeadAccess.
- if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
- return {MaybeDeadAccess};
- return None;
- }
-
// If the common post-dominator does not post-dominate MaybeDeadAccess,
// there is a path from MaybeDeadAccess to an exit not going through a
// killing block.
- if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
- SetVector<BasicBlock *> WorkList;
+ if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
+ if (!AnyUnreachableExit)
+ return None;
- // If CommonPred is null, there are multiple exits from the function.
- // They all have to be added to the worklist.
- if (CommonPred)
- WorkList.insert(CommonPred);
- else
- for (BasicBlock *R : PDT.roots())
+ // Fall back to CFG scan starting at all non-unreachable roots if not
+ // all paths to the exit go through CommonPred.
+ CommonPred = nullptr;
+ }
+
+ // If CommonPred itself is in the set of killing blocks, we're done.
+ if (KillingBlocks.count(CommonPred))
+ return {MaybeDeadAccess};
+
+ SetVector<BasicBlock *> WorkList;
+ // If CommonPred is null, there are multiple exits from the function.
+ // They all have to be added to the worklist.
+ if (CommonPred)
+ WorkList.insert(CommonPred);
+ else
+ for (BasicBlock *R : PDT.roots()) {
+ if (!isa<UnreachableInst>(R->getTerminator()))
WorkList.insert(R);
+ }
- NumCFGTries++;
- // Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching MaybeDeadAccess.
- for (unsigned I = 0; I < WorkList.size(); I++) {
- NumCFGChecks++;
- BasicBlock *Current = WorkList[I];
- if (KillingBlocks.count(Current))
- continue;
- if (Current == MaybeDeadAccess->getBlock())
- return None;
+ NumCFGTries++;
+ // Check if all paths starting from an exit node go through one of the
+ // killing blocks before reaching MaybeDeadAccess.
+ for (unsigned I = 0; I < WorkList.size(); I++) {
+ NumCFGChecks++;
+ BasicBlock *Current = WorkList[I];
+ if (KillingBlocks.count(Current))
+ continue;
+ if (Current == MaybeDeadAccess->getBlock())
+ return None;
- // MaybeDeadAccess is reachable from the entry, so we don't have to
- // explore unreachable blocks further.
- if (!DT.isReachableFromEntry(Current))
- continue;
+ // MaybeDeadAccess is reachable from the entry, so we don't have to
+ // explore unreachable blocks further.
+ if (!DT.isReachableFromEntry(Current))
+ continue;
- for (BasicBlock *Pred : predecessors(Current))
- WorkList.insert(Pred);
+ for (BasicBlock *Pred : predecessors(Current))
+ WorkList.insert(Pred);
- if (WorkList.size() >= MemorySSAPathCheckLimit)
- return None;
- }
- NumCFGSuccess++;
- return {MaybeDeadAccess};
+ if (WorkList.size() >= MemorySSAPathCheckLimit)
+ return None;
}
- return None;
+ NumCFGSuccess++;
}
// No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
@@ -1780,10 +1796,9 @@ struct DSEState {
if (!isRemovable(DefI))
return false;
- if (StoredConstant && isAllocationFn(DefUO, &TLI)) {
- auto *CB = cast<CallBase>(DefUO);
- auto *InitC = getInitialValueOfAllocation(CB, &TLI,
- StoredConstant->getType());
+ if (StoredConstant) {
+ Constant *InitC =
+ getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
// If the clobbering access is LiveOnEntry, no instructions between them
// can modify the memory location.
if (InitC && InitC == StoredConstant)
@@ -1921,11 +1936,13 @@ struct DSEState {
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
+ AssumptionCache &AC,
const TargetLibraryInfo &TLI,
const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+ MSSA.ensureOptimizedUses();
+ DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
@@ -2105,9 +2122,10 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
@@ -2147,9 +2165,11 @@ public:
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
@@ -2173,6 +2193,7 @@ public:
AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
}
};
@@ -2190,6 +2211,7 @@ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 59b934c16c8a..cf2824954122 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -30,19 +29,16 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -55,7 +51,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
-#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <deque>
@@ -781,6 +776,21 @@ private:
return getLoadStorePointerOperand(Inst);
}
+ Type *getValueType() const {
+ // TODO: handle target-specific intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ return II->getType();
+ case Intrinsic::masked_store:
+ return II->getArgOperand(0)->getType();
+ default:
+ return nullptr;
+ }
+ }
+ return getLoadStoreType(Inst);
+ }
+
bool mayReadFromMemory() const {
if (IntrID != 0)
return Info.ReadMem;
@@ -1162,6 +1172,9 @@ bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
"Violated invariant");
if (Earlier.getPointerOperand() != Later.getPointerOperand())
return false;
+ if (!Earlier.getValueType() || !Later.getValueType() ||
+ Earlier.getValueType() != Later.getValueType())
+ return false;
if (Earlier.getMatchingId() != Later.getMatchingId())
return false;
// At the moment, we don't remove ordered stores, but do remove
@@ -1334,7 +1347,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(&Inst, SQ)) {
+ if (Value *V = simplifyInstruction(&Inst, SQ)) {
LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
<< '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 44017b555769..ad2041cd4253 100644
--- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,8 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index a98bb8358aef..56f2a3b3004d 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -11,24 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <deque>
-#include <functional> // For std::function
#define DEBUG_TYPE "float2int"
@@ -236,116 +234,111 @@ void Float2IntPass::walkBackwards() {
}
}
-// Walk forwards down the list of seen instructions, so we visit defs before
-// uses.
-void Float2IntPass::walkForwards() {
- for (auto &It : reverse(SeenInsts)) {
- if (It.second != unknownRange())
- continue;
+// Calculate result range from operand ranges.
+// Return None if the range cannot be calculated yet.
+Optional<ConstantRange> Float2IntPass::calcRange(Instruction *I) {
+ SmallVector<ConstantRange, 4> OpRanges;
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ auto OpIt = SeenInsts.find(OI);
+ assert(OpIt != SeenInsts.end() && "def not seen before use!");
+ if (OpIt->second == unknownRange())
+ return None; // Wait until operand range has been calculated.
+ OpRanges.push_back(OpIt->second);
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
+ // Work out if the floating point number can be losslessly represented
+ // as an integer.
+ // APFloat::convertToInteger(&Exact) purports to do what we want, but
+ // the exactness can be too precise. For example, negative zero can
+ // never be exactly converted to an integer.
+ //
+ // Instead, we ask APFloat to round itself to an integral value - this
+ // preserves sign-of-zero - then compare the result with the original.
+ //
+ const APFloat &F = CF->getValueAPF();
- Instruction *I = It.first;
- std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
- switch (I->getOpcode()) {
- // FIXME: Handle select and phi nodes.
- default:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- llvm_unreachable("Should have been handled in walkForwards!");
+ // First, weed out obviously incorrect values. Non-finite numbers
+ // can't be represented and neither can negative zero, unless
+ // we're in fast math mode.
+ if (!F.isFinite() ||
+ (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
+ !I->hasNoSignedZeros()))
+ return badRange();
- case Instruction::FNeg:
- Op = [](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 1 && "FNeg is a unary operator!");
- unsigned Size = Ops[0].getBitWidth();
- auto Zero = ConstantRange(APInt::getZero(Size));
- return Zero.sub(Ops[0]);
- };
- break;
+ APFloat NewF = F;
+ auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
+ if (Res != APFloat::opOK || NewF != F)
+ return badRange();
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- Op = [I](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 2 && "its a binary operator!");
- auto BinOp = (Instruction::BinaryOps) I->getOpcode();
- return Ops[0].binaryOp(BinOp, Ops[1]);
- };
- break;
+ // OK, it's representable. Now get it.
+ APSInt Int(MaxIntegerBW+1, false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Int,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ OpRanges.push_back(ConstantRange(Int));
+ } else {
+ llvm_unreachable("Should have already marked this as badRange!");
+ }
+ }
- //
- // Root-only instructions - we'll only see these if they're the
- // first node in a walk.
- //
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- Op = [I](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
- // Note: We're ignoring the casts output size here as that's what the
- // caller expects.
- auto CastOp = (Instruction::CastOps)I->getOpcode();
- return Ops[0].castOp(CastOp, MaxIntegerBW+1);
- };
- break;
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ llvm_unreachable("Should have been handled in walkForwards!");
- case Instruction::FCmp:
- Op = [](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 2 && "FCmp is a binary operator!");
- return Ops[0].unionWith(Ops[1]);
- };
- break;
- }
+ case Instruction::FNeg: {
+ assert(OpRanges.size() == 1 && "FNeg is a unary operator!");
+ unsigned Size = OpRanges[0].getBitWidth();
+ auto Zero = ConstantRange(APInt::getZero(Size));
+ return Zero.sub(OpRanges[0]);
+ }
- bool Abort = false;
- SmallVector<ConstantRange,4> OpRanges;
- for (Value *O : I->operands()) {
- if (Instruction *OI = dyn_cast<Instruction>(O)) {
- assert(SeenInsts.find(OI) != SeenInsts.end() &&
- "def not seen before use!");
- OpRanges.push_back(SeenInsts.find(OI)->second);
- } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
- // Work out if the floating point number can be losslessly represented
- // as an integer.
- // APFloat::convertToInteger(&Exact) purports to do what we want, but
- // the exactness can be too precise. For example, negative zero can
- // never be exactly converted to an integer.
- //
- // Instead, we ask APFloat to round itself to an integral value - this
- // preserves sign-of-zero - then compare the result with the original.
- //
- const APFloat &F = CF->getValueAPF();
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul: {
+ assert(OpRanges.size() == 2 && "its a binary operator!");
+ auto BinOp = (Instruction::BinaryOps) I->getOpcode();
+ return OpRanges[0].binaryOp(BinOp, OpRanges[1]);
+ }
- // First, weed out obviously incorrect values. Non-finite numbers
- // can't be represented and neither can negative zero, unless
- // we're in fast math mode.
- if (!F.isFinite() ||
- (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
- !I->hasNoSignedZeros())) {
- seen(I, badRange());
- Abort = true;
- break;
- }
+ //
+ // Root-only instructions - we'll only see these if they're the
+ // first node in a walk.
+ //
+ case Instruction::FPToUI:
+ case Instruction::FPToSI: {
+ assert(OpRanges.size() == 1 && "FPTo[US]I is a unary operator!");
+ // Note: We're ignoring the casts output size here as that's what the
+ // caller expects.
+ auto CastOp = (Instruction::CastOps)I->getOpcode();
+ return OpRanges[0].castOp(CastOp, MaxIntegerBW+1);
+ }
- APFloat NewF = F;
- auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
- if (Res != APFloat::opOK || NewF != F) {
- seen(I, badRange());
- Abort = true;
- break;
- }
- // OK, it's representable. Now get it.
- APSInt Int(MaxIntegerBW+1, false);
- bool Exact;
- CF->getValueAPF().convertToInteger(Int,
- APFloat::rmNearestTiesToEven,
- &Exact);
- OpRanges.push_back(ConstantRange(Int));
- } else {
- llvm_unreachable("Should have already marked this as badRange!");
- }
- }
+ case Instruction::FCmp:
+ assert(OpRanges.size() == 2 && "FCmp is a binary operator!");
+ return OpRanges[0].unionWith(OpRanges[1]);
+ }
+}
+
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
+void Float2IntPass::walkForwards() {
+ std::deque<Instruction *> Worklist;
+ for (const auto &Pair : SeenInsts)
+ if (Pair.second == unknownRange())
+ Worklist.push_back(Pair.first);
+
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
- // Reduce the operands' ranges to a single range and return.
- if (!Abort)
- seen(I, Op(OpRanges));
+ if (Optional<ConstantRange> Range = calcRange(I))
+ seen(I, *Range);
+ else
+ Worklist.push_front(I); // Reprocess later.
}
}
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 398c93e8758c..783301fe589e 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -32,6 +31,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -42,12 +42,10 @@
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -55,11 +53,9 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -72,7 +68,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -112,16 +107,16 @@ static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
cl::init(true));
static cl::opt<bool>
GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
- cl::init(true));
+ cl::init(false));
static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
static cl::opt<uint32_t> MaxNumDeps(
- "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore,
+ "gvn-max-num-deps", cl::Hidden, cl::init(100),
cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
static cl::opt<uint32_t> MaxBBSpeculations(
- "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore,
+ "gvn-max-block-speculations", cl::Hidden, cl::init(600),
cl::desc("Max number of blocks we're willing to speculate on (and recurse "
"into) when deducing if a value is fully available or not in GVN "
"(default = 600)"));
@@ -129,6 +124,8 @@ static cl::opt<uint32_t> MaxBBSpeculations(
struct llvm::GVNPass::Expression {
uint32_t opcode;
bool commutative = false;
+ // The type is not necessarily the result type of the expression, it may be
+ // any additional type needed to disambiguate the expression.
Type *type = nullptr;
SmallVector<uint32_t, 4> varargs;
@@ -178,70 +175,88 @@ template <> struct DenseMapInfo<GVNPass::Expression> {
/// implicitly associated with a rematerialization point which is the
/// location of the instruction from which it was formed.
struct llvm::gvn::AvailableValue {
- enum ValType {
+ enum class ValType {
SimpleVal, // A simple offsetted value that is accessed.
LoadVal, // A value produced by a load.
MemIntrin, // A memory intrinsic which is loaded from.
- UndefVal // A UndefValue representing a value from dead block (which
+ UndefVal, // A UndefValue representing a value from dead block (which
// is not yet physically removed from the CFG).
+ SelectVal, // A pointer select which is loaded from and for which the load
+ // can be replace by a value select.
};
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
+ /// Val - The value that is live out of the block.
+ Value *Val;
+ /// Kind of the live-out value.
+ ValType Kind;
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset = 0;
static AvailableValue get(Value *V, unsigned Offset = 0) {
AvailableValue Res;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
+ Res.Val = V;
+ Res.Kind = ValType::SimpleVal;
Res.Offset = Offset;
return Res;
}
static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
AvailableValue Res;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
+ Res.Val = MI;
+ Res.Kind = ValType::MemIntrin;
Res.Offset = Offset;
return Res;
}
static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
AvailableValue Res;
- Res.Val.setPointer(Load);
- Res.Val.setInt(LoadVal);
+ Res.Val = Load;
+ Res.Kind = ValType::LoadVal;
Res.Offset = Offset;
return Res;
}
static AvailableValue getUndef() {
AvailableValue Res;
- Res.Val.setPointer(nullptr);
- Res.Val.setInt(UndefVal);
+ Res.Val = nullptr;
+ Res.Kind = ValType::UndefVal;
Res.Offset = 0;
return Res;
}
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
- bool isUndefValue() const { return Val.getInt() == UndefVal; }
+ static AvailableValue getSelect(SelectInst *Sel) {
+ AvailableValue Res;
+ Res.Val = Sel;
+ Res.Kind = ValType::SelectVal;
+ Res.Offset = 0;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
+ bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
+ bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
+ bool isUndefValue() const { return Kind == ValType::UndefVal; }
+ bool isSelectValue() const { return Kind == ValType::SelectVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
+ return Val;
}
LoadInst *getCoercedLoadValue() const {
assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
+ return cast<LoadInst>(Val);
}
MemIntrinsic *getMemIntrinValue() const {
assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
+ return cast<MemIntrinsic>(Val);
+ }
+
+ SelectInst *getSelectValue() const {
+ assert(isSelectValue() && "Wrong accessor");
+ return cast<SelectInst>(Val);
}
/// Emit code at the specified insertion point to adjust the value defined
@@ -275,6 +290,10 @@ struct llvm::gvn::AvailableValueInBlock {
return get(BB, AvailableValue::getUndef());
}
+ static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel) {
+ return get(BB, AvailableValue::getSelect(Sel));
+ }
+
/// Emit code at the end of this block to adjust the value defined here to
/// the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
@@ -379,6 +398,39 @@ GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
return e;
}
+GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
+ Expression E;
+ Type *PtrTy = GEP->getType()->getScalarType();
+ const DataLayout &DL = GEP->getModule()->getDataLayout();
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
+ MapVector<Value *, APInt> VariableOffsets;
+ APInt ConstantOffset(BitWidth, 0);
+ if (PtrTy->isOpaquePointerTy() &&
+ GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
+ // For opaque pointers, convert into offset representation, to recognize
+ // equivalent address calculations that use different type encoding.
+ LLVMContext &Context = GEP->getContext();
+ E.opcode = GEP->getOpcode();
+ E.type = nullptr;
+ E.varargs.push_back(lookupOrAdd(GEP->getPointerOperand()));
+ for (const auto &Pair : VariableOffsets) {
+ E.varargs.push_back(lookupOrAdd(Pair.first));
+ E.varargs.push_back(lookupOrAdd(ConstantInt::get(Context, Pair.second)));
+ }
+ if (!ConstantOffset.isZero())
+ E.varargs.push_back(
+ lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
+ } else {
+ // If converting to offset representation fails (for typed pointers and
+ // scalable vectors), fall back to type-based implementation:
+ E.opcode = GEP->getOpcode();
+ E.type = GEP->getSourceElementType();
+ for (Use &Op : GEP->operands())
+ E.varargs.push_back(lookupOrAdd(Op));
+ }
+ return E;
+}
+
//===----------------------------------------------------------------------===//
// ValueTable External Functions
//===----------------------------------------------------------------------===//
@@ -562,9 +614,11 @@ uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
case Instruction::InsertElement:
case Instruction::ShuffleVector:
case Instruction::InsertValue:
- case Instruction::GetElementPtr:
exp = createExpr(I);
break;
+ case Instruction::GetElementPtr:
+ exp = createGEPExpr(cast<GetElementPtrInst>(I));
+ break;
case Instruction::ExtractValue:
exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
@@ -639,24 +693,24 @@ void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
bool GVNPass::isPREEnabled() const {
- return Options.AllowPRE.getValueOr(GVNEnablePRE);
+ return Options.AllowPRE.value_or(GVNEnablePRE);
}
bool GVNPass::isLoadPREEnabled() const {
- return Options.AllowLoadPRE.getValueOr(GVNEnableLoadPRE);
+ return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
}
bool GVNPass::isLoadInLoopPREEnabled() const {
- return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
+ return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
}
bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
- return Options.AllowLoadPRESplitBackedge.getValueOr(
+ return Options.AllowLoadPRESplitBackedge.value_or(
GVNEnableSplitBackedgeInLoadPRE);
}
bool GVNPass::isMemDepEnabled() const {
- return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
+ return Options.AllowMemDep.value_or(GVNEnableMemDep);
}
PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
@@ -897,6 +951,17 @@ ConstructSSAForLoadSet(LoadInst *Load,
return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
}
+static LoadInst *findDominatingLoad(Value *Ptr, Type *LoadTy, SelectInst *Sel,
+ DominatorTree &DT) {
+ for (Value *U : Ptr->users()) {
+ auto *LI = dyn_cast<LoadInst>(U);
+ if (LI && LI->getType() == LoadTy && LI->getParent() == Sel->getParent() &&
+ DT.dominates(LI, Sel))
+ return LI;
+ }
+ return nullptr;
+}
+
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
Instruction *InsertPt,
GVNPass &gvn) const {
@@ -937,6 +1002,17 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n'
<< "\n\n\n");
+ } else if (isSelectValue()) {
+ // Introduce a new value select for a load from an eligible pointer select.
+ SelectInst *Sel = getSelectValue();
+ LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel,
+ gvn.getDominatorTree());
+ LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel,
+ gvn.getDominatorTree());
+ assert(L1 && L2 &&
+ "must be able to obtain dominating loads for both value operands of "
+ "the select");
+ Res = SelectInst::Create(Sel->getCondition(), L1, L2, "", Sel);
} else {
llvm_unreachable("Should not materialize value from dead block");
}
@@ -1023,8 +1099,54 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
ORE->emit(R);
}
+/// Check if a load from pointer-select \p Address in \p DepBB can be converted
+/// to a value select. The following conditions need to be satisfied:
+/// 1. The pointer select (\p Address) must be defined in \p DepBB.
+/// 2. Both value operands of the pointer select must be loaded in the same
+/// basic block, before the pointer select.
+/// 3. There must be no instructions between the found loads and \p End that may
+/// clobber the loads.
+static Optional<AvailableValue>
+tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End,
+ Value *Address, Type *LoadTy, DominatorTree &DT,
+ AAResults *AA) {
+
+ auto *Sel = dyn_cast_or_null<SelectInst>(Address);
+ if (!Sel || DepBB != Sel->getParent())
+ return None;
+
+ LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel, DT);
+ LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel, DT);
+ if (!L1 || !L2)
+ return None;
+
+ // Ensure there are no accesses that may modify the locations referenced by
+ // either L1 or L2 between L1, L2 and the specified End iterator.
+ Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2;
+ MemoryLocation L1Loc = MemoryLocation::get(L1);
+ MemoryLocation L2Loc = MemoryLocation::get(L2);
+ if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) {
+ return isModSet(AA->getModRefInfo(&I, L1Loc)) ||
+ isModSet(AA->getModRefInfo(&I, L2Loc));
+ }))
+ return None;
+
+ return AvailableValue::getSelect(Sel);
+}
+
bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
Value *Address, AvailableValue &Res) {
+ if (!DepInfo.isDef() && !DepInfo.isClobber()) {
+ assert(isa<SelectInst>(Address));
+ if (auto R = tryToConvertLoadOfPtrSelect(
+ Load->getParent(), Load->getIterator(), Address, Load->getType(),
+ getDominatorTree(), getAliasAnalysis())) {
+ Res = *R;
+ return true;
+ }
+ return false;
+ }
+
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
"expected a local dependence");
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
@@ -1066,9 +1188,7 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
const auto ClobberOff = MD->getClobberOffset(DepLoad);
// GVN has no deal with a negative offset.
- Offset = (ClobberOff == None || ClobberOff.getValue() < 0)
- ? -1
- : ClobberOff.getValue();
+ Offset = (ClobberOff == None || *ClobberOff < 0) ? -1 : *ClobberOff;
}
if (Offset == -1)
Offset =
@@ -1092,6 +1212,7 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
}
}
}
+
// Nothing known about this clobber, have to be conservative
LLVM_DEBUG(
// fast print dep, using operator<< on instruction is too slow.
@@ -1111,12 +1232,11 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
return true;
}
- if (isAllocationFn(DepInst, TLI))
- if (auto *InitVal = getInitialValueOfAllocation(cast<CallBase>(DepInst),
- TLI, Load->getType())) {
- Res = AvailableValue::get(InitVal);
- return true;
- }
+ if (Constant *InitVal =
+ getInitialValueOfAllocation(DepInst, TLI, Load->getType())) {
+ Res = AvailableValue::get(InitVal);
+ return true;
+ }
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
@@ -1176,16 +1296,23 @@ void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
continue;
}
- if (!DepInfo.isDef() && !DepInfo.isClobber()) {
- UnavailableBlocks.push_back(DepBB);
- continue;
- }
-
// The address being loaded in this non-local block may not be the same as
// the pointer operand of the load if PHI translation occurs. Make sure
// to consider the right address.
Value *Address = Deps[i].getAddress();
+ if (!DepInfo.isDef() && !DepInfo.isClobber()) {
+ if (auto R = tryToConvertLoadOfPtrSelect(
+ DepBB, DepBB->end(), Address, Load->getType(), getDominatorTree(),
+ getAliasAnalysis())) {
+ ValuesPerBlock.push_back(
+ AvailableValueInBlock::get(DepBB, std::move(*R)));
+ continue;
+ }
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
AvailableValue AV;
if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) {
// subtlety: because we know this was a non-local dependency, we know
@@ -1923,8 +2050,9 @@ bool GVNPass::processLoad(LoadInst *L) {
if (Dep.isNonLocal())
return processNonLocalLoad(L);
+ Value *Address = L->getPointerOperand();
// Only handle the local case below
- if (!Dep.isDef() && !Dep.isClobber()) {
+ if (!Dep.isDef() && !Dep.isClobber() && !isa<SelectInst>(Address)) {
// This might be a NonFuncLocal or an Unknown
LLVM_DEBUG(
// fast print dep, using operator<< on instruction is too slow.
@@ -1934,7 +2062,7 @@ bool GVNPass::processLoad(LoadInst *L) {
}
AvailableValue AV;
- if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
+ if (AnalyzeLoadAvailability(L, Dep, Address, AV)) {
Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
// Replace the load!
@@ -2324,7 +2452,7 @@ bool GVNPass::processInstruction(Instruction *I) {
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
const DataLayout &DL = I->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {
+ if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
bool Changed = false;
if (!I->use_empty()) {
// Simplification can cause a special instruction to become not special.
@@ -2491,6 +2619,7 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
unsigned Iteration = 0;
while (ShouldContinue) {
LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
+ (void) Iteration;
ShouldContinue = iterateOnFunction(F);
Changed |= ShouldContinue;
++Iteration;
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index fdc3afd9348a..6cdc671ddb64 100644
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -54,11 +54,9 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
@@ -126,7 +124,7 @@ using HoistingPointInfo = std::pair<BasicBlock *, SmallVecInsn>;
using HoistingPointList = SmallVector<HoistingPointInfo, 4>;
// A map from a pair of VNs to all the instructions with those VNs.
-using VNType = std::pair<unsigned, unsigned>;
+using VNType = std::pair<unsigned, uintptr_t>;
using VNtoInsns = DenseMap<VNType, SmallVector<Instruction *, 4>>;
@@ -161,7 +159,7 @@ using InValuesType =
// An invalid value number Used when inserting a single value number into
// VNtoInsns.
-enum : unsigned { InvalidVN = ~2U };
+enum : uintptr_t { InvalidVN = ~(uintptr_t)2 };
// Records all scalar instructions candidate for code hoisting.
class InsnInfo {
@@ -187,7 +185,9 @@ public:
void insert(LoadInst *Load, GVNPass::ValueTable &VN) {
if (Load->isSimple()) {
unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
- VNtoLoads[{V, InvalidVN}].push_back(Load);
+ // With opaque pointers we may have loads from the same pointer with
+ // different result types, which should be disambiguated.
+ VNtoLoads[{V, (uintptr_t)Load->getType()}].push_back(Load);
}
}
@@ -261,7 +261,9 @@ public:
GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA,
MemoryDependenceResults *MD, MemorySSA *MSSA)
: DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA),
- MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
+ MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {
+ MSSA->ensureOptimizedUses();
+ }
bool run(Function &F);
@@ -1147,6 +1149,8 @@ std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) {
DFSNumber[Repl] = DFSNumber[Last]++;
}
+ // Drop debug location as per debug info update guide.
+ Repl->dropLocation();
NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
if (isa<LoadInst>(Repl))
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index e612a82fc89a..720b8e71fd56 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -35,7 +35,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
@@ -45,7 +44,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -383,6 +381,8 @@ public:
}
};
+using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
+
class ValueTable {
DenseMap<Value *, uint32_t> ValueNumbering;
DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
@@ -390,6 +390,7 @@ class ValueTable {
BumpPtrAllocator Allocator;
ArrayRecycler<Value *> Recycler;
uint32_t nextValueNumber = 1;
+ BasicBlocksSet ReachableBBs;
/// Create an expression for I based on its opcode and its uses. If I
/// touches or reads memory, the expression is also based upon its memory
@@ -421,6 +422,11 @@ class ValueTable {
public:
ValueTable() = default;
+ /// Set basic blocks reachable from entry block.
+ void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
+ this->ReachableBBs = ReachableBBs;
+ }
+
/// Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t lookupOrAdd(Value *V) {
@@ -434,6 +440,9 @@ public:
}
Instruction *I = cast<Instruction>(V);
+ if (!ReachableBBs.contains(I->getParent()))
+ return ~0U;
+
InstructionUseExpr *exp = nullptr;
switch (I->getOpcode()) {
case Instruction::Load:
@@ -570,6 +579,7 @@ public:
unsigned NumSunk = 0;
ReversePostOrderTraversal<Function*> RPOT(&F);
+ VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
for (auto *N : RPOT)
NumSunk += sinkBB(N);
@@ -648,12 +658,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
VNums[N]++;
}
unsigned VNumToSink =
- std::max_element(VNums.begin(), VNums.end(),
- [](const std::pair<uint32_t, unsigned> &I,
- const std::pair<uint32_t, unsigned> &J) {
- return I.second < J.second;
- })
- ->first;
+ std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;
if (VNums[VNumToSink] == 1)
// Can't sink anything!
@@ -776,12 +781,9 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
unsigned NumOrigPreds = Preds.size();
// We can only sink instructions through unconditional branches.
- for (auto I = Preds.begin(); I != Preds.end();) {
- if ((*I)->getTerminator()->getNumSuccessors() != 1)
- I = Preds.erase(I);
- else
- ++I;
- }
+ llvm::erase_if(Preds, [](BasicBlock *BB) {
+ return BB->getTerminator()->getNumSuccessors() != 1;
+ });
LockstepReverseIterator LRI(Preds);
SmallVector<SinkingInstructionCandidate, 4> Candidates;
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 82b81003ef21..af6062d142f0 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -42,7 +42,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -496,6 +495,8 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
makeAvailableAt(Op, Loc);
Inst->moveBefore(Loc);
+ // If we moved instruction before guard we must clean poison generating flags.
+ Inst->dropPoisonGeneratingFlags();
}
bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
diff --git a/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp b/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp
index e2022aba97c4..26f2db183fbf 100644
--- a/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp
+++ b/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp
@@ -8,7 +8,6 @@
#include "llvm/Transforms/Scalar/IVUsersPrinter.h"
#include "llvm/Analysis/IVUsers.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "iv-users"
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index ceb03eb17f6d..e977dd18be9f 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -25,10 +25,7 @@
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -74,11 +71,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -387,7 +382,7 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
// Delete the old floating point increment.
- Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
// If the FP induction variable still has uses, this is because something else
@@ -605,10 +600,10 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
Intrinsic::getName(Intrinsic::experimental_guard));
bool HasGuards = GuardDecl && !GuardDecl->use_empty();
- SmallVector<PHINode*, 8> LoopPhis;
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- LoopPhis.push_back(cast<PHINode>(I));
- }
+ SmallVector<PHINode *, 8> LoopPhis;
+ for (PHINode &PN : L->getHeader()->phis())
+ LoopPhis.push_back(&PN);
+
// Each round of simplification iterates through the SimplifyIVUsers worklist
// for all current phis, then determines whether any IVs can be
// widened. Widening adds new phis to LoopPhis, inducing another round of
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 0e5653eeb7d5..799669a19796 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -56,8 +56,6 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
@@ -1411,12 +1409,12 @@ bool LoopConstrainer::run() {
bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
Optional<SubRanges> MaybeSR = calculateSubRanges(IsSignedPredicate);
- if (!MaybeSR.hasValue()) {
+ if (!MaybeSR) {
LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n");
return false;
}
- SubRanges SR = MaybeSR.getValue();
+ SubRanges SR = *MaybeSR;
bool Increasing = MainLoopStructure.IndVarIncreasing;
IntegerType *IVTy =
cast<IntegerType>(Range.getBegin()->getType());
@@ -1429,9 +1427,9 @@ bool LoopConstrainer::run() {
// constructor.
ClonedLoop PreLoop, PostLoop;
bool NeedsPreLoop =
- Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
+ Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value();
bool NeedsPostLoop =
- Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();
+ Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value();
Value *ExitPreLoopAt = nullptr;
Value *ExitMainLoopAt = nullptr;
@@ -1710,7 +1708,7 @@ IntersectSignedRange(ScalarEvolution &SE,
const InductiveRangeCheck::Range &R2) {
if (R2.isEmpty(SE, /* IsSigned */ true))
return None;
- if (!R1.hasValue())
+ if (!R1)
return R2;
auto &R1Value = R1.getValue();
// We never return empty ranges from this function, and R1 is supposed to be
@@ -1739,7 +1737,7 @@ IntersectUnsignedRange(ScalarEvolution &SE,
const InductiveRangeCheck::Range &R2) {
if (R2.isEmpty(SE, /* IsSigned */ false))
return None;
- if (!R1.hasValue())
+ if (!R1)
return R2;
auto &R1Value = R1.getValue();
// We never return empty ranges from this function, and R1 is supposed to be
@@ -1763,10 +1761,14 @@ IntersectUnsignedRange(ScalarEvolution &SE,
}
PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ // There are no loops in the function. Return before computing other expensive
+ // analyses.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
// Get BFI analysis result on demand. Please note that modification of
// CFG invalidates this analysis and we should handle it.
@@ -1854,7 +1856,7 @@ InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
LoopStructure &LS) {
if (SkipProfitabilityChecks)
return true;
- if (GetBFI.hasValue()) {
+ if (GetBFI) {
BlockFrequencyInfo &BFI = (*GetBFI)();
uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
@@ -1920,12 +1922,12 @@ bool InductiveRangeCheckElimination::run(
const char *FailureReason = nullptr;
Optional<LoopStructure> MaybeLoopStructure =
LoopStructure::parseLoopStructure(SE, *L, FailureReason);
- if (!MaybeLoopStructure.hasValue()) {
+ if (!MaybeLoopStructure) {
LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
<< FailureReason << "\n";);
return false;
}
- LoopStructure LS = MaybeLoopStructure.getValue();
+ LoopStructure LS = *MaybeLoopStructure;
if (!isProfitableToTransform(*L, LS))
return false;
const SCEVAddRecExpr *IndVar =
@@ -1946,10 +1948,10 @@ bool InductiveRangeCheckElimination::run(
for (InductiveRangeCheck &IRC : RangeChecks) {
auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
LS.IsSignedPredicate);
- if (Result.hasValue()) {
+ if (Result) {
auto MaybeSafeIterRange =
IntersectRange(SE, SafeIterRange, Result.getValue());
- if (MaybeSafeIterRange.hasValue()) {
+ if (MaybeSafeIterRange) {
assert(
!MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) &&
"We should never return empty ranges!");
@@ -1959,7 +1961,7 @@ bool InductiveRangeCheckElimination::run(
}
}
- if (!SafeIterRange.hasValue())
+ if (!SafeIterRange)
return false;
LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT,
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 8f5933b7bd71..5eefde2e37a1 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -92,8 +92,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -182,7 +180,7 @@ public:
class InferAddressSpacesImpl {
AssumptionCache &AC;
- DominatorTree *DT = nullptr;
+ const DominatorTree *DT = nullptr;
const TargetTransformInfo *TTI = nullptr;
const DataLayout *DL = nullptr;
@@ -213,10 +211,11 @@ class InferAddressSpacesImpl {
// Changes the flat address expressions in function F to point to specific
// address spaces if InferredAddrSpace says so. Postorder is the postorder of
// all flat expressions in the use-def graph of function F.
- bool rewriteWithNewAddressSpaces(
- const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace,
- const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const;
+ bool
+ rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ Function *F) const;
void appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
@@ -240,7 +239,7 @@ class InferAddressSpacesImpl {
unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const;
public:
- InferAddressSpacesImpl(AssumptionCache &AC, DominatorTree *DT,
+ InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT,
const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
: AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
bool run(Function &F);
@@ -280,15 +279,15 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
// arithmetic may also be undefined after invalid pointer reinterpret cast.
// However, as we confirm through the target hooks that it's a no-op
// addrspacecast, it doesn't matter since the bits should be the same.
+ unsigned P2IOp0AS = P2I->getOperand(0)->getType()->getPointerAddressSpace();
+ unsigned I2PAS = I2P->getType()->getPointerAddressSpace();
return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()),
I2P->getOperand(0)->getType(), I2P->getType(),
DL) &&
CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()),
P2I->getOperand(0)->getType(), P2I->getType(),
DL) &&
- TTI->isNoopAddrSpaceCast(
- P2I->getOperand(0)->getType()->getPointerAddressSpace(),
- I2P->getType()->getPointerAddressSpace());
+ (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(P2IOp0AS, I2PAS));
}
// Returns true if V is an address expression.
@@ -332,8 +331,7 @@ getPointerOperands(const Value &V, const DataLayout &DL,
switch (Op.getOpcode()) {
case Instruction::PHI: {
auto IncomingValues = cast<PHINode>(Op).incoming_values();
- return SmallVector<Value *, 2>(IncomingValues.begin(),
- IncomingValues.end());
+ return {IncomingValues.begin(), IncomingValues.end()};
}
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
@@ -655,10 +653,13 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
case Instruction::IntToPtr: {
assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
- assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
- if (Src->getType() != NewPtrType)
- return new BitCastInst(Src, NewPtrType);
- return Src;
+ if (Src->getType() == NewPtrType)
+ return Src;
+
+ // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
+ // source address space from a generic pointer source need to insert a cast
+ // back.
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(Src, NewPtrType);
}
default:
llvm_unreachable("Unexpected opcode");
@@ -726,7 +727,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
NewOperands.push_back(cast<Constant>(NewOperand));
continue;
}
- if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
+ if (auto *CExpr = dyn_cast<ConstantExpr>(Operand))
if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
IsNew = true;
@@ -738,7 +739,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
}
// If !IsNew, we will replace the Value with itself. However, replaced values
- // are assumed to wrapped in a addrspace cast later so drop it now.
+ // are assumed to wrapped in an addrspacecast cast later so drop it now.
if (!IsNew)
return nullptr;
@@ -821,8 +822,8 @@ bool InferAddressSpacesImpl::run(Function &F) {
// Changes the address spaces of the flat address expressions who are inferred
// to point to a specific address space.
- return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace,
- PredicatedAS, &F);
+ return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS,
+ &F);
}
// Constants need to be tracked through RAUW to handle cases with nested
@@ -1010,7 +1011,7 @@ static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
}
/// Update memory intrinsic uses that require more complex processing than
-/// simple memory instructions. Thse require re-mangling and may have multiple
+/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
Value *NewV) {
@@ -1020,8 +1021,7 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
- B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(),
- MaybeAlign(MSI->getDestAlignment()),
+ B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(), MSI->getDestAlign(),
false, // isVolatile
TBAA, ScopeMD, NoAliasMD);
} else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
@@ -1104,7 +1104,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
}
bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
- const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
+ ArrayRef<WeakTrackingVH> Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace,
const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
@@ -1178,7 +1178,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
I = skipToNextUser(I, E);
if (isSimplePointerUseValidToReplace(
- TTI, U, V->getType()->getPointerAddressSpace())) {
+ *TTI, U, V->getType()->getPointerAddressSpace())) {
// If V is used as the pointer operand of a compatible memory operation,
// sets the pointer operand to NewV. This replacement does not change
// the element type, so the resultant load/store is still valid.
@@ -1239,8 +1239,16 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (!cast<PointerType>(ASC->getType())
->hasSameElementTypeAs(
cast<PointerType>(NewV->getType()))) {
+ BasicBlock::iterator InsertPos;
+ if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
+ InsertPos = std::next(NewVInst->getIterator());
+ else if (Instruction *VInst = dyn_cast<Instruction>(V))
+ InsertPos = std::next(VInst->getIterator());
+ else
+ InsertPos = ASC->getIterator();
+
NewV = CastInst::Create(Instruction::BitCast, NewV,
- ASC->getType(), "", ASC);
+ ASC->getType(), "", &*InsertPos);
}
ASC->replaceAllUsesWith(NewV);
DeadInstructions.push_back(ASC);
@@ -1249,12 +1257,18 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
}
// Otherwise, replaces the use with flat(NewV).
- if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Instruction *VInst = dyn_cast<Instruction>(V)) {
// Don't create a copy of the original addrspacecast.
if (U == V && isa<AddrSpaceCastInst>(V))
continue;
- BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
+ // Insert the addrspacecast after NewV.
+ BasicBlock::iterator InsertPos;
+ if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
+ InsertPos = std::next(NewVInst->getIterator());
+ else
+ InsertPos = std::next(VInst->getIterator());
+
while (isa<PHINode>(InsertPos))
++InsertPos;
U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
index c11d2e4c1d6b..4644905adba3 100644
--- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -7,21 +7,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -55,7 +51,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,
DeadInstsInBB.push_back(&I);
Changed = true;
} else if (!I.use_empty()) {
- if (Value *V = SimplifyInstruction(&I, SQ, ORE)) {
+ if (Value *V = simplifyInstruction(&I, SQ, ORE)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I.users())
Next->insert(cast<Instruction>(U));
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index a3efad104ca6..5caefc422921 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -56,7 +56,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -74,7 +73,6 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
@@ -106,11 +104,6 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
cl::Hidden);
-static cl::opt<bool> JumpThreadingFreezeSelectCond(
- "jump-threading-freeze-select-cond",
- cl::desc("Freeze the condition when unfolding select"), cl::init(false),
- cl::Hidden);
-
static cl::opt<bool> ThreadAcrossLoopHeaders(
"jump-threading-across-loop-headers",
cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@@ -140,8 +133,7 @@ namespace {
public:
static char ID; // Pass identification
- JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
- : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
+ JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
@@ -175,12 +167,11 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
// Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
- return new JumpThreading(InsertFr, Threshold);
+FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
+ return new JumpThreading(Threshold);
}
-JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
- InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
+JumpThreadingPass::JumpThreadingPass(int T) {
DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
@@ -326,7 +317,7 @@ bool JumpThreading::runOnFunction(Function &F) {
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
if (F.hasProfileData()) {
- LoopInfo LI{DominatorTree(F)};
+ LoopInfo LI{*DT};
BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
@@ -491,14 +482,16 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// at the end of block. RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
-static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
+static bool replaceFoldableUses(Instruction *Cond, Value *ToVal,
+ BasicBlock *KnownAtEndOfBB) {
+ bool Changed = false;
assert(Cond->getType() == ToVal->getType());
- auto *BB = Cond->getParent();
// We can unconditionally replace all uses in non-local blocks (i.e. uses
// strictly dominated by BB), since LVI information is true from the
// terminator of BB.
- replaceNonLocalUsesWith(Cond, ToVal);
- for (Instruction &I : reverse(*BB)) {
+ if (Cond->getParent() == KnownAtEndOfBB)
+ Changed |= replaceNonLocalUsesWith(Cond, ToVal);
+ for (Instruction &I : reverse(*KnownAtEndOfBB)) {
// Reached the Cond whose uses we are trying to replace, so there are no
// more uses.
if (&I == Cond)
@@ -507,10 +500,13 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
// of BB, where we know Cond is ToVal.
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
- I.replaceUsesOfWith(Cond, ToVal);
+ Changed |= I.replaceUsesOfWith(Cond, ToVal);
}
- if (Cond->use_empty() && !Cond->mayHaveSideEffects())
+ if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
Cond->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
}
/// Return the cost of duplicating a piece of this block from first non-phi
@@ -792,6 +788,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
if (Preference != WantInteger)
return false;
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ const DataLayout &DL = BO->getModule()->getDataLayout();
PredValueInfoTy LHSVals;
computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
@@ -799,7 +796,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
// Try to use constant folding to simplify the binary operator.
for (const auto &LHSVal : LHSVals) {
Constant *V = LHSVal.first;
- Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+ Constant *Folded =
+ ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
Result.emplace_back(KC, LHSVal.second);
@@ -835,7 +833,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
LHS = CmpLHS->DoPHITranslation(BB, PredBB);
RHS = PN->getIncomingValue(i);
}
- Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
+ Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
if (!Res) {
if (!isa<Constant>(RHS))
continue;
@@ -1135,34 +1133,21 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
return ConstantFolded;
}
- if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ // Some of the following optimization can safely work on the unfrozen cond.
+ Value *CondWithoutFreeze = CondInst;
+ if (auto *FI = dyn_cast<FreezeInst>(CondInst))
+ CondWithoutFreeze = FI->getOperand(0);
+
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
// If we're branching on a conditional, LVI might be able to determine
// it's value at the branch instruction. We only handle comparisons
// against a constant at this time.
- // TODO: This should be extended to handle switches as well.
- BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
- Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
- if (CondBr && CondConst) {
- // We should have returned as soon as we turn a conditional branch to
- // unconditional. Because its no longer interesting as far as jump
- // threading is concerned.
- assert(CondBr->isConditional() && "Threading on unconditional terminator");
-
+ if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
LazyValueInfo::Tristate Ret =
LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
- CondConst, CondBr, /*UseBlockValue=*/false);
+ CondConst, BB->getTerminator(),
+ /*UseBlockValue=*/false);
if (Ret != LazyValueInfo::Unknown) {
- unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
- unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
- BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
- ToRemoveSucc->removePredecessor(BB, true);
- BranchInst *UncondBr =
- BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
- UncondBr->setDebugLoc(CondBr->getDebugLoc());
- ++NumFolds;
- CondBr->eraseFromParent();
- if (CondCmp->use_empty())
- CondCmp->eraseFromParent();
// We can safely replace *some* uses of the CondInst if it has
// exactly one value as returned by LVI. RAUW is incorrect in the
// presence of guards and assumes, that have the `Cond` as the use. This
@@ -1170,17 +1155,11 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
- else if (CondCmp->getParent() == BB) {
- auto *CI = Ret == LazyValueInfo::True ?
- ConstantInt::getTrue(CondCmp->getType()) :
- ConstantInt::getFalse(CondCmp->getType());
- replaceFoldableUses(CondCmp, CI);
- }
- DTU->applyUpdatesPermissive(
- {{DominatorTree::Delete, BB, ToRemoveSucc}});
- if (HasProfileData)
- BPI->eraseBlock(BB);
- return true;
+ auto *CI = Ret == LazyValueInfo::True ?
+ ConstantInt::getTrue(CondCmp->getType()) :
+ ConstantInt::getFalse(CondCmp->getType());
+ if (replaceFoldableUses(CondCmp, CI, BB))
+ return true;
}
// We did not manage to simplify this branch, try to see whether
@@ -1198,11 +1177,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// for loads that are used by a switch or by the condition for the branch. If
// we see one, check to see if it's partially redundant. If so, insert a PHI
// which can then be used to thread the values.
- Value *SimplifyValue = CondInst;
-
- if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue))
- // Look into freeze's operand
- SimplifyValue = FI->getOperand(0);
+ Value *SimplifyValue = CondWithoutFreeze;
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
@@ -1227,10 +1202,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
// the current block, see if we can simplify.
- PHINode *PN = dyn_cast<PHINode>(
- isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0)
- : CondInst);
-
+ PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return processBranchOnPHI(PN);
@@ -1253,6 +1225,17 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
return false;
Value *Cond = BI->getCondition();
+ // Assuming that predecessor's branch was taken, if pred's branch condition
+ // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
+ // freeze(Cond) is either true or a nondeterministic value.
+ // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
+ // without affecting other instructions.
+ auto *FICond = dyn_cast<FreezeInst>(Cond);
+ if (FICond && FICond->hasOneUse())
+ Cond = FICond->getOperand(0);
+ else
+ FICond = nullptr;
+
BasicBlock *CurrentBB = BB;
BasicBlock *CurrentPred = BB->getSinglePredecessor();
unsigned Iter = 0;
@@ -1269,6 +1252,15 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
Optional<bool> Implication =
isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
+
+ // If the branch condition of BB (which is Cond) and CurrentPred are
+ // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
+ if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
+ if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
+ FICond->getOperand(0))
+ Implication = CondIsTrue;
+ }
+
if (Implication) {
BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
@@ -1277,6 +1269,9 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
UncondBI->setDebugLoc(BI->getDebugLoc());
++NumFolds;
BI->eraseFromParent();
+ if (FICond)
+ FICond->eraseFromParent();
+
DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
if (HasProfileData)
BPI->eraseBlock(BB);
@@ -1338,10 +1333,10 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
combineMetadataForCSE(NLoadI, LoadI, false);
};
- // If the returned value is the load itself, replace with an undef. This can
+ // If the returned value is the load itself, replace with poison. This can
// only happen in dead loops.
if (AvailableVal == LoadI)
- AvailableVal = UndefValue::get(LoadI->getType());
+ AvailableVal = PoisonValue::get(LoadI->getType());
if (AvailableVal->getType() != LoadI->getType())
AvailableVal = CastInst::CreateBitOrPointerCast(
AvailableVal, LoadI->getType(), "", LoadI);
@@ -1566,10 +1561,8 @@ findMostPopularDest(BasicBlock *BB,
DestPopularity[PredToDest.second]++;
// Find the most popular dest.
- using VT = decltype(DestPopularity)::value_type;
auto MostPopular = std::max_element(
- DestPopularity.begin(), DestPopularity.end(),
- [](const VT &L, const VT &R) { return L.second < R.second; });
+ DestPopularity.begin(), DestPopularity.end(), llvm::less_second());
// Okay, we have finally picked the most popular destination.
return MostPopular->first;
@@ -1742,9 +1735,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
- else if (OnlyVal && OnlyVal != MultipleVal &&
- CondInst->getParent() == BB)
- replaceFoldableUses(CondInst, OnlyVal);
+ else if (OnlyVal && OnlyVal != MultipleVal)
+ replaceFoldableUses(CondInst, OnlyVal, BB);
}
return true;
}
@@ -2672,7 +2664,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV = SimplifyInstruction(
+ if (Value *IV = simplifyInstruction(
New,
{BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
ValueMapping[&*BI] = IV;
@@ -2912,9 +2904,7 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
continue;
// Expand the select.
Value *Cond = SI->getCondition();
- if (InsertFreezeWhenUnfoldingSelect &&
- !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI,
- &DTU->getDomTree()))
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
Cond = new FreezeInst(Cond, "cond.fr", SI);
Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
BasicBlock *SplitBB = SI->getParent();
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 7fb1a25bdf13..492f4e40395a 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -37,29 +37,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -78,7 +76,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -88,6 +85,11 @@
#include <utility>
using namespace llvm;
+namespace llvm {
+class BlockFrequencyInfo;
+class LPMUpdater;
+} // namespace llvm
+
#define DEBUG_TYPE "licm"
STATISTIC(NumCreatedBlocks, "Number of blocks created");
@@ -114,8 +116,7 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
-// address the same issue. This flag applies only when LICM uses MemorySSA
-// instead on AliasSetTracker. LICM calls MemorySSAWalker's
+// address the same issue. LICM calls MemorySSAWalker's
// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
// which may not be precise, since optimizeUses is capped. The result is
@@ -143,37 +144,32 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, const Loop *CurLoop,
- ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
OptimizationRemarkEmitter *ORE);
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE,
- const Instruction *CtxI = nullptr);
-static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
- AliasSetTracker *CurAST, Loop *CurLoop,
- AAResults *AA);
-static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop, Instruction &I,
- SinkAndHoistLICMFlags &Flags);
-static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
- MemoryUse &MU);
+static bool isSafeToExecuteUnconditionally(
+ Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+ bool AllowSpeculation);
+static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop, Instruction &I,
+ SinkAndHoistLICMFlags &Flags);
+static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
+ MemoryUse &MU);
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
- const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
+ const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU);
+ MemorySSAUpdater &MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn);
@@ -188,21 +184,26 @@ struct LoopInvariantCodeMotion {
OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap)
+ unsigned LicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation)
: LicmMssaOptCap(LicmMssaOptCap),
- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+ LicmAllowSpeculation(LicmAllowSpeculation) {}
private:
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
+ bool LicmAllowSpeculation;
};
struct LegacyLICMPass : public LoopPass {
static char ID; // Pass identification, replacement for typeid
LegacyLICMPass(
unsigned LicmMssaOptCap = SetLicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
- : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
+ unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation = true)
+ : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ LicmAllowSpeculation) {
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
}
@@ -265,7 +266,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
- LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
+ Opts.AllowSpeculation);
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
&AR.SE, AR.MSSA, &ORE))
return PreservedAnalyses::all();
@@ -279,6 +281,16 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
return PA;
}
+void LICMPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
+ OS << ">";
+}
+
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
@@ -290,7 +302,8 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(LN.getParent());
- LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
+ Opts.AllowSpeculation);
Loop &OutermostLoop = LN.getOutermostLoop();
bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
@@ -308,6 +321,16 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
return PA;
}
+void LNICMPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
+ OS << ">";
+}
+
char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
false, false)
@@ -321,8 +344,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap) {
- return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ unsigned LicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation) {
+ return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ LicmAllowSpeculation);
}
llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
@@ -365,6 +390,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
+ MSSA->ensureOptimizedUses();
// If this loop has metadata indicating that LICM is not to be performed then
// just exit.
@@ -411,14 +437,15 @@ bool LoopInvariantCodeMotion::runOnLoop(
if (L->hasDedicatedExits())
Changed |= LoopNestMode
? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
- DT, BFI, TLI, TTI, L, &MSSAU,
+ DT, BFI, TLI, TTI, L, MSSAU,
&SafetyInfo, Flags, ORE)
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
- TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE);
+ TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE);
Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
- &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
+ MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
+ LicmAllowSpeculation);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -451,8 +478,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
PredIteratorCache PIC;
// Promoting one set of accesses may make the pointers for another set
- // loop invariant, so run this in a loop (with the MaybePromotable set
- // decreasing in size over time).
+ // loop invariant, so run this in a loop.
bool Promoted = false;
bool LocalPromoted;
do {
@@ -460,8 +486,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
for (const SmallSetVector<Value *, 8> &PointerMustAliases :
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
- LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+ DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
@@ -502,17 +528,17 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, MemorySSAUpdater *MSSAU,
+ Loop *CurLoop, MemorySSAUpdater &MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion.");
- // We want to visit children before parents. We will enque all the parents
+ // We want to visit children before parents. We will enqueue all the parents
// before their children in the worklist and process the worklist in reverse
// order.
SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
@@ -550,8 +576,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
if (!I.mayHaveSideEffects() &&
isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true,
- &Flags, ORE)) {
+ canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
@@ -564,14 +589,14 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ MSSAU.getMemorySSA()->verifyMemorySSA();
return Changed;
}
bool llvm::sinkRegionForLoopNest(
DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {
bool Changed = false;
@@ -600,7 +625,7 @@ private:
LoopInfo *LI;
DominatorTree *DT;
Loop *CurLoop;
- MemorySSAUpdater *MSSAU;
+ MemorySSAUpdater &MSSAU;
// A map of blocks in the loop to the block their instructions will be hoisted
// to.
@@ -612,7 +637,7 @@ private:
public:
ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
- MemorySSAUpdater *MSSAU)
+ MemorySSAUpdater &MSSAU)
: LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
void registerPossiblyHoistableBranch(BranchInst *BI) {
@@ -788,7 +813,7 @@ public:
if (HoistTarget == InitialPreheader) {
// Phis in the loop header now need to use the new preheader.
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
- MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
// The new preheader dominates the loop header.
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
@@ -822,13 +847,14 @@ public:
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, Loop *CurLoop,
- MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
- OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
+ OptimizationRemarkEmitter *ORE, bool LoopNestMode,
+ bool AllowSpeculation) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && SafetyInfo != nullptr &&
"Unexpected input to hoistRegion.");
ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
@@ -873,11 +899,10 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
- true, &Flags, ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
- CurLoop->getLoopPreheader()->getTerminator())) {
+ CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
@@ -982,7 +1007,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ MSSAU.getMemorySSA()->verifyMemorySSA();
// Now that we've finished hoisting make sure that LI and DT are still
// valid.
@@ -1083,30 +1108,19 @@ bool isHoistableAndSinkableInst(Instruction &I) {
isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
isa<InsertValueInst>(I) || isa<FreezeInst>(I));
}
-/// Return true if all of the alias sets within this AST are known not to
-/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
-bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
- const Loop *L) {
- if (CurAST) {
- for (AliasSet &AS : *CurAST) {
- if (!AS.isForwardingAliasSet() && AS.isMod()) {
- return false;
- }
- }
- return true;
- } else { /*MSSAU*/
- for (auto *BB : L->getBlocks())
- if (MSSAU->getMemorySSA()->getBlockDefs(BB))
- return false;
- return true;
- }
+/// Return true if MSSA knows there are no MemoryDefs in the loop.
+bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
+ for (auto *BB : L->getBlocks())
+ if (MSSAU.getMemorySSA()->getBlockDefs(BB))
+ return false;
+ return true;
}
/// Return true if I is the only Instruction with a MemoryAccess in L.
bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
- const MemorySSAUpdater *MSSAU) {
+ const MemorySSAUpdater &MSSAU) {
for (auto *BB : L->getBlocks())
- if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
+ if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
int NotAPhi = 0;
for (const auto &Acc : *Accs) {
if (isa<MemoryPhi>(&Acc))
@@ -1121,22 +1135,15 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
}
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
- Loop *CurLoop, AliasSetTracker *CurAST,
- MemorySSAUpdater *MSSAU,
+ Loop *CurLoop, MemorySSAUpdater &MSSAU,
bool TargetExecutesOncePerLoop,
- SinkAndHoistLICMFlags *Flags,
+ SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
-
// If we don't understand the instruction, bail early.
if (!isHoistableAndSinkableInst(I))
return false;
- MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
- if (MSSA)
- assert(Flags != nullptr && "Flags cannot be null.");
-
+ MemorySSA *MSSA = MSSAU.getMemorySSA();
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@@ -1156,13 +1163,8 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (isLoadInvariantInLoop(LI, DT, CurLoop))
return true;
- bool Invalidated;
- if (CurAST)
- Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
- CurLoop, AA);
- else
- Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
+ bool Invalidated = pointerInvalidatedByLoop(
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1210,24 +1212,17 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (AAResults::onlyAccessesArgPointees(Behavior)) {
// TODO: expand to writeable arguments
for (Value *Op : CI->args())
- if (Op->getType()->isPointerTy()) {
- bool Invalidated;
- if (CurAST)
- Invalidated = pointerInvalidatedByLoop(
- MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
- else
- Invalidated = pointerInvalidatedByLoopWithMSSA(
+ if (Op->getType()->isPointerTy() &&
+ pointerInvalidatedByLoop(
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
- *Flags);
- if (Invalidated)
- return false;
- }
+ Flags))
+ return false;
return true;
}
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
- if (isReadOnly(CurAST, MSSAU, CurLoop))
+ if (isReadOnly(MSSAU, CurLoop))
return true;
}
@@ -1238,21 +1233,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
} else if (auto *FI = dyn_cast<FenceInst>(&I)) {
// Fences alias (most) everything to provide ordering. For the moment,
// just give up if there are any other memory operations in the loop.
- if (CurAST) {
- auto Begin = CurAST->begin();
- assert(Begin != CurAST->end() && "must contain FI");
- if (std::next(Begin) != CurAST->end())
- // constant memory for instance, TODO: handle better
- return false;
- auto *UniqueI = Begin->getUniqueInstruction();
- if (!UniqueI)
- // other memory op, give up
- return false;
- (void)FI; // suppress unused variable warning
- assert(UniqueI == FI && "AS must contain FI");
- return true;
- } else // MSSAU
- return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
+ return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
} else if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isUnordered())
return false; // Don't sink/hoist volatile or ordered atomic store!
@@ -1262,68 +1243,54 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// load store promotion instead. TODO: We can extend this to cases where
// there is exactly one write to the location and that write dominates an
// arbitrary number of reads in the loop.
- if (CurAST) {
- auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
-
- if (AS.isRef() || !AS.isMustAlias())
- // Quick exit test, handled by the full path below as well.
- return false;
- auto *UniqueI = AS.getUniqueInstruction();
- if (!UniqueI)
- // other memory op, give up
- return false;
- assert(UniqueI == SI && "AS must contain SI");
+ if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
- } else { // MSSAU
- if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
- return true;
- // If there are more accesses than the Promotion cap or no "quota" to
- // check clobber, then give up as we're not walking a list that long.
- if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
- return false;
- // If there are interfering Uses (i.e. their defining access is in the
- // loop), or ordered loads (stored as Defs!), don't move this store.
- // Could do better here, but this is conservatively correct.
- // TODO: Cache set of Uses on the first walk in runOnLoop, update when
- // moving accesses. Can also extend to dominating uses.
- auto *SIMD = MSSA->getMemoryAccess(SI);
- for (auto *BB : CurLoop->getBlocks())
- if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
- for (const auto &MA : *Accesses)
- if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
- auto *MD = MU->getDefiningAccess();
- if (!MSSA->isLiveOnEntryDef(MD) &&
- CurLoop->contains(MD->getBlock()))
- return false;
- // Disable hoisting past potentially interfering loads. Optimized
- // Uses may point to an access outside the loop, as getClobbering
- // checks the previous iteration when walking the backedge.
- // FIXME: More precise: no Uses that alias SI.
- if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
- return false;
- } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
- if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
- (void)LI; // Silence warning.
- assert(!LI->isUnordered() && "Expected unordered load");
+ // If there are more accesses than the Promotion cap or no "quota" to
+ // check clobber, then give up as we're not walking a list that long.
+ if (Flags.tooManyMemoryAccesses() || Flags.tooManyClobberingCalls())
+ return false;
+ // If there are interfering Uses (i.e. their defining access is in the
+ // loop), or ordered loads (stored as Defs!), don't move this store.
+ // Could do better here, but this is conservatively correct.
+ // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+ // moving accesses. Can also extend to dominating uses.
+ auto *SIMD = MSSA->getMemoryAccess(SI);
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+ for (const auto &MA : *Accesses)
+ if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+ auto *MD = MU->getDefiningAccess();
+ if (!MSSA->isLiveOnEntryDef(MD) &&
+ CurLoop->contains(MD->getBlock()))
+ return false;
+ // Disable hoisting past potentially interfering loads. Optimized
+ // Uses may point to an access outside the loop, as getClobbering
+ // checks the previous iteration when walking the backedge.
+ // FIXME: More precise: no Uses that alias SI.
+ if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
+ return false;
+ } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
+ if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+ (void)LI; // Silence warning.
+ assert(!LI->isUnordered() && "Expected unordered load");
+ return false;
+ }
+ // Any call, while it may not be clobbering SI, it may be a use.
+ if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
+ // Check if the call may read from the memory location written
+ // to by SI. Check CI's attributes and arguments; the number of
+ // such checks performed is limited above by NoOfMemAccTooLarge.
+ ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
+ if (isModOrRefSet(MRI))
return false;
- }
- // Any call, while it may not be clobbering SI, it may be a use.
- if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
- // Check if the call may read from the memory location written
- // to by SI. Check CI's attributes and arguments; the number of
- // such checks performed is limited above by NoOfMemAccTooLarge.
- ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
- if (isModOrRefSet(MRI))
- return false;
- }
}
- }
- auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
- Flags->incrementClobberingCalls();
- // If there are no clobbering Defs in the loop, store is safe to hoist.
- return MSSA->isLiveOnEntryDef(Source) ||
- !CurLoop->contains(Source->getBlock());
- }
+ }
+ }
+ auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+ Flags.incrementClobberingCalls();
+ // If there are no clobbering Defs in the loop, store is safe to hoist.
+ return MSSA->isLiveOnEntryDef(Source) ||
+ !CurLoop->contains(Source->getBlock());
}
assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@@ -1421,7 +1388,7 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
- const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
+ const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
Instruction *New;
if (auto *CI = dyn_cast<CallInst>(&I)) {
const auto &BlockColors = SafetyInfo->getBlockColors();
@@ -1457,16 +1424,16 @@ static Instruction *cloneInstructionInExitBlock(
if (!I.getName().empty())
New->setName(I.getName() + ".le");
- if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
// Create a new MemoryAccess and let MemorySSA set its defining access.
- MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
+ MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
New, nullptr, New->getParent(), MemorySSA::Beginning);
if (NewMemAcc) {
if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
- MSSAU->insertDef(MemDef, /*RenameUses=*/true);
+ MSSAU.insertDef(MemDef, /*RenameUses=*/true);
else {
auto *MemUse = cast<MemoryUse>(NewMemAcc);
- MSSAU->insertUse(MemUse, /*RenameUses=*/true);
+ MSSAU.insertUse(MemUse, /*RenameUses=*/true);
}
}
}
@@ -1492,25 +1459,22 @@ static Instruction *cloneInstructionInExitBlock(
}
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU) {
- if (MSSAU)
- MSSAU->removeMemoryAccess(&I);
+ MemorySSAUpdater &MSSAU) {
+ MSSAU.removeMemoryAccess(&I);
SafetyInfo.removeInstruction(&I);
I.eraseFromParent();
}
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU,
+ MemorySSAUpdater &MSSAU,
ScalarEvolution *SE) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest.getParent());
I.moveBefore(&Dest);
- if (MSSAU)
- if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&I)))
- MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
- MemorySSA::BeforeTerminator);
+ if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU.getMemorySSA()->getMemoryAccess(&I)))
+ MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
if (SE)
SE->forgetValue(&I);
}
@@ -1519,7 +1483,7 @@ static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater &MSSAU) {
assert(isTriviallyReplaceablePHI(*TPN, *I) &&
"Expect only trivially replaceable PHI");
BasicBlock *ExitBlock = TPN->getParent();
@@ -1625,7 +1589,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, const Loop *CurLoop,
- ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
OptimizationRemarkEmitter *ORE) {
bool Changed = false;
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
@@ -1642,7 +1606,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
continue;
if (!DT->isReachableFromEntry(User->getParent())) {
- U = UndefValue::get(I.getType());
+ U = PoisonValue::get(I.getType());
Changed = true;
continue;
}
@@ -1655,7 +1619,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// unreachable.
BasicBlock *BB = PN->getIncomingBlock(U);
if (!DT->isReachableFromEntry(BB)) {
- U = UndefValue::get(I.getType());
+ U = PoisonValue::get(I.getType());
Changed = true;
continue;
}
@@ -1669,7 +1633,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// Split predecessors of the PHI so that we can make users trivially
// replaceable.
- splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
+ splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
// Should rebuild the iterators, as they may be invalidated by
// splitPredecessorsOfLoopExit().
@@ -1720,7 +1684,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New);
- eraseInstruction(*PN, *SafetyInfo, nullptr);
+ eraseInstruction(*PN, *SafetyInfo, MSSAU);
Changed = true;
}
return Changed;
@@ -1731,7 +1695,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
///
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
<< I << "\n");
@@ -1774,14 +1738,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader.
/// or if it is a trapping instruction and is guaranteed to execute.
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE,
- const Instruction *CtxI) {
- if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
+static bool isSafeToExecuteUnconditionally(
+ Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+ bool AllowSpeculation) {
+ if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
return true;
bool GuaranteedToExecute =
@@ -1809,7 +1771,7 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
- MemorySSAUpdater *MSSAU;
+ MemorySSAUpdater &MSSAU;
LoopInfo &LI;
DebugLoc DL;
Align Alignment;
@@ -1841,7 +1803,7 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
- MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
+ MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
@@ -1883,14 +1845,14 @@ public:
MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
MemoryAccess *NewMemAcc;
if (!MSSAInsertPoint) {
- NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewMemAcc = MSSAU.createMemoryAccessInBB(
NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
} else {
NewMemAcc =
- MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
+ MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
}
MSSAInsertPts[i] = NewMemAcc;
- MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
// FIXME: true for safety, false may still be correct.
}
}
@@ -1902,7 +1864,7 @@ public:
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
- MSSAU->removeMemoryAccess(I);
+ MSSAU.removeMemoryAccess(I);
}
bool shouldDelete(Instruction *I) const override {
@@ -1948,8 +1910,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE) {
+ Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@@ -1997,6 +1959,7 @@ bool llvm::promoteLoopAccessesToScalars(
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
+ bool StoreIsGuanteedToExecute = false;
bool FoundLoadToPromote = false;
SmallVector<Instruction *, 64> LoopUses;
@@ -2031,9 +1994,9 @@ bool llvm::promoteLoopAccessesToScalars(
// different sizes. While we are at it, collect alignment and AA info.
Type *AccessTy = nullptr;
for (Value *ASIV : PointerMustAliases) {
- for (User *U : ASIV->users()) {
+ for (Use &U : ASIV->uses()) {
// Ignore instructions that are outside the loop.
- Instruction *UI = dyn_cast<Instruction>(U);
+ Instruction *UI = dyn_cast<Instruction>(U.getUser());
if (!UI || !CurLoop->contains(UI))
continue;
@@ -2054,16 +2017,16 @@ bool llvm::promoteLoopAccessesToScalars(
// to execute does as well. Thus we can increase our guaranteed
// alignment as well.
if (!DereferenceableInPH || (InstAlignment > Alignment))
- if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
- SafetyInfo, ORE,
- Preheader->getTerminator())) {
+ if (isSafeToExecuteUnconditionally(
+ *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
+ Preheader->getTerminator(), AllowSpeculation)) {
DereferenceableInPH = true;
Alignment = std::max(Alignment, InstAlignment);
}
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
- if (UI->getOperand(1) != ASIV)
+ if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
continue;
if (!Store->isUnordered())
return false;
@@ -2077,10 +2040,12 @@ bool llvm::promoteLoopAccessesToScalars(
// alignment than any other guaranteed stores, in which case we can
// raise the alignment on the promoted store.
Align InstAlignment = Store->getAlign();
-
+ bool GuaranteedToExecute =
+ SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
+ StoreIsGuanteedToExecute |= GuaranteedToExecute;
if (!DereferenceableInPH || !SafeToInsertStore ||
(InstAlignment > Alignment)) {
- if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
+ if (GuaranteedToExecute) {
DereferenceableInPH = true;
SafeToInsertStore = true;
Alignment = std::max(Alignment, InstAlignment);
@@ -2194,32 +2159,37 @@ bool llvm::promoteLoopAccessesToScalars(
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
- LoadInst *PreheaderLoad = new LoadInst(
- AccessTy, SomePtr, SomePtr->getName() + ".promoted",
- Preheader->getTerminator());
- if (SawUnorderedAtomic)
- PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
- PreheaderLoad->setAlignment(Alignment);
- PreheaderLoad->setDebugLoc(DebugLoc());
- if (AATags)
- PreheaderLoad->setAAMetadata(AATags);
- SSA.AddAvailableValue(Preheader, PreheaderLoad);
+ LoadInst *PreheaderLoad = nullptr;
+ if (FoundLoadToPromote || !StoreIsGuanteedToExecute) {
+ PreheaderLoad =
+ new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
+ Preheader->getTerminator());
+ if (SawUnorderedAtomic)
+ PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
+ PreheaderLoad->setAlignment(Alignment);
+ PreheaderLoad->setDebugLoc(DebugLoc());
+ if (AATags)
+ PreheaderLoad->setAAMetadata(AATags);
- MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
- PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
- MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
- MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
+ MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
+ PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
+ MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
+ MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+ } else {
+ SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
+ }
if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ MSSAU.getMemorySSA()->verifyMemorySSA();
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ MSSAU.getMemorySSA()->verifyMemorySSA();
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
- if (PreheaderLoad->use_empty())
+ if (PreheaderLoad && PreheaderLoad->use_empty())
eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
return true;
@@ -2246,8 +2216,7 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return false;
};
- // Populate AST with potentially promotable accesses and remove them from
- // MaybePromotable, so they will not be checked again on the next iteration.
+ // Populate AST with potentially promotable accesses.
SmallPtrSet<Value *, 16> AttemptingPromotion;
foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
if (IsPotentiallyPromotable(I)) {
@@ -2286,15 +2255,9 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return Result;
}
-static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
- AliasSetTracker *CurAST, Loop *CurLoop,
- AAResults *AA) {
- return CurAST->getAliasSetFor(MemLoc).isMod();
-}
-
-bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop, Instruction &I,
- SinkAndHoistLICMFlags &Flags) {
+static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop, Instruction &I,
+ SinkAndHoistLICMFlags &Flags) {
// For hoisting, use the walker to determine safety
if (!Flags.getIsSink()) {
MemoryAccess *Source;
@@ -2329,17 +2292,16 @@ bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
if (Flags.tooManyMemoryAccesses())
return true;
for (auto *BB : CurLoop->getBlocks())
- if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
+ if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
return true;
// When sinking, the source block may not be part of the loop so check it.
if (!CurLoop->contains(&I))
- return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
+ return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
return false;
}
-bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
- MemoryUse &MU) {
+bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
if (const auto *Accesses = MSSA.getBlockDefs(&BB))
for (const auto &MA : *Accesses)
if (const auto *MD = dyn_cast<MemoryDef>(&MA))
diff --git a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
index 1c3ff1a61b7e..c063c0d3c88a 100644
--- a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
@@ -8,6 +8,7 @@
#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;
#define DEBUG_TYPE "loop-accesses"
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index d438d56e38ca..2b9800f11912 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -8,20 +8,15 @@
#include "llvm/Transforms/Scalar/LoopBoundSplit.h"
#include "llvm/ADT/Sequence.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#define DEBUG_TYPE "loop-bound-split"
@@ -33,26 +28,23 @@ using namespace PatternMatch;
namespace {
struct ConditionInfo {
/// Branch instruction with this condition
- BranchInst *BI;
+ BranchInst *BI = nullptr;
/// ICmp instruction with this condition
- ICmpInst *ICmp;
+ ICmpInst *ICmp = nullptr;
/// Preciate info
- ICmpInst::Predicate Pred;
+ ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
/// AddRec llvm value
- Value *AddRecValue;
+ Value *AddRecValue = nullptr;
/// Non PHI AddRec llvm value
Value *NonPHIAddRecValue;
/// Bound llvm value
- Value *BoundValue;
+ Value *BoundValue = nullptr;
/// AddRec SCEV
- const SCEVAddRecExpr *AddRecSCEV;
+ const SCEVAddRecExpr *AddRecSCEV = nullptr;
/// Bound SCEV
- const SCEV *BoundSCEV;
+ const SCEV *BoundSCEV = nullptr;
- ConditionInfo()
- : BI(nullptr), ICmp(nullptr), Pred(ICmpInst::BAD_ICMP_PREDICATE),
- AddRecValue(nullptr), BoundValue(nullptr), AddRecSCEV(nullptr),
- BoundSCEV(nullptr) {}
+ ConditionInfo() = default;
};
} // namespace
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 57e36e5b9b90..9590fbbb1994 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -30,9 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "loop-data-prefetch"
@@ -236,15 +233,14 @@ struct Prefetch {
/// The address formula for this prefetch as returned by ScalarEvolution.
const SCEVAddRecExpr *LSCEVAddRec;
/// The point of insertion for the prefetch instruction.
- Instruction *InsertPt;
+ Instruction *InsertPt = nullptr;
/// True if targeting a write memory access.
- bool Writes;
+ bool Writes = false;
/// The (first seen) prefetched instruction.
- Instruction *MemI;
+ Instruction *MemI = nullptr;
/// Constructor to create a new Prefetch for \p I.
- Prefetch(const SCEVAddRecExpr *L, Instruction *I)
- : LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) {
+ Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
addInstruction(I);
};
@@ -303,7 +299,11 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
}
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
}
- unsigned LoopSize = Metrics.NumInsts;
+
+ if (!Metrics.NumInsts.isValid())
+ return MadeChange;
+
+ unsigned LoopSize = *Metrics.NumInsts.getValue();
if (!LoopSize)
LoopSize = 1;
diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 361d6c0d9381..93f3cd704196 100644
--- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,12 +17,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
@@ -192,13 +192,13 @@ getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue,
getValueOnFirstIteration(BO->getOperand(0), FirstIterValue, SQ);
Value *RHS =
getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ);
- FirstIterV = SimplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
+ FirstIterV = simplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
} else if (auto *Cmp = dyn_cast<ICmpInst>(V)) {
Value *LHS =
getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ);
Value *RHS =
getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ);
- FirstIterV = SimplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
+ FirstIterV = simplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
} else if (auto *Select = dyn_cast<SelectInst>(V)) {
Value *Cond =
getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ);
@@ -458,13 +458,13 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
if (ExitBlock && isLoopNeverExecuted(L)) {
LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
// We need to forget the loop before setting the incoming values of the exit
- // phis to undef, so we properly invalidate the SCEV expressions for those
+ // phis to poison, so we properly invalidate the SCEV expressions for those
// phis.
SE.forgetLoop(L);
- // Set incoming value to undef for phi nodes in the exit block.
+ // Set incoming value to poison for phi nodes in the exit block.
for (PHINode &P : ExitBlock->phis()) {
std::fill(P.incoming_values().begin(), P.incoming_values().end(),
- UndefValue::get(P.getType()));
+ PoisonValue::get(P.getType()));
}
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "NeverExecutes", L->getStartLoc(),
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 0f4c767c1e4c..03a10cb36bb6 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -47,7 +47,6 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
@@ -231,7 +230,7 @@ public:
// having to update as many def-use and use-def chains.
for (auto *Inst : reverse(Unused)) {
if (!Inst->use_empty())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
Inst->eraseFromParent();
}
}
@@ -601,7 +600,7 @@ private:
{LLVMLoopDistributeFollowupAll,
Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
: LLVMLoopDistributeFollowupCoincident});
- if (PartitionID.hasValue()) {
+ if (PartitionID) {
Loop *NewLoop = Part->getDistributedLoop();
NewLoop->setLoopID(PartitionID.getValue());
}
@@ -770,19 +769,19 @@ public:
// Don't distribute the loop if we need too many SCEV run-time checks, or
// any if it's illegal.
- const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
+ const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
return fail("RuntimeCheckWithConvergent",
"may not insert runtime check with convergent operation");
}
- if (Pred.getComplexity() > (IsForced.getValueOr(false)
+ if (Pred.getComplexity() > (IsForced.value_or(false)
? PragmaDistributeSCEVCheckThreshold
: DistributeSCEVCheckThreshold))
return fail("TooManySCEVRuntimeChecks",
"too many SCEV run-time checks needed.\n");
- if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
+ if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
return fail("HeuristicDisabled", "distribution heuristic disabled");
LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
@@ -859,7 +858,7 @@ public:
/// Provide diagnostics then \return with false.
bool fail(StringRef RemarkName, StringRef Message) {
LLVMContext &Ctx = F->getContext();
- bool Forced = isForced().getValueOr(false);
+ bool Forced = isForced().value_or(false);
LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
@@ -991,7 +990,7 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
// If distribution was forced for the specific loop to be
// enabled/disabled, follow that. Otherwise use the global flag.
- if (LDL.isForced().getValueOr(EnableLoopDistribute))
+ if (LDL.isForced().value_or(EnableLoopDistribute))
Changed |= LDL.processLoop(GetLAA);
}
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index c46db4e63bfe..f36193fc468e 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -54,6 +54,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -64,12 +65,12 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -210,8 +211,9 @@ struct FlattenInfo {
if (!MatchedItCount)
return false;
- // Look through extends if the IV has been widened.
- if (Widened &&
+ // Look through extends if the IV has been widened. Don't look through
+ // extends if we already looked through a trunc.
+ if (Widened && IsAdd &&
(isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
assert(MatchedItCount->getType() == InnerInductionPHI->getType() &&
"Unexpected type mismatch in types after widening");
@@ -410,7 +412,7 @@ static bool findLoopComponents(
// pre-header and one from the latch. The incoming latch value is the
// increment variable.
Increment =
- dyn_cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
+ cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
if (Increment->hasNUsesOrMore(3)) {
LLVM_DEBUG(dbgs() << "Could not find valid increment\n");
return false;
@@ -921,7 +923,7 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ MSSAU ? MSSAU.getPointer() : nullptr);
if (!Changed)
return PreservedAnalyses::all();
@@ -987,7 +989,7 @@ bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
for (Loop *L : *LI) {
auto LN = LoopNest::getLoopNest(*L, *SE);
Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ MSSAU ? MSSAU.getPointer() : nullptr);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index bf4d275e04ba..d94b767c7b63 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -117,7 +117,7 @@ static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
"Use the dependence analysis interface"),
clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all",
"Use all available analyses")),
- cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
+ cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL));
static cl::opt<unsigned> FusionPeelMaxCount(
"loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
@@ -128,7 +128,7 @@ static cl::opt<unsigned> FusionPeelMaxCount(
static cl::opt<bool>
VerboseFusionDebugging("loop-fusion-verbose-debug",
cl::desc("Enable verbose debugging for Loop Fusion"),
- cl::Hidden, cl::init(false), cl::ZeroOrMore);
+ cl::Hidden, cl::init(false));
#endif
namespace {
@@ -178,12 +178,12 @@ struct FusionCandidate {
/// FusionCandidateCompare function, required by FusionCandidateSet to
/// determine where the FusionCandidate should be inserted into the set. These
/// are used to establish ordering of the FusionCandidates based on dominance.
- const DominatorTree *DT;
+ DominatorTree &DT;
const PostDominatorTree *PDT;
OptimizationRemarkEmitter &ORE;
- FusionCandidate(Loop *L, const DominatorTree *DT,
+ FusionCandidate(Loop *L, DominatorTree &DT,
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
TTI::PeelingPreferences PP)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
@@ -192,7 +192,6 @@ struct FusionCandidate {
GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
- assert(DT && "Expected non-null DT!");
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
// the memory reads and writes If any instructions that prevent fusion are
@@ -391,7 +390,7 @@ struct FusionCandidateCompare {
/// IF RHS dominates LHS and LHS post-dominates RHS, return false;
bool operator()(const FusionCandidate &LHS,
const FusionCandidate &RHS) const {
- const DominatorTree *DT = LHS.DT;
+ const DominatorTree *DT = &(LHS.DT);
BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
@@ -646,7 +645,7 @@ private:
for (Loop *L : LV) {
TTI::PeelingPreferences PP =
gatherPeelingPreferences(L, SE, TTI, None, None);
- FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP);
+ FusionCandidate CurrCand(L, DT, &PDT, ORE, PP);
if (!CurrCand.isEligibleForFusion(SE))
continue;
@@ -991,7 +990,7 @@ private:
FuseCounter);
FusionCandidate FusedCand(
- performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE,
+ performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
FC0Copy.PP);
FusedCand.verify();
assert(FusedCand.isEligibleForFusion(SE) &&
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 318c4c06f0f7..88d6a7aff3c9 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -61,7 +61,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -346,7 +345,7 @@ INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
static void deleteDeadInstruction(Instruction *I) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
@@ -798,7 +797,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
}
/// processLoopMemIntrinsic - Template function for calling different processor
-/// functions based on mem instrinsic type.
+/// functions based on mem intrinsic type.
template <typename MemInst>
bool LoopIdiomRecognize::processLoopMemIntrinsic(
BasicBlock *BB,
@@ -995,9 +994,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
- MaybeAlign(MSI->getDestAlignment()),
- SplatValue, MSI, MSIs, Ev, BECount,
- IsNegStride, /*IsLoopMemset=*/true);
+ MSI->getDestAlign(), SplatValue, MSI, MSIs, Ev,
+ BECount, IsNegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -1101,6 +1099,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
+ Module *M = TheStore->getModule();
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
@@ -1173,6 +1172,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
CallInst *NewCall;
if (SplatValue) {
AAMDNodes AATags = TheStore->getAAMetadata();
+ for (Instruction *Store : Stores)
+ AATags = AATags.merge(Store->getAAMetadata());
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
AATags = AATags.extendTo(CI->getZExtValue());
else
@@ -1181,15 +1182,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
NewCall = Builder.CreateMemSet(
BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
/*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
- } else {
+ } else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
- Module *M = TheStore->getModule();
StringRef FuncName = "memset_pattern16";
- FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
- Int8PtrTy, Int8PtrTy, IntIdxTy);
- inferLibFuncAttributes(M, FuncName, *TLI);
+ FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16,
+ Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
+ inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
@@ -1200,7 +1200,9 @@ bool LoopIdiomRecognize::processLoopStridedStore(
GV->setAlignment(Align(16));
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
- }
+ } else
+ return Changed;
+
NewCall->setDebugLoc(TheStore->getDebugLoc());
if (MSSAU) {
@@ -1275,9 +1277,8 @@ class MemmoveVerifier {
public:
explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
const DataLayout &DL)
- : DL(DL), LoadOff(0), StoreOff(0),
- BP1(llvm::GetPointerBaseWithConstantOffset(
- LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
+ : DL(DL), BP1(llvm::GetPointerBaseWithConstantOffset(
+ LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
BP2(llvm::GetPointerBaseWithConstantOffset(
StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
IsSameObject(BP1 == BP2) {}
@@ -1307,8 +1308,8 @@ public:
private:
const DataLayout &DL;
- int64_t LoadOff;
- int64_t StoreOff;
+ int64_t LoadOff = 0;
+ int64_t StoreOff = 0;
const Value *BP1;
const Value *BP2;
@@ -1420,26 +1421,19 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
- if (IsMemCpy)
- IgnoredInsts.erase(TheStore);
MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
+ if (IsMemCpy && !Verifier.IsSameObject)
+ IgnoredInsts.erase(TheStore);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
StoreSizeSCEV, *AA, IgnoredInsts)) {
- if (!IsMemCpy) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
- TheLoad)
- << ore::NV("Inst", InstRemark) << " in "
- << ore::NV("Function", TheStore->getFunction())
- << " function will not be hoisted: "
- << ore::NV("Reason", "The loop may access load location");
- });
- return Changed;
- }
- // At this point loop may access load only for memcpy in same underlying
- // object. If that's not the case bail out.
- if (!Verifier.IsSameObject)
- return Changed;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
+ << ore::NV("Inst", InstRemark) << " in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function will not be hoisted: "
+ << ore::NV("Reason", "The loop may access load location");
+ });
+ return Changed;
}
bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
@@ -1487,7 +1481,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
return Changed;
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
- assert((StoreAlign.hasValue() && LoadAlign.hasValue()) &&
+ assert((StoreAlign && LoadAlign) &&
"Expect unordered load/store to have align.");
if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize)
return Changed;
diff --git a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index b9e63a4bc06f..4249512ea0f8 100644
--- a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -25,21 +24,17 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include <algorithm>
#include <utility>
using namespace llvm;
@@ -101,7 +96,7 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (!IsFirstIteration && !ToSimplify->count(&I))
continue;
- Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I));
+ Value *V = simplifyInstruction(&I, SQ.getWithInstruction(&I));
if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
continue;
@@ -109,6 +104,10 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
auto *UserI = cast<Instruction>(U.getUser());
U.set(V);
+ // Do not bother dealing with unreachable code.
+ if (!DT.isReachableFromEntry(UserI->getParent()))
+ continue;
+
// If the instruction is used by a PHI node we have already processed
// we'll need to iterate on the loop body to converge, so add it to
// the next set.
@@ -222,7 +221,7 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+ MSSAU ? MSSAU.getPointer() : nullptr))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index c2b065c4eb31..1d3023d04463 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
@@ -33,7 +34,6 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -44,7 +44,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
@@ -120,8 +119,6 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
std::vector<char> Dep;
Instruction *Src = cast<Instruction>(*I);
Instruction *Dst = cast<Instruction>(*J);
- if (Src == Dst)
- continue;
// Ignore Input dependencies.
if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
continue;
@@ -270,26 +267,28 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
return true;
}
-static LoopVector populateWorklist(Loop &L) {
+static void populateWorklist(Loop &L, LoopVector &LoopList) {
LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() << " Loop: %"
<< L.getHeader()->getName() << '\n');
- LoopVector LoopList;
+ assert(LoopList.empty() && "LoopList should initially be empty!");
Loop *CurrentLoop = &L;
const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
while (!Vec->empty()) {
// The current loop has multiple subloops in it hence it is not tightly
// nested.
// Discard all loops above it added into Worklist.
- if (Vec->size() != 1)
- return {};
+ if (Vec->size() != 1) {
+ LoopList = {};
+ return;
+ }
LoopList.push_back(CurrentLoop);
CurrentLoop = Vec->front();
Vec = &CurrentLoop->getSubLoops();
}
LoopList.push_back(CurrentLoop);
- return LoopList;
+ return;
}
namespace {
@@ -360,8 +359,10 @@ public:
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
/// Check if the loop interchange is profitable.
- bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
- CharMatrix &DepMatrix);
+ bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
+ unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap);
private:
int getInstrOrderCost();
@@ -412,23 +413,26 @@ struct LoopInterchange {
LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr;
+ std::unique_ptr<CacheCost> CC = nullptr;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
- DominatorTree *DT, OptimizationRemarkEmitter *ORE)
- : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
+ DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
+ OptimizationRemarkEmitter *ORE)
+ : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
bool run(Loop *L) {
if (L->getParentLoop())
return false;
-
- return processLoopList(populateWorklist(*L));
+ SmallVector<Loop *, 8> LoopList;
+ populateWorklist(*L, LoopList);
+ return processLoopList(LoopList);
}
bool run(LoopNest &LN) {
- const auto &LoopList = LN.getLoops();
+ SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
for (unsigned I = 1; I < LoopList.size(); ++I)
if (LoopList[I]->getParentLoop() != LoopList[I - 1])
return false;
@@ -460,7 +464,7 @@ struct LoopInterchange {
return LoopList.size() - 1;
}
- bool processLoopList(ArrayRef<Loop *> LoopList) {
+ bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
bool Changed = false;
unsigned LoopNestDepth = LoopList.size();
if (LoopNestDepth < 2) {
@@ -500,27 +504,55 @@ struct LoopInterchange {
}
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
- // Move the selected loop outwards to the best possible position.
- Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
- for (unsigned i = SelecLoopId; i > 0; i--) {
- bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
- i - 1, DependencyMatrix);
- if (!Interchanged)
- return Changed;
- // Update the DependencyMatrix
- interChangeDependencies(DependencyMatrix, i, i - 1);
+ // Obtain the loop vector returned from loop cache analysis beforehand,
+ // and put each <Loop, index> pair into a map for constant time query
+ // later. Indices in loop vector represent the optimal order of the
+ // corresponding loop, e.g., given a loopnest with depth N, index 0
+ // indicates the loop should be placed as the outermost loop and index N
+ // indicates the loop should be placed as the innermost loop.
+ //
+ // For the old pass manager CacheCost would be null.
+ DenseMap<const Loop *, unsigned> CostMap;
+ if (CC != nullptr) {
+ const auto &LoopCosts = CC->getLoopCosts();
+ for (unsigned i = 0; i < LoopCosts.size(); i++) {
+ CostMap[LoopCosts[i].first] = i;
+ }
+ }
+ // We try to achieve the globally optimal memory access for the loopnest,
+ // and do interchange in a bubble-sort fashion. We start from
+ // the innermost loop, move it outwards to the best possible position
+ // and repeat this process.
+ for (unsigned j = SelecLoopId; j > 0; j--) {
+ bool ChangedPerIter = false;
+ for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
+ bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
+ DependencyMatrix, CostMap);
+ if (!Interchanged)
+ continue;
+ // Loops interchanged, update LoopList accordingly.
+ std::swap(LoopList[i - 1], LoopList[i]);
+ // Update the DependencyMatrix
+ interChangeDependencies(DependencyMatrix, i, i - 1);
#ifdef DUMP_DEP_MATRICIES
- LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
- printDepMatrix(DependencyMatrix);
+ LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
+ printDepMatrix(DependencyMatrix);
#endif
- Changed |= Interchanged;
+ ChangedPerIter |= Interchanged;
+ Changed |= Interchanged;
+ }
+ // Early abort if there was no interchange during an entire round of
+ // moving loops outwards.
+ if (!ChangedPerIter)
+ break;
}
return Changed;
}
bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
unsigned OuterLoopId,
- std::vector<std::vector<char>> &DependencyMatrix) {
+ std::vector<std::vector<char>> &DependencyMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
@@ -530,7 +562,8 @@ struct LoopInterchange {
}
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
- if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
+ DependencyMatrix, CostMap)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;
}
@@ -733,8 +766,12 @@ static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
if (PHI->getNumIncomingValues() == 1)
continue;
RecurrenceDescriptor RD;
- if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
+ if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
+ // Detect floating point reduction only when it can be reordered.
+ if (RD.getExactFPMathInst() != nullptr)
+ return nullptr;
return PHI;
+ }
return nullptr;
}
}
@@ -893,28 +930,23 @@ areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
for (PHINode &PHI : LoopNestExit->phis()) {
- // FIXME: We currently are not able to detect floating point reductions
- // and have to use floating point PHIs as a proxy to prevent
- // interchanging in the presence of floating point reductions.
- if (PHI.getType()->isFloatingPointTy())
- return false;
for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
- Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
- if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
- continue;
+ Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
+ if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
+ continue;
- // The incoming value is defined in the outer loop latch. Currently we
- // only support that in case the outer loop latch has a single predecessor.
- // This guarantees that the outer loop latch is executed if and only if
- // the inner loop is executed (because tightlyNested() guarantees that the
- // outer loop header only branches to the inner loop or the outer loop
- // latch).
- // FIXME: We could weaken this logic and allow multiple predecessors,
- // if the values are produced outside the loop latch. We would need
- // additional logic to update the PHI nodes in the exit block as
- // well.
- if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
- return false;
+ // The incoming value is defined in the outer loop latch. Currently we
+ // only support that in case the outer loop latch has a single predecessor.
+ // This guarantees that the outer loop latch is executed if and only if
+ // the inner loop is executed (because tightlyNested() guarantees that the
+ // outer loop header only branches to the inner loop or the outer loop
+ // latch).
+ // FIXME: We could weaken this logic and allow multiple predecessors,
+ // if the values are produced outside the loop latch. We would need
+ // additional logic to update the PHI nodes in the exit block as
+ // well.
+ if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
+ return false;
}
}
return true;
@@ -1125,21 +1157,33 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
return !DepMatrix.empty();
}
-bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
- unsigned OuterLoopId,
- CharMatrix &DepMatrix) {
- // TODO: Add better profitability checks.
- // e.g
- // 1) Construct dependency matrix and move the one with no loop carried dep
- // inside to enable vectorization.
+bool LoopInterchangeProfitability::isProfitable(
+ const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
+ unsigned OuterLoopId, CharMatrix &DepMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
+ // TODO: Remove the legacy cost model.
- // This is rough cost estimation algorithm. It counts the good and bad order
- // of induction variables in the instruction and allows reordering if number
- // of bad orders is more than good.
- int Cost = getInstrOrderCost();
- LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
- if (Cost < -LoopInterchangeCostThreshold)
- return true;
+ // This is the new cost model returned from loop cache analysis.
+ // A smaller index means the loop should be placed as an outer loop, and vice
+ // versa.
+ if (CostMap.find(InnerLoop) != CostMap.end() &&
+ CostMap.find(OuterLoop) != CostMap.end()) {
+ unsigned InnerIndex = 0, OuterIndex = 0;
+ InnerIndex = CostMap.find(InnerLoop)->second;
+ OuterIndex = CostMap.find(OuterLoop)->second;
+ LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
+ << ", OuterIndex = " << OuterIndex << "\n");
+ if (InnerIndex < OuterIndex)
+ return true;
+ } else {
+ // Legacy cost model: this is rough cost estimation algorithm. It counts the
+ // good and bad order of induction variables in the instruction and allows
+ // reordering if number of bad orders is more than good.
+ int Cost = getInstrOrderCost();
+ LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
+ if (Cost < -LoopInterchangeCostThreshold)
+ return true;
+ }
// It is not profitable as per current cache profitability model. But check if
// we can move this loop outside to improve parallelism.
@@ -1150,10 +1194,8 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
InnerLoop->getStartLoc(),
InnerLoop->getHeader())
- << "Interchanging loops is too costly (cost="
- << ore::NV("Cost", Cost) << ", threshold="
- << ore::NV("Threshold", LoopInterchangeCostThreshold)
- << ") and it does not improve parallelism.";
+ << "Interchanging loops is too costly and it does not improve "
+ "parallelism.";
});
return false;
}
@@ -1424,9 +1466,13 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
// Incoming values are guaranteed be instructions currently.
auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
+ // In case of multi-level nested loops, follow LCSSA to find the incoming
+ // value defined from the innermost loop.
+ auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
// Skip phis with incoming values from the inner loop body, excluding the
// header and latch.
- if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
+ if (IncIInnerMost->getParent() != InnerLatch &&
+ IncIInnerMost->getParent() != InnerHeader)
continue;
assert(all_of(P.users(),
@@ -1695,8 +1741,8 @@ struct LoopInterchangeLegacyPass : public LoopPass {
auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
- return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
+ std::unique_ptr<CacheCost> CC = nullptr;
+ return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
}
};
} // namespace
@@ -1723,8 +1769,10 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
Function &F = *LN.getParent();
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+ std::unique_ptr<CacheCost> CC =
+ CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
OptimizationRemarkEmitter ORE(&F);
- if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
+ if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 21d59936616b..1877ac1dfd08 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -61,7 +61,6 @@
#include <algorithm>
#include <cassert>
#include <forward_list>
-#include <set>
#include <tuple>
#include <utility>
@@ -213,7 +212,8 @@ public:
continue;
// Only progagate the value if they are of the same type.
- if (Store->getPointerOperandType() != Load->getPointerOperandType())
+ if (Store->getPointerOperandType() != Load->getPointerOperandType() ||
+ getLoadStoreType(Store) != getLoadStoreType(Load))
continue;
Candidates.emplace_front(Load, Store);
@@ -528,7 +528,7 @@ public:
return false;
}
- if (LAI.getPSE().getUnionPredicate().getComplexity() >
+ if (LAI.getPSE().getPredicate().getComplexity() >
LoadElimSCEVCheckThreshold) {
LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
return false;
@@ -539,7 +539,7 @@ public:
return false;
}
- if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
+ if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) {
if (LAI.hasConvergentOp()) {
LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
"convergent calls\n");
@@ -706,8 +706,12 @@ FunctionPass *llvm::createLoopLoadEliminationPass() {
PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
FunctionAnalysisManager &AM) {
- auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
+ // There are no loops in the function. Return before computing other expensive
+ // analyses.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 6c783848432b..d20d275ea60c 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -8,14 +8,12 @@
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/TimeProfiler.h"
using namespace llvm;
@@ -311,12 +309,12 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
#ifndef NDEBUG
// LoopAnalysisResults should always be valid.
- // Note that we don't LAR.SE.verify() because that can change observed SE
- // queries. See PR44815.
if (VerifyDomInfo)
LAR.DT.verify();
if (VerifyLoopInfo)
LAR.LI.verify(LAR.DT);
+ if (VerifySCEV)
+ LAR.SE.verify();
if (LAR.MSSA && VerifyMemorySSA)
LAR.MSSA->verifyMemorySSA();
#endif
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index aa7e79a589f2..d0ee5b47a8ca 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -188,7 +188,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
@@ -244,7 +243,7 @@ struct LoopICmp {
LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
const SCEV *Limit)
: Pred(Pred), IV(IV), Limit(Limit) {}
- LoopICmp() {}
+ LoopICmp() = default;
void dump() {
dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
<< ", Limit = " << *Limit << "\n";
@@ -778,7 +777,7 @@ unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
Guard)) {
- Checks.push_back(NewRangeCheck.getValue());
+ Checks.push_back(*NewRangeCheck);
NumWidened++;
continue;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 9d22eceb987f..f4ef22562341 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -29,15 +29,11 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -59,7 +55,6 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <cstdlib>
#include <iterator>
#include <map>
#include <utility>
@@ -559,12 +554,12 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
}
// Must be a CMP or an ext (of a value with nsw) then CMP
else {
- Instruction *UUser = dyn_cast<Instruction>(UU);
+ auto *UUser = cast<Instruction>(UU);
// Skip SExt if we are extending an nsw value
// TODO: Allow ZExt too
- if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
+ if (BO->hasNoSignedWrap() && UUser->hasOneUse() &&
isa<SExtInst>(UUser))
- UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
+ UUser = cast<Instruction>(*(UUser->user_begin()));
if (!isCompareUsedByBranch(UUser))
return false;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 5ba137b1c85f..d9c33b5f335a 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -11,10 +11,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopRotation.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -22,9 +22,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -62,8 +60,8 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
MSSAU = MemorySSAUpdater(AR.MSSA);
bool Changed =
LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false,
- Threshold, false, PrepareForLTO || PrepareForLTOOption);
+ MSSAU ? MSSAU.getPointer() : nullptr, SQ, false, Threshold,
+ false, PrepareForLTO || PrepareForLTOOption);
if (!Changed)
return PreservedAnalyses::all();
@@ -133,9 +131,8 @@ public:
: MaxHeaderSize;
return LoopRotation(L, LI, TTI, AC, &DT, &SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
- false, Threshold, false,
- PrepareForLTO || PrepareForLTOOption);
+ MSSAU ? MSSAU.getPointer() : nullptr, SQ, false,
+ Threshold, false, PrepareForLTO || PrepareForLTOOption);
}
};
} // end namespace
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index d3fcba10c275..b7e0e32780b4 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -16,28 +16,21 @@
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -261,13 +254,17 @@ private:
assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() &&
"Malformed block sets?");
- // Now, all exit blocks that are not marked as live are dead.
+ // Now, all exit blocks that are not marked as live are dead, if all their
+ // predecessors are in the loop. This may not be the case, as the input loop
+ // may not by in loop-simplify/canonical form.
SmallVector<BasicBlock *, 8> ExitBlocks;
L.getExitBlocks(ExitBlocks);
SmallPtrSet<BasicBlock *, 8> UniqueDeadExits;
for (auto *ExitBlock : ExitBlocks)
if (!LiveExitBlocks.count(ExitBlock) &&
- UniqueDeadExits.insert(ExitBlock).second)
+ UniqueDeadExits.insert(ExitBlock).second &&
+ all_of(predecessors(ExitBlock),
+ [this](BasicBlock *Pred) { return L.contains(Pred); }))
DeadExitBlocks.push_back(ExitBlock);
// Whether or not the edge From->To will still be present in graph after the
@@ -374,7 +371,7 @@ private:
DeadInstructions.emplace_back(LandingPad);
for (Instruction *I : DeadInstructions) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
@@ -704,8 +701,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
MSSAU = MemorySSAUpdater(AR.MSSA);
bool DeleteCurrentLoop = false;
if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
- DeleteCurrentLoop))
+ MSSAU ? MSSAU.getPointer() : nullptr, DeleteCurrentLoop))
return PreservedAnalyses::all();
if (DeleteCurrentLoop)
@@ -739,9 +735,9 @@ public:
if (MSSAA && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
bool DeleteCurrentLoop = false;
- bool Changed = simplifyLoopCFG(
- *L, DT, LI, SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
- DeleteCurrentLoop);
+ bool Changed =
+ simplifyLoopCFG(*L, DT, LI, SE, MSSAU ? MSSAU.getPointer() : nullptr,
+ DeleteCurrentLoop);
if (DeleteCurrentLoop)
LPM.markLoopAsDeleted(*L);
return Changed;
diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp
index c9c9e60d0921..dce1af475fb1 100644
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -34,24 +34,18 @@
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -70,14 +64,6 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
"max-uses-for-sinking", cl::Hidden, cl::init(30),
cl::desc("Do not sink instructions that have too many uses."));
-static cl::opt<bool> EnableMSSAInLoopSink(
- "enable-mssa-in-loop-sink", cl::Hidden, cl::init(true),
- cl::desc("Enable MemorySSA for LoopSink in new pass manager"));
-
-static cl::opt<bool> EnableMSSAInLegacyLoopSink(
- "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false),
- cl::desc("Enable MemorySSA for LoopSink in legacy pass manager"));
-
/// Return adjusted total frequency of \p BBs.
///
/// * If there is only one BB, sinking instruction will not introduce code
@@ -279,9 +265,8 @@ static bool sinkInstruction(
static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
DominatorTree &DT,
BlockFrequencyInfo &BFI,
- ScalarEvolution *SE,
- AliasSetTracker *CurAST,
- MemorySSA *MSSA) {
+ MemorySSA &MSSA,
+ ScalarEvolution *SE) {
BasicBlock *Preheader = L.getLoopPreheader();
assert(Preheader && "Expected loop to have preheader");
@@ -297,13 +282,8 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
}))
return false;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags;
- if (MSSA) {
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- LICMFlags =
- std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA);
- }
+ MemorySSAUpdater MSSAU(&MSSA);
+ SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, &L, &MSSA);
bool Changed = false;
@@ -324,14 +304,15 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// on B (A appears after B), A needs to be sinked first before B can be
// sinked.
for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+ if (isa<PHINode>(&I))
+ continue;
// No need to check for instruction's operands are loop invariant.
assert(L.hasLoopInvariantOperands(&I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
- LICMFlags.get()))
+ if (!canSinkOrHoistInst(I, &AA, &DT, &L, MSSAU, false, LICMFlags))
continue;
if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
- MSSAU.get()))
+ &MSSAU))
Changed = true;
}
@@ -340,13 +321,6 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
return Changed;
}
-static void computeAliasSet(Loop &L, BasicBlock &Preheader,
- AliasSetTracker &CurAST) {
- for (BasicBlock *BB : L.blocks())
- CurAST.add(*BB);
- CurAST.add(Preheader);
-}
-
PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
// Nothing to do if there are no loops.
@@ -356,10 +330,7 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
AAResults &AA = FAM.getResult<AAManager>(F);
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
-
- MemorySSA *MSSA = EnableMSSAInLoopSink
- ? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
+ MemorySSA &MSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA();
// We want to do a postorder walk over the loops. Since loops are a tree this
// is equivalent to a reversed preorder walk and preorder is easy to compute
@@ -381,18 +352,11 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
if (!Preheader->getParent()->hasProfileData())
continue;
- std::unique_ptr<AliasSetTracker> CurAST;
- if (!EnableMSSAInLoopSink) {
- CurAST = std::make_unique<AliasSetTracker>(AA);
- computeAliasSet(L, *Preheader, *CurAST.get());
- }
-
// Note that we don't pass SCEV here because it is only used to invalidate
// loops in SCEV and we don't preserve (or request) SCEV at all making that
// unnecessary.
- Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI,
- /*ScalarEvolution*/ nullptr,
- CurAST.get(), MSSA);
+ Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, MSSA,
+ /*ScalarEvolution*/ nullptr);
} while (!PreorderLoops.empty());
if (!Changed)
@@ -400,13 +364,10 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+ PA.preserve<MemorySSAAnalysis>();
- if (MSSA) {
- PA.preserve<MemorySSAAnalysis>();
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- }
+ if (VerifyMemorySSA)
+ MSSA.verifyMemorySSA();
return PA;
}
@@ -432,24 +393,16 @@ struct LegacyLoopSinkPass : public LoopPass {
return false;
AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- std::unique_ptr<AliasSetTracker> CurAST;
- MemorySSA *MSSA = nullptr;
- if (EnableMSSAInLegacyLoopSink)
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- else {
- CurAST = std::make_unique<AliasSetTracker>(AA);
- computeAliasSet(*L, *Preheader, *CurAST.get());
- }
-
bool Changed = sinkLoopInvariantInstructions(
*L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
- SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA);
+ MSSA, SE ? &SE->getSE() : nullptr);
- if (MSSA && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
+ if (VerifyMemorySSA)
+ MSSA.verifyMemorySSA();
return Changed;
}
@@ -458,10 +411,8 @@ struct LegacyLoopSinkPass : public LoopPass {
AU.setPreservesCFG();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
getLoopAnalysisUsage(AU);
- if (EnableMSSAInLegacyLoopSink) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
}
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 654f0d2a03a8..9959e408e2e2 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -78,6 +78,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -91,9 +92,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
@@ -114,12 +113,12 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <cstdlib>
#include <iterator>
#include <limits>
#include <map>
@@ -142,10 +141,7 @@ static const unsigned MaxIVUsers = 200;
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;
-// Temporary flag to cleanup congruent phis after LSR phi expansion.
-// It's currently disabled until we can determine whether it's truly useful or
-// not. The flag should be removed after the v3.0 release.
-// This is now needed for ivchains.
+// Cleanup congruent phis after LSR phi expansion.
static cl::opt<bool> EnablePhiElim(
"enable-lsr-phielim", cl::Hidden, cl::init(true),
cl::desc("Enable LSR phi elimination"));
@@ -481,6 +477,12 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
canonicalize(*L);
}
+static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
+ return SCEVExprContains(S, [&L](const SCEV *S) {
+ return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
+}
+
/// Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
@@ -494,18 +496,15 @@ bool Formula::isCanonical(const Loop &L) const {
if (Scale == 1 && BaseRegs.empty())
return false;
- const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
- if (SAR && SAR->getLoop() == &L)
+ if (containsAddRecDependentOnLoop(ScaledReg, L))
return true;
// If ScaledReg is not a recurrent expr, or it is but its loop is not current
// loop, meanwhile BaseRegs contains a recurrent expr reg related with current
// loop, we want to swap the reg in BaseRegs with ScaledReg.
- auto I = find_if(BaseRegs, [&](const SCEV *S) {
- return isa<const SCEVAddRecExpr>(S) &&
- (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ return none_of(BaseRegs, [&L](const SCEV *S) {
+ return containsAddRecDependentOnLoop(S, L);
});
- return I == BaseRegs.end();
}
/// Helper method to morph a formula into its canonical representation.
@@ -537,11 +536,9 @@ void Formula::canonicalize(const Loop &L) {
// If ScaledReg is an invariant with respect to L, find the reg from
// BaseRegs containing the recurrent expr related with Loop L. Swap the
// reg with ScaledReg.
- const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
- if (!SAR || SAR->getLoop() != &L) {
- auto I = find_if(BaseRegs, [&](const SCEV *S) {
- return isa<const SCEVAddRecExpr>(S) &&
- (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
+ auto I = find_if(BaseRegs, [&L](const SCEV *S) {
+ return containsAddRecDependentOnLoop(S, L);
});
if (I != BaseRegs.end())
std::swap(ScaledReg, *I);
@@ -1070,7 +1067,7 @@ public:
C.ScaleCost = 0;
}
- bool isLess(Cost &Other);
+ bool isLess(const Cost &Other);
void Lose();
@@ -1358,6 +1355,8 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+ if (isLoser())
+ return;
assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
// Tally up the registers.
unsigned PrevAddRecCost = C.AddRecCost;
@@ -1467,7 +1466,7 @@ void Cost::Lose() {
}
/// Choose the lower cost.
-bool Cost::isLess(Cost &Other) {
+bool Cost::isLess(const Cost &Other) {
if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
C.Insns != Other.C.Insns)
return C.Insns < Other.C.Insns;
@@ -4081,23 +4080,24 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
continue;
// Divide out the factor, ignoring high bits, since we'll be
// scaling the value back up in the end.
- if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
- // TODO: This could be optimized to avoid all the copying.
- Formula F = Base;
- F.ScaledReg = Quotient;
- F.deleteBaseReg(F.BaseRegs[i]);
- // The canonical representation of 1*reg is reg, which is already in
- // Base. In that case, do not try to insert the formula, it will be
- // rejected anyway.
- if (F.Scale == 1 && (F.BaseRegs.empty() ||
- (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
- continue;
- // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
- // non canonical Formula with ScaledReg's loop not being L.
- if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
- F.canonicalize(*L);
- (void)InsertFormula(LU, LUIdx, F);
- }
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
+ if (!Quotient->isZero()) {
+ // TODO: This could be optimized to avoid all the copying.
+ Formula F = Base;
+ F.ScaledReg = Quotient;
+ F.deleteBaseReg(F.BaseRegs[i]);
+ // The canonical representation of 1*reg is reg, which is already in
+ // Base. In that case, do not try to insert the formula, it will be
+ // rejected anyway.
+ if (F.Scale == 1 && (F.BaseRegs.empty() ||
+ (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
+ continue;
+ // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
+ // non canonical Formula with ScaledReg's loop not being L.
+ if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
+ F.canonicalize(*L);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
}
}
@@ -5601,6 +5601,27 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
DeadInsts.emplace_back(OperandIsInstr);
}
+// Check if there are any loop exit values which are only used once within the
+// loop which may potentially be optimized with a call to rewriteLoopExitValue.
+static bool LoopExitValHasSingleUse(Loop *L) {
+ BasicBlock *ExitBB = L->getExitBlock();
+ if (!ExitBB)
+ return false;
+
+ for (PHINode &ExitPhi : ExitBB->phis()) {
+ if (ExitPhi.getNumIncomingValues() != 1)
+ break;
+
+ BasicBlock *Pred = ExitPhi.getIncomingBlock(0);
+ Value *IVNext = ExitPhi.getIncomingValueForBlock(Pred);
+ // One use would be the exit phi node, and there should be only one other
+ // use for this to be considered.
+ if (IVNext->getNumUses() == 2)
+ return true;
+ }
+ return false;
+}
+
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
@@ -5894,40 +5915,57 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
}
namespace {
+
+/// Enables more convenient iteration over a DWARF expression vector.
+static iterator_range<llvm::DIExpression::expr_op_iterator>
+ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
+ llvm::DIExpression::expr_op_iterator Begin =
+ llvm::DIExpression::expr_op_iterator(Expr.begin());
+ llvm::DIExpression::expr_op_iterator End =
+ llvm::DIExpression::expr_op_iterator(Expr.end());
+ return {Begin, End};
+}
+
struct SCEVDbgValueBuilder {
SCEVDbgValueBuilder() = default;
- SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
- Values = Base.Values;
+ SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
+
+ void clone(const SCEVDbgValueBuilder &Base) {
+ LocationOps = Base.LocationOps;
Expr = Base.Expr;
}
+ void clear() {
+ LocationOps.clear();
+ Expr.clear();
+ }
+
/// The DIExpression as we translate the SCEV.
SmallVector<uint64_t, 6> Expr;
/// The location ops of the DIExpression.
- SmallVector<llvm::ValueAsMetadata *, 2> Values;
+ SmallVector<Value *, 2> LocationOps;
void pushOperator(uint64_t Op) { Expr.push_back(Op); }
void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
/// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
/// in the set of values referenced by the expression.
- void pushValue(llvm::Value *V) {
+ void pushLocation(llvm::Value *V) {
Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
- auto *It =
- std::find(Values.begin(), Values.end(), llvm::ValueAsMetadata::get(V));
+ auto *It = std::find(LocationOps.begin(), LocationOps.end(), V);
unsigned ArgIndex = 0;
- if (It != Values.end()) {
- ArgIndex = std::distance(Values.begin(), It);
+ if (It != LocationOps.end()) {
+ ArgIndex = std::distance(LocationOps.begin(), It);
} else {
- ArgIndex = Values.size();
- Values.push_back(llvm::ValueAsMetadata::get(V));
+ ArgIndex = LocationOps.size();
+ LocationOps.push_back(V);
}
Expr.push_back(ArgIndex);
}
void pushValue(const SCEVUnknown *U) {
llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
- pushValue(V);
+ pushLocation(V);
}
bool pushConst(const SCEVConstant *C) {
@@ -5938,6 +5976,12 @@ struct SCEVDbgValueBuilder {
return true;
}
+ // Iterating the expression as DWARF ops is convenient when updating
+ // DWARF_OP_LLVM_args.
+ iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
+ return ToDwarfOpIter(Expr);
+ }
+
/// Several SCEV types are sequences of the same arithmetic operator applied
/// to constants and values that may be extended or truncated.
bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
@@ -5979,7 +6023,7 @@ struct SCEVDbgValueBuilder {
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!U->getValue())
return false;
- pushValue(U->getValue());
+ pushLocation(U->getValue());
} else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
@@ -6010,52 +6054,6 @@ struct SCEVDbgValueBuilder {
return Success;
}
- void setFinalExpression(llvm::DbgValueInst &DI, const DIExpression *OldExpr) {
- // Re-state assumption that this dbg.value is not variadic. Any remaining
- // opcodes in its expression operate on a single value already on the
- // expression stack. Prepend our operations, which will re-compute and
- // place that value on the expression stack.
- assert(!DI.hasArgList());
- auto *NewExpr =
- DIExpression::prependOpcodes(OldExpr, Expr, /*StackValue*/ true);
- DI.setExpression(NewExpr);
-
- auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(Values);
- DI.setRawLocation(llvm::DIArgList::get(DI.getContext(), ValArrayRef));
- }
-
- /// If a DVI can be emitted without a DIArgList, omit DW_OP_llvm_arg and the
- /// location op index 0.
- void setShortFinalExpression(llvm::DbgValueInst &DI,
- const DIExpression *OldExpr) {
- assert((Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && Expr[1] == 0) &&
- "Expected DW_OP_llvm_arg and 0.");
- DI.replaceVariableLocationOp(
- 0u, llvm::MetadataAsValue::get(DI.getContext(), Values[0]));
-
- // See setFinalExpression: prepend our opcodes on the start of any old
- // expression opcodes.
- assert(!DI.hasArgList());
- llvm::SmallVector<uint64_t, 6> FinalExpr(llvm::drop_begin(Expr, 2));
- auto *NewExpr =
- DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true);
- DI.setExpression(NewExpr);
- }
-
- /// Once the IV and variable SCEV translation is complete, write it to the
- /// source DVI.
- void applyExprToDbgValue(llvm::DbgValueInst &DI,
- const DIExpression *OldExpr) {
- assert(!Expr.empty() && "Unexpected empty expression.");
- // Emit a simpler form if only a single location is referenced.
- if (Values.size() == 1 && Expr[0] == llvm::dwarf::DW_OP_LLVM_arg &&
- Expr[1] == 0) {
- setShortFinalExpression(DI, OldExpr);
- } else {
- setFinalExpression(DI, OldExpr);
- }
- }
-
/// Return true if the combination of arithmetic operator and underlying
/// SCEV constant value is an identity function.
bool isIdentityFunction(uint64_t Op, const SCEV *S) {
@@ -6104,6 +6102,48 @@ struct SCEVDbgValueBuilder {
return true;
}
+ /// Create an expression that is an offset from a value (usually the IV).
+ void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
+ pushLocation(OffsetValue);
+ DIExpression::appendOffset(Expr, Offset);
+ LLVM_DEBUG(
+ dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
+ << std::to_string(Offset) << "\n");
+ }
+
+ /// Combine a translation of the SCEV and the IV to create an expression that
+ /// recovers a location's value.
+ /// returns true if an expression was created.
+ bool createIterCountExpr(const SCEV *S,
+ const SCEVDbgValueBuilder &IterationCount,
+ ScalarEvolution &SE) {
+ // SCEVs for SSA values are most frquently of the form
+ // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
+ // This is because %a is a PHI node that is not the IV. However, these
+ // SCEVs have not been observed to result in debuginfo-lossy optimisations,
+ // so its not expected this point will be reached.
+ if (!isa<SCEVAddRecExpr>(S))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
+ << '\n');
+
+ const auto *Rec = cast<SCEVAddRecExpr>(S);
+ if (!Rec->isAffine())
+ return false;
+
+ if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
+ return false;
+
+ // Initialise a new builder with the iteration count expression. In
+ // combination with the value's SCEV this enables recovery.
+ clone(IterationCount);
+ if (!SCEVToValueExpr(*Rec, SE))
+ return false;
+
+ return true;
+ }
+
/// Convert a SCEV of a value to a DIExpression that is pushed onto the
/// builder's expression stack. The stack should already contain an
/// expression for the iteration count, so that it can be multiplied by
@@ -6133,74 +6173,294 @@ struct SCEVDbgValueBuilder {
}
return true;
}
+
+ // Append the current expression and locations to a location list and an
+ // expression list. Modify the DW_OP_LLVM_arg indexes to account for
+ // the locations already present in the destination list.
+ void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
+ SmallVectorImpl<Value *> &DestLocations) {
+ assert(!DestLocations.empty() &&
+ "Expected the locations vector to contain the IV");
+ // The DWARF_OP_LLVM_arg arguments of the expression being appended must be
+ // modified to account for the locations already in the destination vector.
+ // All builders contain the IV as the first location op.
+ assert(!LocationOps.empty() &&
+ "Expected the location ops to contain the IV.");
+ // DestIndexMap[n] contains the index in DestLocations for the nth
+ // location in this SCEVDbgValueBuilder.
+ SmallVector<uint64_t, 2> DestIndexMap;
+ for (const auto &Op : LocationOps) {
+ auto It = find(DestLocations, Op);
+ if (It != DestLocations.end()) {
+ // Location already exists in DestLocations, reuse existing ArgIndex.
+ DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
+ continue;
+ }
+ // Location is not in DestLocations, add it.
+ DestIndexMap.push_back(DestLocations.size());
+ DestLocations.push_back(Op);
+ }
+
+ for (const auto &Op : expr_ops()) {
+ if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
+ Op.appendToVector(DestExpr);
+ continue;
+ }
+
+ DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
+ // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
+ // DestIndexMap[n] contains its new index in DestLocations.
+ uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
+ DestExpr.push_back(NewIndex);
+ }
+ }
};
+/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
+/// and DIExpression.
struct DVIRecoveryRec {
+ DVIRecoveryRec(DbgValueInst *DbgValue)
+ : DVI(DbgValue), Expr(DbgValue->getExpression()),
+ HadLocationArgList(false) {}
+
DbgValueInst *DVI;
DIExpression *Expr;
- Metadata *LocationOp;
- const llvm::SCEV *SCEV;
+ bool HadLocationArgList;
+ SmallVector<WeakVH, 2> LocationOps;
+ SmallVector<const llvm::SCEV *, 2> SCEVs;
+ SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;
+
+ void clear() {
+ for (auto &RE : RecoveryExprs)
+ RE.reset();
+ RecoveryExprs.clear();
+ }
+
+ ~DVIRecoveryRec() { clear(); }
};
} // namespace
-static void RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
- const SCEVDbgValueBuilder &IterationCount,
- ScalarEvolution &SE) {
- // LSR may add locations to previously single location-op DVIs which
- // are currently not supported.
- if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
- return;
+/// Returns the total number of DW_OP_llvm_arg operands in the expression.
+/// This helps in determining if a DIArglist is necessary or can be omitted from
+/// the dbg.value.
+static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
+ auto expr_ops = ToDwarfOpIter(Expr);
+ unsigned Count = 0;
+ for (auto Op : expr_ops)
+ if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
+ Count++;
+ return Count;
+}
- // SCEVs for SSA values are most frquently of the form
- // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
- // This is because %a is a PHI node that is not the IV. However, these
- // SCEVs have not been observed to result in debuginfo-lossy optimisations,
- // so its not expected this point will be reached.
- if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
- return;
+/// Overwrites DVI with the location and Ops as the DIExpression. This will
+/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
+/// because a DIArglist is not created for the first argument of the dbg.value.
+static void updateDVIWithLocation(DbgValueInst &DVI, Value *Location,
+ SmallVectorImpl<uint64_t> &Ops) {
+ assert(
+ numLLVMArgOps(Ops) == 0 &&
+ "Expected expression that does not contain any DW_OP_llvm_arg operands.");
+ DVI.setRawLocation(ValueAsMetadata::get(Location));
+ DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
+}
- LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
- << *CachedDVI.SCEV << '\n');
+/// Overwrite DVI with locations placed into a DIArglist.
+static void updateDVIWithLocations(DbgValueInst &DVI,
+ SmallVectorImpl<Value *> &Locations,
+ SmallVectorImpl<uint64_t> &Ops) {
+ assert(numLLVMArgOps(Ops) != 0 &&
+ "Expected expression that references DIArglist locations using "
+ "DW_OP_llvm_arg operands.");
+ SmallVector<ValueAsMetadata *, 3> MetadataLocs;
+ for (Value *V : Locations)
+ MetadataLocs.push_back(ValueAsMetadata::get(V));
+ auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
+ DVI.setRawLocation(llvm::DIArgList::get(DVI.getContext(), ValArrayRef));
+ DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
+}
- const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
- if (!Rec->isAffine())
- return;
+/// Write the new expression and new location ops for the dbg.value. If possible
+/// reduce the size of the dbg.value intrinsic by omitting DIArglist. This
+/// can be omitted if:
+/// 1. There is only a single location, referenced by a single DW_OP_llvm_arg.
+/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
+static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
+ SmallVectorImpl<Value *> &NewLocationOps,
+ SmallVectorImpl<uint64_t> &NewExpr) {
+ unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
+ if (NumLLVMArgs == 0) {
+ // Location assumed to be on the stack.
+ updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], NewExpr);
+ } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
+ // There is only a single DW_OP_llvm_arg at the start of the expression,
+ // so it can be omitted along with DIArglist.
+ assert(NewExpr[1] == 0 &&
+ "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
+ llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
+ updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], ShortenedOps);
+ } else {
+ // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
+ updateDVIWithLocations(*DVIRec.DVI, NewLocationOps, NewExpr);
+ }
- if (CachedDVI.SCEV->getExpressionSize() > MaxSCEVSalvageExpressionSize)
- return;
+ // If the DIExpression was previously empty then add the stack terminator.
+ // Non-empty expressions have only had elements inserted into them and so the
+ // terminator should already be present e.g. stack_value or fragment.
+ DIExpression *SalvageExpr = DVIRec.DVI->getExpression();
+ if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
+ SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
+ DVIRec.DVI->setExpression(SalvageExpr);
+ }
+}
- // Initialise a new builder with the iteration count expression. In
- // combination with the value's SCEV this enables recovery.
- SCEVDbgValueBuilder RecoverValue(IterationCount);
- if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
- return;
+/// Cached location ops may be erased during LSR, in which case an undef is
+/// required when restoring from the cache. The type of that location is no
+/// longer available, so just use int8. The undef will be replaced by one or
+/// more locations later when a SCEVDbgValueBuilder selects alternative
+/// locations to use for the salvage.
+static Value *getValueOrUndef(WeakVH &VH, LLVMContext &C) {
+ return (VH) ? VH : UndefValue::get(llvm::Type::getInt8Ty(C));
+}
+
+/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
+static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
+ LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
+ << "scev-salvage: post-LSR: " << *DVIRec.DVI << '\n');
+ assert(DVIRec.Expr && "Expected an expression");
+ DVIRec.DVI->setExpression(DVIRec.Expr);
- LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
- RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
- LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
+ // Even a single location-op may be inside a DIArgList and referenced with
+ // DW_OP_LLVM_arg, which is valid only with a DIArgList.
+ if (!DVIRec.HadLocationArgList) {
+ assert(DVIRec.LocationOps.size() == 1 &&
+ "Unexpected number of location ops.");
+ // LSR's unsuccessful salvage attempt may have added DIArgList, which in
+ // this case was not present before, so force the location back to a single
+ // uncontained Value.
+ Value *CachedValue =
+ getValueOrUndef(DVIRec.LocationOps[0], DVIRec.DVI->getContext());
+ DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue));
+ } else {
+ SmallVector<ValueAsMetadata *, 3> MetadataLocs;
+ for (WeakVH VH : DVIRec.LocationOps) {
+ Value *CachedValue = getValueOrUndef(VH, DVIRec.DVI->getContext());
+ MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
+ }
+ auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
+ DVIRec.DVI->setRawLocation(
+ llvm::DIArgList::get(DVIRec.DVI->getContext(), ValArrayRef));
+ }
+ LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DVIRec.DVI << '\n');
}
-static void RewriteDVIUsingOffset(DVIRecoveryRec &DVIRec, llvm::PHINode &IV,
- int64_t Offset) {
- assert(!DVIRec.DVI->hasArgList() && "Expected single location-op dbg.value.");
- DbgValueInst *DVI = DVIRec.DVI;
- SmallVector<uint64_t, 8> Ops;
- DIExpression::appendOffset(Ops, Offset);
- DIExpression *Expr = DIExpression::prependOpcodes(DVIRec.Expr, Ops, true);
- LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *DVIRec.DVI << '\n');
- DVI->setExpression(Expr);
- llvm::Value *ValIV = dyn_cast<llvm::Value>(&IV);
- DVI->replaceVariableLocationOp(
- 0u, llvm::MetadataAsValue::get(DVI->getContext(),
- llvm::ValueAsMetadata::get(ValIV)));
- LLVM_DEBUG(dbgs() << "scev-salvage: updated with offset to IV: "
- << *DVIRec.DVI << '\n');
+static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
+ llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
+ const SCEV *SCEVInductionVar,
+ SCEVDbgValueBuilder IterCountExpr) {
+ if (!DVIRec.DVI->isUndef())
+ return false;
+
+ // LSR may have caused several changes to the dbg.value in the failed salvage
+ // attempt. So restore the DIExpression, the location ops and also the
+ // location ops format, which is always DIArglist for multiple ops, but only
+ // sometimes for a single op.
+ restorePreTransformState(DVIRec);
+
+ // LocationOpIndexMap[i] will store the post-LSR location index of
+ // the non-optimised out location at pre-LSR index i.
+ SmallVector<int64_t, 2> LocationOpIndexMap;
+ LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
+ SmallVector<Value *, 2> NewLocationOps;
+ NewLocationOps.push_back(LSRInductionVar);
+
+ for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
+ WeakVH VH = DVIRec.LocationOps[i];
+ // Place the locations not optimised out in the list first, avoiding
+ // inserts later. The map is used to update the DIExpression's
+ // DW_OP_LLVM_arg arguments as the expression is updated.
+ if (VH && !isa<UndefValue>(VH)) {
+ NewLocationOps.push_back(VH);
+ LocationOpIndexMap[i] = NewLocationOps.size() - 1;
+ LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
+ << " now at index " << LocationOpIndexMap[i] << "\n");
+ continue;
+ }
+
+ // It's possible that a value referred to in the SCEV may have been
+ // optimised out by LSR.
+ if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
+ SE.containsUndefs(DVIRec.SCEVs[i])) {
+ LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
+ << " refers to a location that is now undef or erased. "
+ "Salvage abandoned.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
+ << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
+
+ DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
+ SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
+
+ // Create an offset-based salvage expression if possible, as it requires
+ // less DWARF ops than an iteration count-based expression.
+ if (Optional<APInt> Offset =
+ SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
+ if (Offset.getValue().getMinSignedBits() <= 64)
+ SalvageExpr->createOffsetExpr(Offset.getValue().getSExtValue(),
+ LSRInductionVar);
+ } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
+ SE))
+ return false;
+ }
+
+ // Merge the DbgValueBuilder generated expressions and the original
+  // DIExpression, place the result into a new vector.
+ SmallVector<uint64_t, 3> NewExpr;
+ if (DVIRec.Expr->getNumElements() == 0) {
+ assert(DVIRec.RecoveryExprs.size() == 1 &&
+ "Expected only a single recovery expression for an empty "
+ "DIExpression.");
+ assert(DVIRec.RecoveryExprs[0] &&
+ "Expected a SCEVDbgSalvageBuilder for location 0");
+ SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
+ B->appendToVectors(NewExpr, NewLocationOps);
+ }
+ for (const auto &Op : DVIRec.Expr->expr_ops()) {
+ // Most Ops needn't be updated.
+ if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
+ Op.appendToVector(NewExpr);
+ continue;
+ }
+
+ uint64_t LocationArgIndex = Op.getArg(0);
+ SCEVDbgValueBuilder *DbgBuilder =
+ DVIRec.RecoveryExprs[LocationArgIndex].get();
+      // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
+ // optimise it away. So just translate the argument to the updated
+ // location index.
+ if (!DbgBuilder) {
+ NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
+ assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
+ "Expected a positive index for the location-op position.");
+ NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
+ continue;
+ }
+ // The location has a recovery expression.
+ DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
+ }
+
+ UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
+ LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DVI << "\n");
+ return true;
}
+/// Obtain an expression for the iteration count, then attempt to salvage the
+/// dbg.value intrinsics.
static void
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
llvm::PHINode *LSRInductionVar,
- SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) {
+ SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
if (DVIToUpdate.empty())
return;
@@ -6213,49 +6473,22 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
if (!IVAddRec->isAffine())
return;
+ // Prevent translation using excessive resources.
if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
return;
// The iteration count is required to recover location values.
SCEVDbgValueBuilder IterCountExpr;
- IterCountExpr.pushValue(LSRInductionVar);
+ IterCountExpr.pushLocation(LSRInductionVar);
if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
return;
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
<< '\n');
- // Needn't salvage if the location op hasn't been undef'd by LSR.
for (auto &DVIRec : DVIToUpdate) {
- if (!DVIRec.DVI->isUndef())
- continue;
-
- // Some DVIs that were single location-op when cached are now multi-op,
- // due to LSR optimisations. However, multi-op salvaging is not yet
- // supported by SCEV salvaging. But, we can attempt a salvage by restoring
- // the pre-LSR single-op expression.
- if (DVIRec.DVI->hasArgList()) {
- if (!DVIRec.DVI->getVariableLocationOp(0))
- continue;
- llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType();
- DVIRec.DVI->setRawLocation(
- llvm::ValueAsMetadata::get(UndefValue::get(Ty)));
- DVIRec.DVI->setExpression(DVIRec.Expr);
- }
-
- LLVM_DEBUG(dbgs() << "scev-salvage: value to recover SCEV: "
- << *DVIRec.SCEV << '\n');
-
- // Create a simple expression if the IV and value to salvage SCEVs
- // start values differ by only a constant value.
- if (Optional<APInt> Offset =
- SE.computeConstantDifference(DVIRec.SCEV, SCEVInductionVar)) {
- if (Offset.getValue().getMinSignedBits() <= 64)
- RewriteDVIUsingOffset(DVIRec, *LSRInductionVar,
- Offset.getValue().getSExtValue());
- } else {
- RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
- }
+ SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
+ IterCountExpr);
}
}
}
@@ -6263,39 +6496,53 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
/// Identify and cache salvageable DVI locations and expressions along with the
/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
/// caching and salvaging.
-static void
-DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
- SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs,
- SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
+static void DbgGatherSalvagableDVI(
+ Loop *L, ScalarEvolution &SE,
+ SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
+ SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
for (auto &B : L->getBlocks()) {
for (auto &I : *B) {
auto DVI = dyn_cast<DbgValueInst>(&I);
if (!DVI)
continue;
-
+      // Ensure that if any location op is undef that the dbg.value is not
+ // cached.
if (DVI->isUndef())
continue;
- if (DVI->hasArgList())
- continue;
+ // Check that the location op SCEVs are suitable for translation to
+ // DIExpression.
+ const auto &HasTranslatableLocationOps =
+ [&](const DbgValueInst *DVI) -> bool {
+ for (const auto LocOp : DVI->location_ops()) {
+ if (!LocOp)
+ return false;
- if (!DVI->getVariableLocationOp(0) ||
- !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
- continue;
+ if (!SE.isSCEVable(LocOp->getType()))
+ return false;
- // SCEVUnknown wraps an llvm::Value, it does not have a start and stride.
- // Therefore no translation to DIExpression is performed.
- const SCEV *S = SE.getSCEV(DVI->getVariableLocationOp(0));
- if (isa<SCEVUnknown>(S))
- continue;
+ const SCEV *S = SE.getSCEV(LocOp);
+ if (SE.containsUndefs(S))
+ return false;
+ }
+ return true;
+ };
- // Avoid wasting resources generating an expression containing undef.
- if (SE.containsUndefs(S))
+ if (!HasTranslatableLocationOps(DVI))
continue;
- SalvageableDVISCEVs.push_back(
- {DVI, DVI->getExpression(), DVI->getRawLocation(),
- SE.getSCEV(DVI->getVariableLocationOp(0))});
+ std::unique_ptr<DVIRecoveryRec> NewRec =
+ std::make_unique<DVIRecoveryRec>(DVI);
+ // Each location Op may need a SCEVDbgValueBuilder in order to recover it.
+ // Pre-allocating a vector will enable quick lookups of the builder later
+ // during the salvage.
+ NewRec->RecoveryExprs.resize(DVI->getNumVariableLocationOps());
+ for (const auto LocOp : DVI->location_ops()) {
+ NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
+ NewRec->LocationOps.push_back(LocOp);
+ NewRec->HadLocationArgList = DVI->hasArgList();
+ }
+ SalvageableDVISCEVs.push_back(std::move(NewRec));
DVIHandles.insert(DVI);
}
}
@@ -6344,9 +6591,9 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// Debug preservation - before we start removing anything identify which DVI
// meet the salvageable criteria and store their DIExpression and SCEVs.
- SmallVector<DVIRecoveryRec, 2> SalvageableDVI;
+ SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
- DbgGatherSalvagableDVI(L, SE, SalvageableDVI, DVIHandles);
+ DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);
bool Changed = false;
std::unique_ptr<MemorySSAUpdater> MSSAU;
@@ -6375,8 +6622,26 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
}
}
+ // LSR may at times remove all uses of an induction variable from a loop.
+ // The only remaining use is the PHI in the exit block.
+ // When this is the case, if the exit value of the IV can be calculated using
+ // SCEV, we can replace the exit block PHI with the final value of the IV and
+ // skip the updates in each loop iteration.
+ if (L->isRecursivelyLCSSAForm(DT, LI) && LoopExitValHasSingleUse(L)) {
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SCEVExpander Rewriter(SE, DL, "lsr", false);
+ int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
+ OnlyCheapRepl, DeadInsts);
+ if (Rewrites) {
+ Changed = true;
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
+ MSSAU.get());
+ DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
+ }
+ }
- if (SalvageableDVI.empty())
+ if (SalvageableDVIRecords.empty())
return Changed;
// Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
@@ -6384,13 +6649,16 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// TODO: Allow for multiple IV references for nested AddRecSCEVs
for (auto &L : LI) {
if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
- DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVI);
+ DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
else {
LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
"could not be identified.\n");
}
}
+ for (auto &Rec : SalvageableDVIRecords)
+ Rec->clear();
+ SalvageableDVIRecords.clear();
DVIHandles.clear();
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 1ecbb86724e1..8c2868563227 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -42,10 +43,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
-#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <cassert>
@@ -331,14 +330,23 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
Loop *SubLoop = L->getSubLoops()[0];
- unsigned InnerLoopSize =
+ InstructionCost InnerLoopSizeIC =
ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
Convergent, TTI, EphValues, UP.BEInsns);
- unsigned OuterLoopSize =
+ InstructionCost OuterLoopSizeIC =
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n");
- LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
+ LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSizeIC << "\n");
+ LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSizeIC << "\n");
+
+ if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) {
+ LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
+ << " with invalid cost.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ unsigned InnerLoopSize = *InnerLoopSizeIC.getValue();
+ unsigned OuterLoopSize = *OuterLoopSizeIC.getValue();
+
if (NotDuplicatable) {
LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
"instructions.\n");
@@ -364,7 +372,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupRemainderInner});
- if (NewInnerEpilogueLoopID.hasValue())
+ if (NewInnerEpilogueLoopID)
SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
// Find trip count and trip multiple
@@ -394,14 +402,14 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupRemainderOuter});
- if (NewOuterEpilogueLoopID.hasValue())
+ if (NewOuterEpilogueLoopID)
EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
}
Optional<MDNode *> NewInnerLoopID =
makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupInner});
- if (NewInnerLoopID.hasValue())
+ if (NewInnerLoopID)
SubLoop->setLoopID(NewInnerLoopID.getValue());
else
SubLoop->setLoopID(OrigSubLoopID);
@@ -410,7 +418,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
OrigOuterLoopID,
{LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
- if (NewOuterLoopID.hasValue()) {
+ if (NewOuterLoopID) {
L->setLoopID(NewOuterLoopID.getValue());
// Do not setLoopAlreadyUnrolled if a followup was given.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 9beb2281cf0f..fda86afe5f9d 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -25,7 +25,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -133,7 +132,7 @@ static cl::opt<bool> UnrollAllowRemainder(
"when unrolling a loop."));
static cl::opt<bool>
- UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
+ UnrollRuntime("unroll-runtime", cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
static cl::opt<unsigned> UnrollMaxUpperBound(
@@ -254,19 +253,19 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
// Apply user values provided by argument
- if (UserThreshold.hasValue()) {
+ if (UserThreshold) {
UP.Threshold = *UserThreshold;
UP.PartialThreshold = *UserThreshold;
}
- if (UserCount.hasValue())
+ if (UserCount)
UP.Count = *UserCount;
- if (UserAllowPartial.hasValue())
+ if (UserAllowPartial)
UP.Partial = *UserAllowPartial;
- if (UserRuntime.hasValue())
+ if (UserRuntime)
UP.Runtime = *UserRuntime;
- if (UserUpperBound.hasValue())
+ if (UserUpperBound)
UP.UpperBound = *UserUpperBound;
- if (UserFullUnrollMaxCount.hasValue())
+ if (UserFullUnrollMaxCount)
UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
return UP;
@@ -664,7 +663,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
}
/// ApproximateLoopSize - Approximate the size of the loop.
-unsigned llvm::ApproximateLoopSize(
+InstructionCost llvm::ApproximateLoopSize(
const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
@@ -675,7 +674,7 @@ unsigned llvm::ApproximateLoopSize(
NotDuplicatable = Metrics.notDuplicatable;
Convergent = Metrics.convergent;
- unsigned LoopSize = Metrics.NumInsts;
+ InstructionCost LoopSize = Metrics.NumInsts;
  // Don't allow an estimate of size zero. This would allow unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
@@ -683,7 +682,9 @@ unsigned llvm::ApproximateLoopSize(
// that each loop has at least three instructions (likely a conditional
// branch, a comparison feeding that branch, and some kind of loop increment
// feeding that comparison instruction).
- LoopSize = std::max(LoopSize, BEInsns + 1);
+ if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
+ // This is an open coded max() on InstructionCost
+ LoopSize = BEInsns + 1;
return LoopSize;
}
@@ -788,15 +789,13 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
// 2nd priority is unroll count set by pragma.
if (PInfo.PragmaCount > 0) {
- if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
- UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
+ if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
return PInfo.PragmaCount;
}
- if (PInfo.PragmaFullUnroll && TripCount != 0) {
- if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
- return TripCount;
- }
+ if (PInfo.PragmaFullUnroll && TripCount != 0)
+ return TripCount;
+
  // if didn't return until here, should continue to other priorities
return None;
}
@@ -912,7 +911,7 @@ bool llvm::computeUnrollCount(
if (PP.PeelCount) {
if (UnrollCount.getNumOccurrences() > 0) {
report_fatal_error("Cannot specify both explicit peel count and "
- "explicit unroll count");
+ "explicit unroll count", /*GenCrashDiag=*/false);
}
UP.Count = 1;
UP.Runtime = false;
@@ -1192,10 +1191,18 @@ static LoopUnrollResult tryToUnrollLoop(
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
- unsigned LoopSize =
+ InstructionCost LoopSizeIC =
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSizeIC << "\n");
+
+ if (!LoopSizeIC.isValid()) {
+ LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
+ << " with invalid cost.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ unsigned LoopSize = *LoopSizeIC.getValue();
+
if (NotDuplicatable) {
LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
<< " instructions.\n");
@@ -1316,7 +1323,7 @@ static LoopUnrollResult tryToUnrollLoop(
Optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
LLVMLoopUnrollFollowupRemainder});
- if (RemainderLoopID.hasValue())
+ if (RemainderLoopID)
RemainderLoop->setLoopID(RemainderLoopID.getValue());
}
@@ -1324,7 +1331,7 @@ static LoopUnrollResult tryToUnrollLoop(
Optional<MDNode *> NewLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
LLVMLoopUnrollFollowupUnrolled});
- if (NewLoopID.hasValue()) {
+ if (NewLoopID) {
L->setLoopID(NewLoopID.getValue());
// Do not setLoopAlreadyUnrolled if loop attributes have been specified
@@ -1548,8 +1555,12 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
PreservedAnalyses LoopUnrollPass::run(Function &F,
FunctionAnalysisManager &AM) {
- auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
+ // There are no loops in the function. Return before computing other expensive
+ // analyses.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
deleted file mode 100644
index 76bb5497c2c2..000000000000
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ /dev/null
@@ -1,1774 +0,0 @@
-//===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass transforms loops that contain branches on loop-invariant conditions
-// to multiple loops. For example, it turns the left into the right code:
-//
-// for (...) if (lic)
-// A for (...)
-// if (lic) A; B; C
-// B else
-// C for (...)
-// A; C
-//
-// This can increase the size of the code exponentially (doubling it every time
-// a loop is unswitched) so we only unswitch if the resultant code will be
-// smaller than a threshold.
-//
-// This pass expects LICM to be run before it to hoist invariant conditions out
-// of the loop, to make the unswitching opportunity obvious.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <algorithm>
-#include <cassert>
-#include <map>
-#include <set>
-#include <tuple>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-unswitch"
-
-STATISTIC(NumBranches, "Number of branches unswitched");
-STATISTIC(NumSwitches, "Number of switches unswitched");
-STATISTIC(NumGuards, "Number of guards unswitched");
-STATISTIC(NumSelects , "Number of selects unswitched");
-STATISTIC(NumTrivial , "Number of unswitches that are trivial");
-STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
-STATISTIC(TotalInsts, "Total number of instructions analyzed");
-
-// The specific value of 100 here was chosen based only on intuition and a
-// few specific examples.
-static cl::opt<unsigned>
-Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
- cl::init(100), cl::Hidden);
-
-static cl::opt<unsigned>
- MSSAThreshold("loop-unswitch-memoryssa-threshold",
- cl::desc("Max number of memory uses to explore during "
- "partial unswitching analysis"),
- cl::init(100), cl::Hidden);
-
-namespace {
-
- class LUAnalysisCache {
- using UnswitchedValsMap =
- DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>;
- using UnswitchedValsIt = UnswitchedValsMap::iterator;
-
- struct LoopProperties {
- unsigned CanBeUnswitchedCount;
- unsigned WasUnswitchedCount;
- unsigned SizeEstimation;
- UnswitchedValsMap UnswitchedVals;
- };
-
- // Here we use std::map instead of DenseMap, since we need to keep valid
- // LoopProperties pointer for current loop for better performance.
- using LoopPropsMap = std::map<const Loop *, LoopProperties>;
- using LoopPropsMapIt = LoopPropsMap::iterator;
-
- LoopPropsMap LoopsProperties;
- UnswitchedValsMap *CurLoopInstructions = nullptr;
- LoopProperties *CurrentLoopProperties = nullptr;
-
- // A loop unswitching with an estimated cost above this threshold
- // is not performed. MaxSize is turned into unswitching quota for
- // the current loop, and reduced correspondingly, though note that
- // the quota is returned by releaseMemory() when the loop has been
- // processed, so that MaxSize will return to its previous
- // value. So in most cases MaxSize will equal the Threshold flag
- // when a new loop is processed. An exception to that is that
- // MaxSize will have a smaller value while processing nested loops
- // that were introduced due to loop unswitching of an outer loop.
- //
- // FIXME: The way that MaxSize works is subtle and depends on the
- // pass manager processing loops and calling releaseMemory() in a
- // specific order. It would be good to find a more straightforward
- // way of doing what MaxSize does.
- unsigned MaxSize;
-
- public:
- LUAnalysisCache() : MaxSize(Threshold) {}
-
- // Analyze loop. Check its size, calculate is it possible to unswitch
- // it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
- AssumptionCache *AC);
-
- // Clean all data related to given loop.
- void forgetLoop(const Loop *L);
-
- // Mark case value as unswitched.
- // Since SI instruction can be partly unswitched, in order to avoid
- // extra unswitching in cloned loops keep track all unswitched values.
- void setUnswitched(const SwitchInst *SI, const Value *V);
-
- // Check was this case value unswitched before or not.
- bool isUnswitched(const SwitchInst *SI, const Value *V);
-
- // Returns true if another unswitching could be done within the cost
- // threshold.
- bool costAllowsUnswitching();
-
- // Clone all loop-unswitch related loop properties.
- // Redistribute unswitching quotas.
- // Note, that new loop data is stored inside the VMap.
- void cloneData(const Loop *NewLoop, const Loop *OldLoop,
- const ValueToValueMapTy &VMap);
- };
-
- class LoopUnswitch : public LoopPass {
- LoopInfo *LI; // Loop information
- LPPassManager *LPM;
- AssumptionCache *AC;
-
- // Used to check if second loop needs processing after
- // rewriteLoopBodyWithConditionConstant rewrites first loop.
- std::vector<Loop*> LoopProcessWorklist;
-
- LUAnalysisCache BranchesInfo;
-
- bool OptimizeForSize;
- bool RedoLoop = false;
-
- Loop *CurrentLoop = nullptr;
- DominatorTree *DT = nullptr;
- MemorySSA *MSSA = nullptr;
- AAResults *AA = nullptr;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- BasicBlock *LoopHeader = nullptr;
- BasicBlock *LoopPreheader = nullptr;
-
- bool SanitizeMemory;
- SimpleLoopSafetyInfo SafetyInfo;
-
- // LoopBlocks contains all of the basic blocks of the loop, including the
- // preheader of the loop, the body of the loop, and the exit blocks of the
- // loop, in that order.
- std::vector<BasicBlock*> LoopBlocks;
- // NewBlocks contained cloned copy of basic blocks from LoopBlocks.
- std::vector<BasicBlock*> NewBlocks;
-
- bool HasBranchDivergence;
-
- public:
- static char ID; // Pass ID, replacement for typeid
-
- explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false)
- : LoopPass(ID), OptimizeForSize(Os),
- HasBranchDivergence(HasBranchDivergence) {
- initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool processCurrentLoop();
- bool isUnreachableDueToPreviousUnswitching(BasicBlock *);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG.
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // Lazy BFI and BPI are marked as preserved here so Loop Unswitching
- // can remain part of the same loop pass as LICM
- AU.addPreserved<LazyBlockFrequencyInfoPass>();
- AU.addPreserved<LazyBranchProbabilityInfoPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- if (HasBranchDivergence)
- AU.addRequired<LegacyDivergenceAnalysis>();
- getLoopAnalysisUsage(AU);
- }
-
- private:
- void releaseMemory() override { BranchesInfo.forgetLoop(CurrentLoop); }
-
- void initLoopData() {
- LoopHeader = CurrentLoop->getHeader();
- LoopPreheader = CurrentLoop->getLoopPreheader();
- }
-
- /// Split all of the edges from inside the loop to their exit blocks.
- /// Update the appropriate Phi nodes as we do so.
- void splitExitEdges(Loop *L,
- const SmallVectorImpl<BasicBlock *> &ExitBlocks);
-
- bool tryTrivialLoopUnswitch(bool &Changed);
-
- bool unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI = nullptr,
- ArrayRef<Instruction *> ToDuplicate = {});
- void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
- BasicBlock *ExitBlock, Instruction *TI);
- void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
- Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate = {});
-
- void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
- Constant *Val, bool IsEqual);
-
- void
- emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest, BasicBlock *FalseDest,
- BranchInst *OldBranch, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate = {});
-
- void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L);
-
- /// Given that the Invariant is not equal to Val. Simplify instructions
- /// in the loop.
- Value *simplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant,
- Constant *Val);
- };
-
-} // end anonymous namespace
-
-// Analyze loop. Check its size, calculate is it possible to unswitch
-// it. Returns true if we can unswitch this loop.
-bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
- AssumptionCache *AC) {
- LoopPropsMapIt PropsIt;
- bool Inserted;
- std::tie(PropsIt, Inserted) =
- LoopsProperties.insert(std::make_pair(L, LoopProperties()));
-
- LoopProperties &Props = PropsIt->second;
-
- if (Inserted) {
- // New loop.
-
- // Limit the number of instructions to avoid causing significant code
- // expansion, and the number of basic blocks, to avoid loops with
- // large numbers of branches which cause loop unswitching to go crazy.
- // This is a very ad-hoc heuristic.
-
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(L, AC, EphValues);
-
- // FIXME: This is overly conservative because it does not take into
- // consideration code simplification opportunities and code that can
- // be shared by the resultant unswitched loops.
- CodeMetrics Metrics;
- for (BasicBlock *BB : L->blocks())
- Metrics.analyzeBasicBlock(BB, TTI, EphValues);
-
- Props.SizeEstimation = Metrics.NumInsts;
- Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
- Props.WasUnswitchedCount = 0;
- MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
-
- if (Metrics.notDuplicatable) {
- LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName()
- << ", contents cannot be "
- << "duplicated!\n");
- return false;
- }
- }
-
- // Be careful. This links are good only before new loop addition.
- CurrentLoopProperties = &Props;
- CurLoopInstructions = &Props.UnswitchedVals;
-
- return true;
-}
-
-// Clean all data related to given loop.
-void LUAnalysisCache::forgetLoop(const Loop *L) {
- LoopPropsMapIt LIt = LoopsProperties.find(L);
-
- if (LIt != LoopsProperties.end()) {
- LoopProperties &Props = LIt->second;
- MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) *
- Props.SizeEstimation;
- LoopsProperties.erase(LIt);
- }
-
- CurrentLoopProperties = nullptr;
- CurLoopInstructions = nullptr;
-}
-
-// Mark case value as unswitched.
-// Since SI instruction can be partly unswitched, in order to avoid
-// extra unswitching in cloned loops keep track all unswitched values.
-void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) {
- (*CurLoopInstructions)[SI].insert(V);
-}
-
-// Check was this case value unswitched before or not.
-bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) {
- return (*CurLoopInstructions)[SI].count(V);
-}
-
-bool LUAnalysisCache::costAllowsUnswitching() {
- return CurrentLoopProperties->CanBeUnswitchedCount > 0;
-}
-
-// Clone all loop-unswitch related loop properties.
-// Redistribute unswitching quotas.
-// Note, that new loop data is stored inside the VMap.
-void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
- const ValueToValueMapTy &VMap) {
- LoopProperties &NewLoopProps = LoopsProperties[NewLoop];
- LoopProperties &OldLoopProps = *CurrentLoopProperties;
- UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals;
-
- // Reallocate "can-be-unswitched quota"
-
- --OldLoopProps.CanBeUnswitchedCount;
- ++OldLoopProps.WasUnswitchedCount;
- NewLoopProps.WasUnswitchedCount = 0;
- unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
- NewLoopProps.CanBeUnswitchedCount = Quota / 2;
- OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
-
- NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
-
- // Clone unswitched values info:
- // for new loop switches we clone info about values that was
- // already unswitched and has redundant successors.
- for (const auto &I : Insts) {
- const SwitchInst *OldInst = I.first;
- Value *NewI = VMap.lookup(OldInst);
- const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI);
- assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
-
- NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
- }
-}
-
-char LoopUnswitch::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
- false, false)
-
-Pass *llvm::createLoopUnswitchPass(bool Os, bool HasBranchDivergence) {
- return new LoopUnswitch(Os, HasBranchDivergence);
-}
-
-/// Operator chain lattice.
-enum OperatorChain {
- OC_OpChainNone, ///< There is no operator.
- OC_OpChainOr, ///< There are only ORs.
- OC_OpChainAnd, ///< There are only ANDs.
- OC_OpChainMixed ///< There are ANDs and ORs.
-};
-
-/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
-/// an invariant piece, return the invariant. Otherwise, return null.
-//
-/// NOTE: findLIVLoopCondition will not return a partial LIV by walking up a
-/// mixed operator chain, as we can not reliably find a value which will
-/// simplify the operator chain. If the chain is AND-only or OR-only, we can use
-/// 0 or ~0 to simplify the chain.
-///
-/// NOTE: In case a partial LIV and a mixed operator chain, we may be able to
-/// simplify the condition itself to a loop variant condition, but at the
-/// cost of creating an entirely new loop.
-static Value *findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
- OperatorChain &ParentChain,
- DenseMap<Value *, Value *> &Cache,
- MemorySSAUpdater *MSSAU) {
- auto CacheIt = Cache.find(Cond);
- if (CacheIt != Cache.end())
- return CacheIt->second;
-
- // We started analyze new instruction, increment scanned instructions counter.
- ++TotalInsts;
-
- // We can never unswitch on vector conditions.
- if (Cond->getType()->isVectorTy())
- return nullptr;
-
- // Constants should be folded, not unswitched on!
- if (isa<Constant>(Cond)) return nullptr;
-
- // TODO: Handle: br (VARIANT|INVARIANT).
-
- // Hoist simple values out.
- if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) {
- Cache[Cond] = Cond;
- return Cond;
- }
-
- // Walk up the operator chain to find partial invariant conditions.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
- if (BO->getOpcode() == Instruction::And ||
- BO->getOpcode() == Instruction::Or) {
- // Given the previous operator, compute the current operator chain status.
- OperatorChain NewChain;
- switch (ParentChain) {
- case OC_OpChainNone:
- NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
- OC_OpChainOr;
- break;
- case OC_OpChainOr:
- NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr :
- OC_OpChainMixed;
- break;
- case OC_OpChainAnd:
- NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
- OC_OpChainMixed;
- break;
- case OC_OpChainMixed:
- NewChain = OC_OpChainMixed;
- break;
- }
-
- // If we reach a Mixed state, we do not want to keep walking up as we can not
- // reliably find a value that will simplify the chain. With this check, we
- // will return null on the first sight of mixed chain and the caller will
- // either backtrack to find partial LIV in other operand or return null.
- if (NewChain != OC_OpChainMixed) {
- // Update the current operator chain type before we search up the chain.
- ParentChain = NewChain;
- // If either the left or right side is invariant, we can unswitch on this,
- // which will cause the branch to go away in one loop and the condition to
- // simplify in the other one.
- if (Value *LHS = findLIVLoopCondition(BO->getOperand(0), L, Changed,
- ParentChain, Cache, MSSAU)) {
- Cache[Cond] = LHS;
- return LHS;
- }
- // We did not manage to find a partial LIV in operand(0). Backtrack and try
- // operand(1).
- ParentChain = NewChain;
- if (Value *RHS = findLIVLoopCondition(BO->getOperand(1), L, Changed,
- ParentChain, Cache, MSSAU)) {
- Cache[Cond] = RHS;
- return RHS;
- }
- }
- }
-
- Cache[Cond] = nullptr;
- return nullptr;
-}
-
-/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
-/// an invariant piece, return the invariant along with the operator chain type.
-/// Otherwise, return null.
-static std::pair<Value *, OperatorChain>
-findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
- MemorySSAUpdater *MSSAU) {
- DenseMap<Value *, Value *> Cache;
- OperatorChain OpChain = OC_OpChainNone;
- Value *FCond = findLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU);
-
- // In case we do find a LIV, it can not be obtained by walking up a mixed
- // operator chain.
- assert((!FCond || OpChain != OC_OpChainMixed) &&
- "Do not expect a partial LIV with mixed operator chain");
- return {FCond, OpChain};
-}
-
-bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
- if (skipLoop(L))
- return false;
-
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LPM = &LPMRef;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- CurrentLoop = L;
- Function *F = CurrentLoop->getHeader()->getParent();
-
- SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
- if (SanitizeMemory)
- SafetyInfo.computeLoopSafetyInfo(L);
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- bool Changed = false;
- do {
- assert(CurrentLoop->isLCSSAForm(*DT));
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- RedoLoop = false;
- Changed |= processCurrentLoop();
- } while (RedoLoop);
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- return Changed;
-}
-
-// Return true if the BasicBlock BB is unreachable from the loop header.
-// Return false, otherwise.
-bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) {
- auto *Node = DT->getNode(BB)->getIDom();
- BasicBlock *DomBB = Node->getBlock();
- while (CurrentLoop->contains(DomBB)) {
- BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator());
-
- Node = DT->getNode(DomBB)->getIDom();
- DomBB = Node->getBlock();
-
- if (!BInst || !BInst->isConditional())
- continue;
-
- Value *Cond = BInst->getCondition();
- if (!isa<ConstantInt>(Cond))
- continue;
-
- BasicBlock *UnreachableSucc =
- Cond == ConstantInt::getTrue(Cond->getContext())
- ? BInst->getSuccessor(1)
- : BInst->getSuccessor(0);
-
- if (DT->dominates(UnreachableSucc, BB))
- return true;
- }
- return false;
-}
-
-/// FIXME: Remove this workaround when freeze related patches are done.
-/// LoopUnswitch and Equality propagation in GVN have discrepancy about
-/// whether branch on undef/poison has undefine behavior. Here it is to
-/// rule out some common cases that we found such discrepancy already
-/// causing problems. Detail could be found in PR31652. Note if the
-/// func returns true, it is unsafe. But if it is false, it doesn't mean
-/// it is necessarily safe.
-static bool equalityPropUnSafe(Value &LoopCond) {
- ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond);
- if (!CI || !CI->isEquality())
- return false;
-
- Value *LHS = CI->getOperand(0);
- Value *RHS = CI->getOperand(1);
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return true;
-
- auto HasUndefInPHI = [](PHINode &PN) {
- for (Value *Opd : PN.incoming_values()) {
- if (isa<UndefValue>(Opd))
- return true;
- }
- return false;
- };
- PHINode *LPHI = dyn_cast<PHINode>(LHS);
- PHINode *RPHI = dyn_cast<PHINode>(RHS);
- if ((LPHI && HasUndefInPHI(*LPHI)) || (RPHI && HasUndefInPHI(*RPHI)))
- return true;
-
- auto HasUndefInSelect = [](SelectInst &SI) {
- if (isa<UndefValue>(SI.getTrueValue()) ||
- isa<UndefValue>(SI.getFalseValue()))
- return true;
- return false;
- };
- SelectInst *LSI = dyn_cast<SelectInst>(LHS);
- SelectInst *RSI = dyn_cast<SelectInst>(RHS);
- if ((LSI && HasUndefInSelect(*LSI)) || (RSI && HasUndefInSelect(*RSI)))
- return true;
- return false;
-}
-
-/// Do actual work and unswitch loop if possible and profitable.
-bool LoopUnswitch::processCurrentLoop() {
- bool Changed = false;
-
- initLoopData();
-
- // If LoopSimplify was unable to form a preheader, don't do any unswitching.
- if (!LoopPreheader)
- return false;
-
- // Loops with indirectbr cannot be cloned.
- if (!CurrentLoop->isSafeToClone())
- return false;
-
- // Without dedicated exits, splitting the exit edge may fail.
- if (!CurrentLoop->hasDedicatedExits())
- return false;
-
- LLVMContext &Context = LoopHeader->getContext();
-
- // Analyze loop cost, and stop unswitching if loop content can not be duplicated.
- if (!BranchesInfo.countLoop(
- CurrentLoop,
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *CurrentLoop->getHeader()->getParent()),
- AC))
- return false;
-
- // Try trivial unswitch first before loop over other basic blocks in the loop.
- if (tryTrivialLoopUnswitch(Changed)) {
- return true;
- }
-
- // Do not do non-trivial unswitch while optimizing for size.
- // FIXME: Use Function::hasOptSize().
- if (OptimizeForSize ||
- LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
- return Changed;
-
- // Run through the instructions in the loop, keeping track of three things:
- //
- // - That we do not unswitch loops containing convergent operations, as we
- // might be making them control dependent on the unswitch value when they
- // were not before.
- // FIXME: This could be refined to only bail if the convergent operation is
- // not already control-dependent on the unswitch value.
- //
- // - That basic blocks in the loop contain invokes whose predecessor edges we
- // cannot split.
- //
- // - The set of guard intrinsics encountered (these are non terminator
- // instructions that are also profitable to be unswitched).
-
- SmallVector<IntrinsicInst *, 4> Guards;
-
- for (const auto BB : CurrentLoop->blocks()) {
- for (auto &I : *BB) {
- auto *CB = dyn_cast<CallBase>(&I);
- if (!CB)
- continue;
- if (CB->isConvergent())
- return Changed;
- if (auto *II = dyn_cast<InvokeInst>(&I))
- if (!II->getUnwindDest()->canSplitPredecessors())
- return Changed;
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::experimental_guard)
- Guards.push_back(II);
- }
- }
-
- for (IntrinsicInst *Guard : Guards) {
- Value *LoopCond = findLIVLoopCondition(Guard->getOperand(0), CurrentLoop,
- Changed, MSSAU.get())
- .first;
- if (LoopCond &&
- unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
- // NB! Unswitching (if successful) could have erased some of the
- // instructions in Guards leaving dangling pointers there. This is fine
- // because we're returning now, and won't look at Guards again.
- ++NumGuards;
- return true;
- }
- }
-
- // Loop over all of the basic blocks in the loop. If we find an interior
- // block that is branching on a loop-invariant condition, we can unswitch this
- // loop.
- for (Loop::block_iterator I = CurrentLoop->block_begin(),
- E = CurrentLoop->block_end();
- I != E; ++I) {
- Instruction *TI = (*I)->getTerminator();
-
- // Unswitching on a potentially uninitialized predicate is not
- // MSan-friendly. Limit this to the cases when the original predicate is
- // guaranteed to execute, to avoid creating a use-of-uninitialized-value
- // in the code that did not have one.
- // This is a workaround for the discrepancy between LLVM IR and MSan
- // semantics. See PR28054 for more details.
- if (SanitizeMemory &&
- !SafetyInfo.isGuaranteedToExecute(*TI, DT, CurrentLoop))
- continue;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- // Some branches may be rendered unreachable because of previous
- // unswitching.
- // Unswitch only those branches that are reachable.
- if (isUnreachableDueToPreviousUnswitching(*I))
- continue;
-
- // If this isn't branching on an invariant condition, we can't unswitch
- // it.
- if (BI->isConditional()) {
- // See if this, or some part of it, is loop invariant. If so, we can
- // unswitch on it if we desire.
- Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop,
- Changed, MSSAU.get())
- .first;
- if (LoopCond && !equalityPropUnSafe(*LoopCond) &&
- unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
- ++NumBranches;
- return true;
- }
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *SC = SI->getCondition();
- Value *LoopCond;
- OperatorChain OpChain;
- std::tie(LoopCond, OpChain) =
- findLIVLoopCondition(SC, CurrentLoop, Changed, MSSAU.get());
-
- unsigned NumCases = SI->getNumCases();
- if (LoopCond && NumCases) {
- // Find a value to unswitch on:
- // FIXME: this should chose the most expensive case!
- // FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = nullptr;
- // Find a case value such that at least one case value is unswitched
- // out.
- if (OpChain == OC_OpChainAnd) {
- // If the chain only has ANDs and the switch has a case value of 0.
- // Dropping in a 0 to the chain will unswitch out the 0-casevalue.
- auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType()));
- if (BranchesInfo.isUnswitched(SI, AllZero))
- continue;
- // We are unswitching 0 out.
- UnswitchVal = AllZero;
- } else if (OpChain == OC_OpChainOr) {
- // If the chain only has ORs and the switch has a case value of ~0.
- // Dropping in a ~0 to the chain will unswitch out the ~0-casevalue.
- auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType()));
- if (BranchesInfo.isUnswitched(SI, AllOne))
- continue;
- // We are unswitching ~0 out.
- UnswitchVal = AllOne;
- } else {
- assert(OpChain == OC_OpChainNone &&
- "Expect to unswitch on trivial chain");
- // Do not process same value again and again.
- // At this point we have some cases already unswitched and
- // some not yet unswitched. Let's find the first not yet unswitched one.
- for (auto Case : SI->cases()) {
- Constant *UnswitchValCandidate = Case.getCaseValue();
- if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
- UnswitchVal = UnswitchValCandidate;
- break;
- }
- }
- }
-
- if (!UnswitchVal)
- continue;
-
- if (unswitchIfProfitable(LoopCond, UnswitchVal)) {
- ++NumSwitches;
- // In case of a full LIV, UnswitchVal is the value we unswitched out.
- // In case of a partial LIV, we only unswitch when its an AND-chain
- // or OR-chain. In both cases switch input value simplifies to
- // UnswitchVal.
- BranchesInfo.setUnswitched(SI, UnswitchVal);
- return true;
- }
- }
- }
-
- // Scan the instructions to check for unswitchable values.
- for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
- BBI != E; ++BBI)
- if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
- Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop,
- Changed, MSSAU.get())
- .first;
- if (LoopCond &&
- unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
- ++NumSelects;
- return true;
- }
- }
- }
-
- // Check if there is a header condition that is invariant along the patch from
- // either the true or false successors to the header. This allows unswitching
- // conditions depending on memory accesses, if there's a path not clobbering
- // the memory locations. Check if this transform has been disabled using
- // metadata, to avoid unswitching the same loop multiple times.
- if (MSSA &&
- !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
- if (auto Info =
- hasPartialIVCondition(*CurrentLoop, MSSAThreshold, *MSSA, *AA)) {
- assert(!Info->InstToDuplicate.empty() &&
- "need at least a partially invariant condition");
- LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
- << *Info->InstToDuplicate[0] << "\n");
-
- Instruction *TI = CurrentLoop->getHeader()->getTerminator();
- Value *LoopCond = Info->InstToDuplicate[0];
-
- // If the partially unswitched path is a no-op and has a single exit
- // block, we do not need to do full unswitching. Instead, we can directly
- // branch to the exit.
- // TODO: Instead of duplicating the checks, we could also just directly
- // branch to the exit from the conditional branch in the loop.
- if (Info->PathIsNoop) {
- if (HasBranchDivergence &&
- getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
- LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
- << CurrentLoop->getHeader()->getName()
- << " at non-trivial condition '"
- << *Info->KnownValue << "' == " << *LoopCond << "\n"
- << ". Condition is divergent.\n");
- return false;
- }
-
- ++NumBranches;
-
- BasicBlock *TrueDest = LoopHeader;
- BasicBlock *FalseDest = Info->ExitForPath;
- if (Info->KnownValue->isOneValue())
- std::swap(TrueDest, FalseDest);
-
- auto *OldBr =
- cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator());
- emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest,
- FalseDest, OldBr, TI,
- Info->InstToDuplicate);
- delete OldBr;
- RedoLoop = false;
- return true;
- }
-
- // Otherwise, the path is not a no-op. Run regular unswitching.
- if (unswitchIfProfitable(LoopCond, Info->KnownValue,
- CurrentLoop->getHeader()->getTerminator(),
- Info->InstToDuplicate)) {
- ++NumBranches;
- RedoLoop = false;
- return true;
- }
- }
- }
-
- return Changed;
-}
-
-/// Check to see if all paths from BB exit the loop with no side effects
-/// (including infinite loops).
-///
-/// If true, we return true and set ExitBB to the block we
-/// exit through.
-///
-static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
- BasicBlock *&ExitBB,
- std::set<BasicBlock*> &Visited) {
- if (!Visited.insert(BB).second) {
- // Already visited. Without more analysis, this could indicate an infinite
- // loop.
- return false;
- }
- if (!L->contains(BB)) {
- // Otherwise, this is a loop exit, this is fine so long as this is the
- // first exit.
- if (ExitBB) return false;
- ExitBB = BB;
- return true;
- }
-
- // Otherwise, this is an unvisited intra-loop node. Check all successors.
- for (BasicBlock *Succ : successors(BB)) {
- // Check to see if the successor is a trivial loop exit.
- if (!isTrivialLoopExitBlockHelper(L, Succ, ExitBB, Visited))
- return false;
- }
-
- // Okay, everything after this looks good, check to make sure that this block
- // doesn't include any side effects.
- for (Instruction &I : *BB)
- if (I.mayHaveSideEffects())
- return false;
-
- return true;
-}
-
-/// Return true if the specified block unconditionally leads to an exit from
-/// the specified loop, and has no side-effects in the process. If so, return
-/// the block that is exited to, otherwise return null.
-static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
- std::set<BasicBlock*> Visited;
- Visited.insert(L->getHeader()); // Branches to header make infinite loops.
- BasicBlock *ExitBB = nullptr;
- if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
- return ExitBB;
- return nullptr;
-}
-
-/// We have found that we can unswitch CurrentLoop when LoopCond == Val to
-/// simplify the loop. If we decide that this is profitable,
-/// unswitch the loop, reprocess the pieces, then return true.
-bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
- // Check to see if it would be profitable to unswitch current loop.
- if (!BranchesInfo.costAllowsUnswitching()) {
- LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
- << CurrentLoop->getHeader()->getName()
- << " at non-trivial condition '" << *Val
- << "' == " << *LoopCond << "\n"
- << ". Cost too high.\n");
- return false;
- }
- if (HasBranchDivergence &&
- getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
- LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
- << CurrentLoop->getHeader()->getName()
- << " at non-trivial condition '" << *Val
- << "' == " << *LoopCond << "\n"
- << ". Condition is divergent.\n");
- return false;
- }
-
- unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
- return true;
-}
-
-/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
-/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
-/// and remove (but not erase!) it from the function.
-void LoopUnswitch::emitPreheaderBranchOnCondition(
- Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
- BranchInst *OldBranch, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
- assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
- assert(TrueDest != FalseDest && "Branch targets should be different");
-
- // Insert a conditional branch on LIC to the two preheaders. The original
- // code is the true version and the new code is the false version.
- Value *BranchVal = LIC;
- bool Swapped = false;
-
- if (!ToDuplicate.empty()) {
- ValueToValueMapTy Old2New;
- for (Instruction *I : reverse(ToDuplicate)) {
- auto *New = I->clone();
- New->insertBefore(OldBranch);
- RemapInstruction(New, Old2New,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- Old2New[I] = New;
-
- if (MSSAU) {
- MemorySSA *MSSA = MSSAU->getMemorySSA();
- auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
- if (!MemA)
- continue;
-
- Loop *L = LI->getLoopFor(I->getParent());
- auto *DefiningAccess = MemA->getDefiningAccess();
- // Get the first defining access before the loop.
- while (L->contains(DefiningAccess->getBlock())) {
- // If the defining access is a MemoryPhi, get the incoming
- // value for the pre-header as defining access.
- if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
- DefiningAccess =
- MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
- } else {
- DefiningAccess =
- cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
- }
- }
- MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
- MemorySSA::BeforeTerminator);
- }
- }
- BranchVal = Old2New[ToDuplicate[0]];
- } else {
-
- if (!isa<ConstantInt>(Val) ||
- Val->getType() != Type::getInt1Ty(LIC->getContext()))
- BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
- else if (Val != ConstantInt::getTrue(Val->getContext())) {
- // We want to enter the new loop when the condition is true.
- std::swap(TrueDest, FalseDest);
- Swapped = true;
- }
- }
-
- // Old branch will be removed, so save its parent and successor to update the
- // DomTree.
- auto *OldBranchSucc = OldBranch->getSuccessor(0);
- auto *OldBranchParent = OldBranch->getParent();
-
- // Insert the new branch.
- BranchInst *BI =
- IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI);
- if (Swapped)
- BI->swapProfMetadata();
-
- // Remove the old branch so there is only one branch at the end. This is
- // needed to perform DomTree's internal DFS walk on the function's CFG.
- OldBranch->removeFromParent();
-
- // Inform the DT about the new branch.
- if (DT) {
- // First, add both successors.
- SmallVector<DominatorTree::UpdateType, 3> Updates;
- if (TrueDest != OldBranchSucc)
- Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest});
- if (FalseDest != OldBranchSucc)
- Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest});
- // If both of the new successors are different from the old one, inform the
- // DT that the edge was deleted.
- if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
- Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
- }
-
- if (MSSAU)
- MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
- else
- DT->applyUpdates(Updates);
- }
-
- // If either edge is critical, split it. This helps preserve LoopSimplify
- // form for enclosing loops.
- auto Options =
- CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA();
- SplitCriticalEdge(BI, 0, Options);
- SplitCriticalEdge(BI, 1, Options);
-}
-
-/// Given a loop that has a trivial unswitchable condition in it (a cond branch
-/// from its header block to its latch block, where the path through the loop
-/// that doesn't execute its body has no side-effects), unswitch it. This
-/// doesn't involve any code duplication, just moving the conditional branch
-/// outside of the loop and updating loop info.
-void LoopUnswitch::unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
- BasicBlock *ExitBlock,
- Instruction *TI) {
- LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
- << LoopHeader->getName() << " [" << L->getBlocks().size()
- << " blocks] in Function "
- << L->getHeader()->getParent()->getName()
- << " on cond: " << *Val << " == " << *Cond << "\n");
- // We are going to make essential changes to CFG. This may invalidate cached
- // information for L or one of its parent loops in SCEV.
- if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
- SEWP->getSE().forgetTopmostLoop(L);
-
- // First step, split the preheader, so that we know that there is a safe place
- // to insert the conditional branch. We will change LoopPreheader to have a
- // conditional branch on Cond.
- BasicBlock *NewPH = SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get());
-
- // Now that we have a place to insert the conditional branch, create a place
- // to branch to: this is the exit block out of the loop that we should
- // short-circuit to.
-
- // Split this block now, so that the loop maintains its exit block, and so
- // that the jump from the preheader can execute the contents of the exit block
- // without actually branching to it (the exit block should be dominated by the
- // loop header, not the preheader).
- assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit =
- SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get());
-
- // Okay, now we have a position to branch from and a position to branch to,
- // insert the new conditional branch.
- auto *OldBranch = dyn_cast<BranchInst>(LoopPreheader->getTerminator());
- assert(OldBranch && "Failed to split the preheader");
- emitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI);
-
- // emitPreheaderBranchOnCondition removed the OldBranch from the function.
- // Delete it, as it is no longer needed.
- delete OldBranch;
-
- // We need to reprocess this loop, it could be unswitched again.
- RedoLoop = true;
-
- // Now that we know that the loop is never entered when this condition is a
- // particular value, rewrite the loop with this info. We know that this will
- // at least eliminate the old branch.
- rewriteLoopBodyWithConditionConstant(L, Cond, Val, /*IsEqual=*/false);
-
- ++NumTrivial;
-}
-
-/// Check if the first non-constant condition starting from the loop header is
-/// a trivial unswitch condition: that is, a condition controls whether or not
-/// the loop does anything at all. If it is a trivial condition, unswitching
-/// produces no code duplications (equivalently, it produces a simpler loop and
-/// a new empty loop, which gets deleted). Therefore always unswitch trivial
-/// condition.
-bool LoopUnswitch::tryTrivialLoopUnswitch(bool &Changed) {
- BasicBlock *CurrentBB = CurrentLoop->getHeader();
- Instruction *CurrentTerm = CurrentBB->getTerminator();
- LLVMContext &Context = CurrentBB->getContext();
-
- // If loop header has only one reachable successor (currently via an
- // unconditional branch or constant foldable conditional branch, but
- // should also consider adding constant foldable switch instruction in
- // future), we should keep looking for trivial condition candidates in
- // the successor as well. An alternative is to constant fold conditions
- // and merge successors into loop header (then we only need to check header's
- // terminator). The reason for not doing this in LoopUnswitch pass is that
- // it could potentially break LoopPassManager's invariants. Folding dead
- // branches could either eliminate the current loop or make other loops
- // unreachable. LCSSA form might also not be preserved after deleting
- // branches. The following code keeps traversing loop header's successors
- // until it finds the trivial condition candidate (condition that is not a
- // constant). Since unswitching generates branches with constant conditions,
- // this scenario could be very common in practice.
- SmallPtrSet<BasicBlock*, 8> Visited;
-
- while (true) {
- // If we exit loop or reach a previous visited block, then
- // we can not reach any trivial condition candidates (unfoldable
- // branch instructions or switch instructions) and no unswitch
- // can happen. Exit and return false.
- if (!CurrentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
- return false;
-
- // Check if this loop will execute any side-effecting instructions (e.g.
- // stores, calls, volatile loads) in the part of the loop that the code
- // *would* execute. Check the header first.
- for (Instruction &I : *CurrentBB)
- if (I.mayHaveSideEffects())
- return false;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
- if (BI->isUnconditional()) {
- CurrentBB = BI->getSuccessor(0);
- } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
- CurrentBB = BI->getSuccessor(0);
- } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
- CurrentBB = BI->getSuccessor(1);
- } else {
- // Found a trivial condition candidate: non-foldable conditional branch.
- break;
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
- // At this point, any constant-foldable instructions should have probably
- // been folded.
- ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
- if (!Cond)
- break;
- // Find the target block we are definitely going to.
- CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor();
- } else {
- // We do not understand these terminator instructions.
- break;
- }
-
- CurrentTerm = CurrentBB->getTerminator();
- }
-
- // CondVal is the condition that controls the trivial condition.
- // LoopExitBB is the BasicBlock that loop exits when meets trivial condition.
- Constant *CondVal = nullptr;
- BasicBlock *LoopExitBB = nullptr;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
- // If this isn't branching on an invariant condition, we can't unswitch it.
- if (!BI->isConditional())
- return false;
-
- Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop,
- Changed, MSSAU.get())
- .first;
-
- // Unswitch only if the trivial condition itself is an LIV (not
- // partial LIV which could occur in and/or)
- if (!LoopCond || LoopCond != BI->getCondition())
- return false;
-
- // Check to see if a successor of the branch is guaranteed to
- // exit through a unique exit block without having any
- // side-effects. If so, determine the value of Cond that causes
- // it to do this.
- if ((LoopExitBB =
- isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(0)))) {
- CondVal = ConstantInt::getTrue(Context);
- } else if ((LoopExitBB =
- isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(1)))) {
- CondVal = ConstantInt::getFalse(Context);
- }
-
- // If we didn't find a single unique LoopExit block, or if the loop exit
- // block contains phi nodes, this isn't trivial.
- if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
- return false; // Can't handle this.
-
- if (equalityPropUnSafe(*LoopCond))
- return false;
-
- unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB,
- CurrentTerm);
- ++NumBranches;
- return true;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
- // If this isn't switching on an invariant condition, we can't unswitch it.
- Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop,
- Changed, MSSAU.get())
- .first;
-
- // Unswitch only if the trivial condition itself is an LIV (not
- // partial LIV which could occur in and/or)
- if (!LoopCond || LoopCond != SI->getCondition())
- return false;
-
- // Check to see if a successor of the switch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
- // side-effects. If so, determine the value of Cond that causes it to do
- // this.
- // Note that we can't trivially unswitch on the default case or
- // on already unswitched cases.
- for (auto Case : SI->cases()) {
- BasicBlock *LoopExitCandidate;
- if ((LoopExitCandidate =
- isTrivialLoopExitBlock(CurrentLoop, Case.getCaseSuccessor()))) {
- // Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt *CaseVal = Case.getCaseValue();
-
- // Check that it was not unswitched before, since already unswitched
- // trivial vals are looks trivial too.
- if (BranchesInfo.isUnswitched(SI, CaseVal))
- continue;
- LoopExitBB = LoopExitCandidate;
- CondVal = CaseVal;
- break;
- }
- }
-
- // If we didn't find a single unique LoopExit block, or if the loop exit
- // block contains phi nodes, this isn't trivial.
- if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
- return false; // Can't handle this.
-
- unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB,
- nullptr);
-
- // We are only unswitching full LIV.
- BranchesInfo.setUnswitched(SI, CondVal);
- ++NumSwitches;
- return true;
- }
- return false;
-}
-
-/// Split all of the edges from inside the loop to their exit blocks.
-/// Update the appropriate Phi nodes as we do so.
-void LoopUnswitch::splitExitEdges(
- Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
-
- for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) {
- BasicBlock *ExitBlock = ExitBlocks[I];
- SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBlock));
-
- // Although SplitBlockPredecessors doesn't preserve loop-simplify in
- // general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(),
- /*PreserveLCSSA*/ true);
- }
-}
-
-/// We determined that the loop is profitable to unswitch when LIC equal Val.
-/// Split it into loop versions and test the condition outside of either loop.
-/// Return the loops created as Out1/Out2.
-void LoopUnswitch::unswitchNontrivialCondition(
- Value *LIC, Constant *Val, Loop *L, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
- Function *F = LoopHeader->getParent();
- LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
- << LoopHeader->getName() << " [" << L->getBlocks().size()
- << " blocks] in Function " << F->getName() << " when '"
- << *Val << "' == " << *LIC << "\n");
-
- // We are going to make essential changes to CFG. This may invalidate cached
- // information for L or one of its parent loops in SCEV.
- if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
- SEWP->getSE().forgetTopmostLoop(L);
-
- LoopBlocks.clear();
- NewBlocks.clear();
-
- if (MSSAU && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- // First step, split the preheader and exit blocks, and add these blocks to
- // the LoopBlocks list.
- BasicBlock *NewPreheader =
- SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get());
- LoopBlocks.push_back(NewPreheader);
-
- // We want the loop to come after the preheader, but before the exit blocks.
- llvm::append_range(LoopBlocks, L->blocks());
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getUniqueExitBlocks(ExitBlocks);
-
- // Split all of the edges from inside the loop to their exit blocks. Update
- // the appropriate Phi nodes as we do so.
- splitExitEdges(L, ExitBlocks);
-
- // The exit blocks may have been changed due to edge splitting, recompute.
- ExitBlocks.clear();
- L->getUniqueExitBlocks(ExitBlocks);
-
- // Add exit blocks to the loop blocks.
- llvm::append_range(LoopBlocks, ExitBlocks);
-
- // Next step, clone all of the basic blocks that make up the loop (including
- // the loop preheader and exit blocks), keeping track of the mapping between
- // the instructions and blocks.
- NewBlocks.reserve(LoopBlocks.size());
- ValueToValueMapTy VMap;
- for (unsigned I = 0, E = LoopBlocks.size(); I != E; ++I) {
- BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[I], VMap, ".us", F);
-
- NewBlocks.push_back(NewBB);
- VMap[LoopBlocks[I]] = NewBB; // Keep the BB mapping.
- }
-
- // Splice the newly inserted blocks into the function right before the
- // original preheader.
- F->getBasicBlockList().splice(NewPreheader->getIterator(),
- F->getBasicBlockList(),
- NewBlocks[0]->getIterator(), F->end());
-
- // Now we create the new Loop object for the versioned loop.
- Loop *NewLoop = cloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
-
- // Recalculate unswitching quota, inherit simplified switches info for NewBB,
- // Probably clone more loop-unswitch related loop properties.
- BranchesInfo.cloneData(NewLoop, L, VMap);
-
- Loop *ParentLoop = L->getParentLoop();
- if (ParentLoop) {
- // Make sure to add the cloned preheader and exit blocks to the parent loop
- // as well.
- ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
- }
-
- for (unsigned EBI = 0, EBE = ExitBlocks.size(); EBI != EBE; ++EBI) {
- BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[EBI]]);
- // The new exit block should be in the same loop as the old one.
- if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[EBI]))
- ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
-
- assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
- "Exit block should have been split to have one successor!");
- BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
-
- // If the successor of the exit block had PHI nodes, add an entry for
- // NewExit.
- for (PHINode &PN : ExitSucc->phis()) {
- Value *V = PN.getIncomingValueForBlock(ExitBlocks[EBI]);
- ValueToValueMapTy::iterator It = VMap.find(V);
- if (It != VMap.end()) V = It->second;
- PN.addIncoming(V, NewExit);
- }
-
- if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
- PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
- &*ExitSucc->getFirstInsertionPt());
-
- for (BasicBlock *BB : predecessors(ExitSucc)) {
- LandingPadInst *LPI = BB->getLandingPadInst();
- LPI->replaceAllUsesWith(PN);
- PN->addIncoming(LPI, BB);
- }
- }
- }
-
- // Rewrite the code to refer to itself.
- for (unsigned NBI = 0, NBE = NewBlocks.size(); NBI != NBE; ++NBI) {
- for (Instruction &I : *NewBlocks[NBI]) {
- RemapInstruction(&I, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- if (auto *II = dyn_cast<AssumeInst>(&I))
- AC->registerAssumption(II);
- }
- }
-
- // Rewrite the original preheader to select between versions of the loop.
- BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator());
- assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
- "Preheader splitting did not work correctly!");
-
- if (MSSAU) {
- // Update MemorySSA after cloning, and before splitting to unreachables,
- // since that invalidates the 1:1 mapping of clones in VMap.
- LoopBlocksRPO LBRPO(L);
- LBRPO.perform(LI);
- MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap);
- }
-
- // Emit the new branch that selects between the two versions of this loop.
- emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
- TI, ToDuplicate);
- if (MSSAU) {
- // Update MemoryPhis in Exit blocks.
- MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- }
-
- // The OldBr was replaced by a new one and removed (but not erased) by
- // emitPreheaderBranchOnCondition. It is no longer needed, so delete it.
- delete OldBR;
-
- LoopProcessWorklist.push_back(NewLoop);
- RedoLoop = true;
-
- // Keep a WeakTrackingVH holding onto LIC. If the first call to
- // RewriteLoopBody
- // deletes the instruction (for example by simplifying a PHI that feeds into
- // the condition that we're unswitching on), we don't rewrite the second
- // iteration.
- WeakTrackingVH LICHandle(LIC);
-
- if (ToDuplicate.empty()) {
- // Now we rewrite the original code to know that the condition is true and
- // the new code to know that the condition is false.
- rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
-
- // It's possible that simplifying one loop could cause the other to be
- // changed to another value or a constant. If its a constant, don't
- // simplify it.
- if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
- LICHandle && !isa<Constant>(LICHandle))
- rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
- /*IsEqual=*/true);
- } else {
- // Partial unswitching. Update the condition in the right loop with the
- // constant.
- auto *CC = cast<ConstantInt>(Val);
- if (CC->isOneValue()) {
- rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
- /*IsEqual=*/true);
- } else
- rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
-
- // Mark the new loop as partially unswitched, to avoid unswitching on the
- // same condition again.
- auto &Context = NewLoop->getHeader()->getContext();
- MDNode *DisableUnswitchMD = MDNode::get(
- Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
- MDNode *NewLoopID = makePostTransformationMetadata(
- Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
- {DisableUnswitchMD});
- NewLoop->setLoopID(NewLoopID);
- }
-
- if (MSSA && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-}
-
-/// Remove all instances of I from the worklist vector specified.
-static void removeFromWorklist(Instruction *I,
- std::vector<Instruction *> &Worklist) {
- llvm::erase_value(Worklist, I);
-}
-
-/// When we find that I really equals V, remove I from the
-/// program, replacing all uses with V and update the worklist.
-static void replaceUsesOfWith(Instruction *I, Value *V,
- std::vector<Instruction *> &Worklist, Loop *L,
- LPPassManager *LPM, MemorySSAUpdater *MSSAU) {
- LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
-
- // Add uses to the worklist, which may be dead now.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
- Worklist.push_back(Use);
-
- // Add users to the worklist which may be simplified now.
- for (User *U : I->users())
- Worklist.push_back(cast<Instruction>(U));
- removeFromWorklist(I, Worklist);
- I->replaceAllUsesWith(V);
- if (!I->mayHaveSideEffects()) {
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- I->eraseFromParent();
- }
- ++NumSimplify;
-}
-
-/// We know either that the value LIC has the value specified by Val in the
-/// specified loop, or we know it does NOT have that value.
-/// Rewrite any uses of LIC or of properties correlated to it.
-void LoopUnswitch::rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
- Constant *Val,
- bool IsEqual) {
- assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
-
- // FIXME: Support correlated properties, like:
- // for (...)
- // if (li1 < li2)
- // ...
- // if (li1 > li2)
- // ...
-
- // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
- // selects, switches.
- std::vector<Instruction*> Worklist;
- LLVMContext &Context = Val->getContext();
-
- // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
- // in the loop with the appropriate one directly.
- if (IsEqual || (isa<ConstantInt>(Val) &&
- Val->getType()->isIntegerTy(1))) {
- Value *Replacement;
- if (IsEqual)
- Replacement = Val;
- else
- Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
- !cast<ConstantInt>(Val)->getZExtValue());
-
- for (User *U : LIC->users()) {
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI || !L->contains(UI))
- continue;
- Worklist.push_back(UI);
- }
-
- for (Instruction *UI : Worklist)
- UI->replaceUsesOfWith(LIC, Replacement);
-
- simplifyCode(Worklist, L);
- return;
- }
-
- // Otherwise, we don't know the precise value of LIC, but we do know that it
- // is certainly NOT "Val". As such, simplify any uses in the loop that we
- // can. This case occurs when we unswitch switch statements.
- for (User *U : LIC->users()) {
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI || !L->contains(UI))
- continue;
-
- // At this point, we know LIC is definitely not Val. Try to use some simple
- // logic to simplify the user w.r.t. to the context.
- if (Value *Replacement = simplifyInstructionWithNotEqual(UI, LIC, Val)) {
- if (LI->replacementPreservesLCSSAForm(UI, Replacement)) {
- // This in-loop instruction has been simplified w.r.t. its context,
- // i.e. LIC != Val, make sure we propagate its replacement value to
- // all its users.
- //
- // We can not yet delete UI, the LIC user, yet, because that would invalidate
- // the LIC->users() iterator !. However, we can make this instruction
- // dead by replacing all its users and push it onto the worklist so that
- // it can be properly deleted and its operands simplified.
- UI->replaceAllUsesWith(Replacement);
- }
- }
-
- // This is a LIC user, push it into the worklist so that simplifyCode can
- // attempt to simplify it.
- Worklist.push_back(UI);
-
- // If we know that LIC is not Val, use this info to simplify code.
- SwitchInst *SI = dyn_cast<SwitchInst>(UI);
- if (!SI || !isa<ConstantInt>(Val)) continue;
-
- // NOTE: if a case value for the switch is unswitched out, we record it
- // after the unswitch finishes. We can not record it here as the switch
- // is not a direct user of the partial LIV.
- SwitchInst::CaseHandle DeadCase =
- *SI->findCaseValue(cast<ConstantInt>(Val));
- // Default case is live for multiple values.
- if (DeadCase == *SI->case_default())
- continue;
-
- // Found a dead case value. Don't remove PHI nodes in the
- // successor if they become single-entry, those PHI nodes may
- // be in the Users list.
-
- BasicBlock *Switch = SI->getParent();
- BasicBlock *SISucc = DeadCase.getCaseSuccessor();
- BasicBlock *Latch = L->getLoopLatch();
-
- if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
- // If the DeadCase successor dominates the loop latch, then the
- // transformation isn't safe since it will delete the sole predecessor edge
- // to the latch.
- if (Latch && DT->dominates(SISucc, Latch))
- continue;
-
- // FIXME: This is a hack. We need to keep the successor around
- // and hooked up so as to preserve the loop structure, because
- // trying to update it is complicated. So instead we preserve the
- // loop structure and put the block on a dead code path.
- SplitEdge(Switch, SISucc, DT, LI, MSSAU.get());
- // Compute the successors instead of relying on the return value
- // of SplitEdge, since it may have split the switch successor
- // after PHI nodes.
- BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
- BasicBlock *OldSISucc = *succ_begin(NewSISucc);
- // Create an "unreachable" destination.
- BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
- Switch->getParent(),
- OldSISucc);
- new UnreachableInst(Context, Abort);
- // Force the new case destination to branch to the "unreachable"
- // block while maintaining a (dead) CFG edge to the old block.
- NewSISucc->getTerminator()->eraseFromParent();
- BranchInst::Create(Abort, OldSISucc,
- ConstantInt::getTrue(Context), NewSISucc);
- // Release the PHI operands for this edge.
- for (PHINode &PN : NewSISucc->phis())
- PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType()));
- // Tell the domtree about the new block. We don't fully update the
- // domtree here -- instead we force it to do a full recomputation
- // after the pass is complete -- but we do need to inform it of
- // new blocks.
- DT->addNewBlock(Abort, NewSISucc);
- }
-
- simplifyCode(Worklist, L);
-}
-
-/// Now that we have simplified some instructions in the loop, walk over it and
-/// constant prop, dce, and fold control flow where possible. Note that this is
-/// effectively a very simple loop-structure-aware optimizer. During processing
-/// of this loop, L could very well be deleted, so it must not be used.
-///
-/// FIXME: When the loop optimizer is more mature, separate this out to a new
-/// pass.
-///
-void LoopUnswitch::simplifyCode(std::vector<Instruction *> &Worklist, Loop *L) {
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- while (!Worklist.empty()) {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
-
- // Simple DCE.
- if (isInstructionTriviallyDead(I)) {
- LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");
-
- // Add uses to the worklist, which may be dead now.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
- Worklist.push_back(Use);
- removeFromWorklist(I, Worklist);
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- I->eraseFromParent();
- ++NumSimplify;
- continue;
- }
-
- // See if instruction simplification can hack this up. This is common for
- // things like "select false, X, Y" after unswitching made the condition be
- // 'false'. TODO: update the domtree properly so we can pass it here.
- if (Value *V = SimplifyInstruction(I, DL))
- if (LI->replacementPreservesLCSSAForm(I, V)) {
- replaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get());
- continue;
- }
-
- // Special case hacks that appear commonly in unswitched code.
- if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (BI->isUnconditional()) {
- // If BI's parent is the only pred of the successor, fold the two blocks
- // together.
- BasicBlock *Pred = BI->getParent();
- (void)Pred;
- BasicBlock *Succ = BI->getSuccessor(0);
- BasicBlock *SinglePred = Succ->getSinglePredecessor();
- if (!SinglePred) continue; // Nothing to do.
- assert(SinglePred == Pred && "CFG broken");
-
- // Make the LPM and Worklist updates specific to LoopUnswitch.
- removeFromWorklist(BI, Worklist);
- auto SuccIt = Succ->begin();
- while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) {
- for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It)
- if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It)))
- Worklist.push_back(Use);
- for (User *U : PN->users())
- Worklist.push_back(cast<Instruction>(U));
- removeFromWorklist(PN, Worklist);
- ++NumSimplify;
- }
- // Merge the block and make the remaining analyses updates.
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get());
- ++NumSimplify;
- continue;
- }
-
- continue;
- }
- }
-}
-
-/// Simple simplifications we can do given the information that Cond is
-/// definitely not equal to Val.
-Value *LoopUnswitch::simplifyInstructionWithNotEqual(Instruction *Inst,
- Value *Invariant,
- Constant *Val) {
- // icmp eq cond, val -> false
- ICmpInst *CI = dyn_cast<ICmpInst>(Inst);
- if (CI && CI->isEquality()) {
- Value *Op0 = CI->getOperand(0);
- Value *Op1 = CI->getOperand(1);
- if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) {
- LLVMContext &Ctx = Inst->getContext();
- if (CI->getPredicate() == CmpInst::ICMP_EQ)
- return ConstantInt::getFalse(Ctx);
- else
- return ConstantInt::getTrue(Ctx);
- }
- }
-
- // FIXME: there may be other opportunities, e.g. comparison with floating
- // point, or Invariant - Val != 0, etc.
- return nullptr;
-}
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 2ff1e8480749..c733aa4701ed 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -70,14 +70,12 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
index 4063e4fe0472..6aba913005d0 100644
--- a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
@@ -11,95 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
using namespace llvm;
#define DEBUG_TYPE "loweratomic"
-static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
- IRBuilder<> Builder(CXI);
- Value *Ptr = CXI->getPointerOperand();
- Value *Cmp = CXI->getCompareOperand();
- Value *Val = CXI->getNewValOperand();
-
- LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
- Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
- Value *Res = Builder.CreateSelect(Equal, Val, Orig);
- Builder.CreateStore(Res, Ptr);
-
- Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
- Res = Builder.CreateInsertValue(Res, Equal, 1);
-
- CXI->replaceAllUsesWith(Res);
- CXI->eraseFromParent();
- return true;
-}
-
-bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
- IRBuilder<> Builder(RMWI);
- Value *Ptr = RMWI->getPointerOperand();
- Value *Val = RMWI->getValOperand();
-
- LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
- Value *Res = nullptr;
-
- switch (RMWI->getOperation()) {
- default: llvm_unreachable("Unexpected RMW operation");
- case AtomicRMWInst::Xchg:
- Res = Val;
- break;
- case AtomicRMWInst::Add:
- Res = Builder.CreateAdd(Orig, Val);
- break;
- case AtomicRMWInst::Sub:
- Res = Builder.CreateSub(Orig, Val);
- break;
- case AtomicRMWInst::And:
- Res = Builder.CreateAnd(Orig, Val);
- break;
- case AtomicRMWInst::Nand:
- Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
- break;
- case AtomicRMWInst::Or:
- Res = Builder.CreateOr(Orig, Val);
- break;
- case AtomicRMWInst::Xor:
- Res = Builder.CreateXor(Orig, Val);
- break;
- case AtomicRMWInst::Max:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
- Val, Orig);
- break;
- case AtomicRMWInst::Min:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
- Orig, Val);
- break;
- case AtomicRMWInst::UMax:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
- Val, Orig);
- break;
- case AtomicRMWInst::UMin:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
- Orig, Val);
- break;
- case AtomicRMWInst::FAdd:
- Res = Builder.CreateFAdd(Orig, Val);
- break;
- case AtomicRMWInst::FSub:
- Res = Builder.CreateFSub(Orig, Val);
- break;
- }
- Builder.CreateStore(Res, Ptr);
- RMWI->replaceAllUsesWith(Orig);
- RMWI->eraseFromParent();
- return true;
-}
-
static bool LowerFenceInst(FenceInst *FI) {
FI->eraseFromParent();
return true;
@@ -121,7 +43,7 @@ static bool runOnBasicBlock(BasicBlock &BB) {
if (FenceInst *FI = dyn_cast<FenceInst>(&Inst))
Changed |= LowerFenceInst(FI);
else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&Inst))
- Changed |= LowerAtomicCmpXchgInst(CXI);
+ Changed |= lowerAtomicCmpXchgInst(CXI);
else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&Inst))
Changed |= lowerAtomicRMWInst(RMWI);
else if (LoadInst *LI = dyn_cast<LoadInst>(&Inst)) {
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index 186065db327e..47493b54a527 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -26,11 +26,9 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -96,7 +94,7 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
return HasDeadBlocks;
}
-static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI,
+static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo &TLI,
DominatorTree *DT) {
Optional<DomTreeUpdater> DTU;
if (DT)
@@ -140,21 +138,21 @@ static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI,
IsConstantIntrinsicsHandled++;
break;
case Intrinsic::objectsize:
- NewValue = lowerObjectSizeCall(II, DL, TLI, true);
+ NewValue = lowerObjectSizeCall(II, DL, &TLI, true);
ObjectSizeIntrinsicsHandled++;
break;
}
HasDeadBlocks |= replaceConditionalBranchesOnConstant(
- II, NewValue, DTU.hasValue() ? DTU.getPointer() : nullptr);
+ II, NewValue, DTU ? DTU.getPointer() : nullptr);
}
if (HasDeadBlocks)
- removeUnreachableBlocks(F, DTU.hasValue() ? DTU.getPointer() : nullptr);
+ removeUnreachableBlocks(F, DTU ? DTU.getPointer() : nullptr);
return !Worklist.empty();
}
PreservedAnalyses
LowerConstantIntrinsicsPass::run(Function &F, FunctionAnalysisManager &AM) {
- if (lowerConstantIntrinsics(F, AM.getCachedResult<TargetLibraryAnalysis>(F),
+ if (lowerConstantIntrinsics(F, AM.getResult<TargetLibraryAnalysis>(F),
AM.getCachedResult<DominatorTreeAnalysis>(F))) {
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
@@ -178,8 +176,8 @@ public:
}
bool runOnFunction(Function &F) override {
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
DominatorTree *DT = nullptr;
if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
DT = &DTWP->getDomTree();
@@ -187,6 +185,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
@@ -196,6 +195,7 @@ public:
char LowerConstantIntrinsics::ID = 0;
INITIALIZE_PASS_BEGIN(LowerConstantIntrinsics, "lower-constant-intrinsics",
"Lower constant intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(LowerConstantIntrinsics, "lower-constant-intrinsics",
"Lower constant intrinsics", false, false)
diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index a7eb60b5e032..88fad9896c59 100644
--- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -21,12 +21,11 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
using namespace llvm;
@@ -101,6 +100,8 @@ static bool handleSwitchExpect(SwitchInst &SI) {
uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1;
Weights[Index] = LikelyBranchWeightVal;
+ misexpect::checkExpectAnnotations(SI, Weights, /*IsFrontend=*/true);
+
SI.setCondition(ArgValue);
SI.setMetadata(LLVMContext::MD_prof,
@@ -315,13 +316,16 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
std::tie(LikelyBranchWeightVal, UnlikelyBranchWeightVal) =
getBranchWeight(Fn->getIntrinsicID(), CI, 2);
+ SmallVector<uint32_t, 4> ExpectedWeights;
if ((ExpectedValue->getZExtValue() == ValueComparedTo) ==
(Predicate == CmpInst::ICMP_EQ)) {
Node =
MDB.createBranchWeights(LikelyBranchWeightVal, UnlikelyBranchWeightVal);
+ ExpectedWeights = {LikelyBranchWeightVal, UnlikelyBranchWeightVal};
} else {
Node =
MDB.createBranchWeights(UnlikelyBranchWeightVal, LikelyBranchWeightVal);
+ ExpectedWeights = {UnlikelyBranchWeightVal, LikelyBranchWeightVal};
}
if (CmpI)
@@ -329,6 +333,8 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
else
BSI.setCondition(ArgValue);
+ misexpect::checkFrontendInstrumentation(BSI, ExpectedWeights);
+
BSI.setMetadata(LLVMContext::MD_prof, Node);
return true;
@@ -409,7 +415,7 @@ public:
bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); }
};
-}
+} // namespace
char LowerExpectIntrinsic::ID = 0;
INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect",
diff --git a/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 45f5929e3b90..8dc037b10cc8 100644
--- a/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -49,9 +48,13 @@ static bool lowerGuardIntrinsic(Function &F) {
return false;
SmallVector<CallInst *, 8> ToLower;
- for (auto &I : instructions(F))
- if (isGuard(&I))
- ToLower.push_back(cast<CallInst>(&I));
+ // Traverse through the users of GuardDecl.
+ // This is presumably cheaper than traversing all instructions in the
+ // function.
+ for (auto *U : GuardDecl->users())
+ if (auto *CI = dyn_cast<CallInst>(U))
+ if (CI->getFunction() == &F)
+ ToLower.push_back(CI);
if (ToLower.empty())
return false;
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 296becb31e8f..c05906649f16 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -18,11 +18,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -704,10 +704,10 @@ public:
// We may remove II. By default continue on the next/prev instruction.
++II;
// If we were to erase II, move again.
- auto EraseFromParent = [&II](Value *V) {
+ auto EraseFromParent = [&II, &BB](Value *V) {
auto *Inst = cast<Instruction>(V);
if (Inst->use_empty()) {
- if (Inst == &*II) {
+ if (II != BB.rend() && Inst == &*II) {
++II;
}
Inst->eraseFromParent();
@@ -718,7 +718,7 @@ public:
Instruction *NewInst = nullptr;
IRBuilder<> IB(&I);
- MatrixBuilder<IRBuilder<>> Builder(IB);
+ MatrixBuilder Builder(IB);
Value *TA, *TAMA, *TAMB;
ConstantInt *R, *K, *C;
@@ -766,28 +766,25 @@ public:
// If we have a TT matmul, lift the transpose. We may be able to fold into
// consuming multiply.
for (BasicBlock &BB : Func) {
- for (BasicBlock::iterator II = BB.begin(); II != BB.end();) {
- Instruction *I = &*II;
- // We may remove I.
- ++II;
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
Value *A, *B, *AT, *BT;
ConstantInt *R, *K, *C;
// A^t * B ^t -> (B * A)^t
- if (match(&*I, m_Intrinsic<Intrinsic::matrix_multiply>(
- m_Value(A), m_Value(B), m_ConstantInt(R),
- m_ConstantInt(K), m_ConstantInt(C))) &&
+ if (match(&I, m_Intrinsic<Intrinsic::matrix_multiply>(
+ m_Value(A), m_Value(B), m_ConstantInt(R),
+ m_ConstantInt(K), m_ConstantInt(C))) &&
match(A, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(AT))) &&
match(B, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value((BT))))) {
- IRBuilder<> IB(&*I);
- MatrixBuilder<IRBuilder<>> Builder(IB);
+ IRBuilder<> IB(&I);
+ MatrixBuilder Builder(IB);
Value *M = Builder.CreateMatrixMultiply(
BT, AT, C->getZExtValue(), K->getZExtValue(), R->getZExtValue());
setShapeInfo(M, {C, R});
Instruction *NewInst = Builder.CreateMatrixTranspose(
M, C->getZExtValue(), R->getZExtValue());
- ReplaceAllUsesWith(*I, NewInst);
- if (I->use_empty())
- I->eraseFromParent();
+ ReplaceAllUsesWith(I, NewInst);
+ if (I.use_empty())
+ I.eraseFromParent();
if (A->use_empty())
cast<Instruction>(A)->eraseFromParent();
if (A != B && B->use_empty())
@@ -891,27 +888,27 @@ public:
// having to update as many def-use and use-def chains.
//
// Because we add to ToRemove during fusion we can't guarantee that defs
- // are before uses. Change uses to undef temporarily as these should get
+ // are before uses. Change uses to poison temporarily as these should get
// removed as well.
//
- // For verification, we keep track of where we changed uses to undefs in
- // UndefedInsts and then check that we in fact remove them.
- SmallSet<Instruction *, 16> UndefedInsts;
+ // For verification, we keep track of where we changed uses to poison in
+ // PoisonedInsts and then check that we in fact remove them.
+ SmallSet<Instruction *, 16> PoisonedInsts;
for (auto *Inst : reverse(ToRemove)) {
for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
- if (auto *Undefed = dyn_cast<Instruction>(U.getUser()))
- UndefedInsts.insert(Undefed);
- U.set(UndefValue::get(Inst->getType()));
+ if (auto *Poisoned = dyn_cast<Instruction>(U.getUser()))
+ PoisonedInsts.insert(Poisoned);
+ U.set(PoisonValue::get(Inst->getType()));
}
Inst->eraseFromParent();
- UndefedInsts.erase(Inst);
+ PoisonedInsts.erase(Inst);
}
- if (!UndefedInsts.empty()) {
- // If we didn't remove all undefed instructions, it's a hard error.
- dbgs() << "Undefed but present instructions:\n";
- for (auto *I : UndefedInsts)
+ if (!PoisonedInsts.empty()) {
+ // If we didn't remove all poisoned instructions, it's a hard error.
+ dbgs() << "Poisoned but present instructions:\n";
+ for (auto *I : PoisonedInsts)
dbgs() << *I << "\n";
- llvm_unreachable("Undefed but instruction not removed");
+ llvm_unreachable("Poisoned but instruction not removed");
}
return Changed;
@@ -1670,7 +1667,7 @@ public:
for (unsigned I = 0; I < NewNumVecs; ++I) {
// Build a single result vector. First initialize it.
- Value *ResultVector = UndefValue::get(
+ Value *ResultVector = PoisonValue::get(
FixedVectorType::get(VectorTy->getElementType(), NewNumElts));
// Go through the old elements and insert it into the resulting vector.
for (auto J : enumerate(InputMatrix.vectors())) {
diff --git a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
index 73b2cd06fa23..e2de322933bc 100644
--- a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
@@ -13,8 +13,6 @@
#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -24,7 +22,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;
@@ -50,9 +47,13 @@ static bool lowerWidenableCondition(Function &F) {
using namespace llvm::PatternMatch;
SmallVector<CallInst *, 8> ToLower;
- for (auto &I : instructions(F))
- if (match(&I, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
- ToLower.push_back(cast<CallInst>(&I));
+ // Traverse through the users of WCDecl.
+ // This is presumably cheaper than traversing all instructions in the
+ // function.
+ for (auto *U : WCDecl->users())
+ if (auto *CI = dyn_cast<CallInst>(U))
+ if (CI->getFunction() == &F)
+ ToLower.push_back(CI);
if (ToLower.empty())
return false;
diff --git a/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index 5ffae128f5f0..a3f09a5a33c3 100644
--- a/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -33,13 +33,11 @@
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6698db26626b..1f5bc69acecd 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -28,14 +28,12 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -45,7 +43,6 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -61,15 +58,13 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <utility>
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
- "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
- cl::ZeroOrMore,
+ "enable-memcpyopt-without-libcalls", cl::Hidden,
cl::desc("Enable memcpyopt even when libcalls are disabled"));
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
@@ -100,7 +95,7 @@ struct MemsetRange {
Value *StartPtr;
/// Alignment - The known alignment of the first store.
- unsigned Alignment;
+ MaybeAlign Alignment;
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
@@ -182,16 +177,16 @@ public:
TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
- SI->getAlign().value(), SI);
+ SI->getAlign(), SI);
}
void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
}
- void addRange(int64_t Start, int64_t Size, Value *Ptr,
- unsigned Alignment, Instruction *Inst);
+ void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
+ Instruction *Inst);
};
} // end anonymous namespace
@@ -200,7 +195,7 @@ public:
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
- unsigned Alignment, Instruction *Inst) {
+ MaybeAlign Alignment, Instruction *Inst) {
int64_t End = Start+Size;
range_iterator I = partition_point(
@@ -352,9 +347,25 @@ static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
-static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
- const MemoryUseOrDef *Start,
+static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA,
+ MemoryLocation Loc, const MemoryUseOrDef *Start,
const MemoryUseOrDef *End) {
+ if (isa<MemoryUse>(End)) {
+ // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
+ // Manually check read accesses between Start and End, if they are in the
+ // same block, for clobbers. Otherwise assume Loc is clobbered.
+ return Start->getBlock() != End->getBlock() ||
+ any_of(
+ make_range(std::next(Start->getIterator()), End->getIterator()),
+ [&AA, Loc](const MemoryAccess &Acc) {
+ if (isa<MemoryUse>(&Acc))
+ return false;
+ Instruction *AccInst =
+ cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
+ return isModSet(AA.getModRefInfo(AccInst, Loc));
+ });
+ }
+
// TODO: Only walk until we hit Start.
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
End->getDefiningAccess(), Loc);
@@ -492,7 +503,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
StartPtr = Range.StartPtr;
AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
- MaybeAlign(Range.Alignment));
+ Range.Alignment);
LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
: Range.TheStores) dbgs()
<< *SI << '\n';
@@ -749,36 +760,25 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
- CallInst *C = nullptr;
- if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
- MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
- // The load most post-dom the call. Limit to the same block for now.
- // TODO: Support non-local call-slot optimization?
- if (LoadClobber->getBlock() == SI->getParent())
- C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
- }
-
- if (C) {
- // Check that nothing touches the dest of the "copy" between
- // the call and the store.
- MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
- MSSA->getMemoryAccess(SI)))
- C = nullptr;
- }
+ auto GetCall = [&]() -> CallInst * {
+ // We defer this expensive clobber walk until the cheap checks
+ // have been done on the source inside performCallSlotOptzn.
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI)))
+ return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
+ return nullptr;
+ };
- if (C) {
- bool changed = performCallSlotOptzn(
- LI, SI, SI->getPointerOperand()->stripPointerCasts(),
- LI->getPointerOperand()->stripPointerCasts(),
- DL.getTypeStoreSize(SI->getOperand(0)->getType()),
- commonAlignment(SI->getAlign(), LI->getAlign()), C);
- if (changed) {
- eraseInstruction(SI);
- eraseInstruction(LI);
- ++NumMemCpyInstr;
- return true;
- }
+ bool changed = performCallSlotOptzn(
+ LI, SI, SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ DL.getTypeStoreSize(SI->getOperand(0)->getType()),
+ std::min(SI->getAlign(), LI->getAlign()), GetCall);
+ if (changed) {
+ eraseInstruction(SI);
+ eraseInstruction(LI);
+ ++NumMemCpyInstr;
+ return true;
}
}
}
@@ -853,7 +853,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, TypeSize cpySize,
- Align cpyAlign, CallInst *C) {
+ Align cpyAlign,
+ std::function<CallInst *()> GetC) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -872,11 +873,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (cpySize.isScalable())
return false;
- // Lifetime marks shouldn't be operated on.
- if (Function *F = C->getCalledFunction())
- if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
- return false;
-
// Require that src be an alloca. This simplifies the reasoning considerably.
auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
@@ -893,6 +889,33 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (cpySize < srcSize)
return false;
+ CallInst *C = GetC();
+ if (!C)
+ return false;
+
+ // Lifetime marks shouldn't be operated on.
+ if (Function *F = C->getCalledFunction())
+ if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
+ return false;
+
+
+ if (C->getParent() != cpyStore->getParent()) {
+ LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
+ return false;
+ }
+
+ MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
+ MemoryLocation::get(cpyStore) :
+ MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
+
+ // Check that nothing touches the dest of the copy between
+ // the call and the store/memcpy.
+ if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(cpyStore))) {
+ LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
+ return false;
+ }
+
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
@@ -902,6 +925,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
return false;
}
+
// Make sure that nothing can observe cpyDest being written early. There are
// a number of cases to consider:
// 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
@@ -1118,7 +1142,7 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// then we could still perform the xform by moving M up to the first memcpy.
// TODO: It would be sufficient to check the MDep source up to the memcpy
// size of M, rather than MDep.
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
return false;
@@ -1215,14 +1239,14 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
}
// By default, create an unaligned memset.
- unsigned Align = 1;
+ Align Alignment = Align(1);
// If Dest is aligned, and SrcSize is constant, use the minimum alignment
// of the sum.
- const unsigned DestAlign =
- std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
+ const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
+ MemCpy->getDestAlign().valueOrOne());
if (DestAlign > 1)
if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
- Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
+ Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
IRBuilder<> Builder(MemCpy);
@@ -1241,11 +1265,11 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
unsigned DestAS = Dest->getType()->getPointerAddressSpace();
Instruction *NewMemSet = Builder.CreateMemSet(
- Builder.CreateGEP(Builder.getInt8Ty(),
- Builder.CreatePointerCast(Dest,
- Builder.getInt8PtrTy(DestAS)),
- SrcSize),
- MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
+ Builder.CreateGEP(
+ Builder.getInt8Ty(),
+ Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
+ SrcSize),
+ MemSet->getOperand(1), MemsetLen, Alignment);
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
"MemCpy must be a MemoryDef");
@@ -1402,7 +1426,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
- MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
+ MemoryAccess *AnyClobber = MA->getDefiningAccess();
MemoryLocation DestLoc = MemoryLocation::getForDest(M);
const MemoryAccess *DestClobber =
MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
@@ -1431,28 +1456,20 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
if (Instruction *MI = MD->getMemoryInst()) {
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
if (auto *C = dyn_cast<CallInst>(MI)) {
- // The memcpy must post-dom the call. Limit to the same block for
- // now. Additionally, we need to ensure that there are no accesses
- // to dest between the call and the memcpy. Accesses to src will be
- // checked by performCallSlotOptzn().
- // TODO: Support non-local call-slot optimization?
- if (C->getParent() == M->getParent() &&
- !accessedBetween(*AA, DestLoc, MD, MA)) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(
- M, M, M->getDest(), M->getSource(),
- TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
- C)) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(
+ M, M, M->getDest(), M->getSource(),
+ TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
+ [C]() -> CallInst * { return C; })) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
}
}
}
@@ -1557,7 +1574,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
return false;
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index aac0deea5be3..ce01ae5b2692 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -144,31 +144,33 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
LLVM_DEBUG(dbgs() << "volatile or atomic\n");
return {};
}
- Value *const Addr = LoadI->getOperand(0);
+ Value *Addr = LoadI->getOperand(0);
if (Addr->getType()->getPointerAddressSpace() != 0) {
LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
return {};
}
- auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
- if (!GEP)
- return {};
- LLVM_DEBUG(dbgs() << "GEP\n");
- if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
- LLVM_DEBUG(dbgs() << "used outside of block\n");
- return {};
- }
- const auto &DL = GEP->getModule()->getDataLayout();
- if (!isDereferenceablePointer(GEP, LoadI->getType(), DL)) {
+ const auto &DL = LoadI->getModule()->getDataLayout();
+ if (!isDereferenceablePointer(Addr, LoadI->getType(), DL)) {
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
// We need to make sure that we can do comparison in any order, so we
// require memory to be unconditionnally dereferencable.
return {};
}
- APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
- if (!GEP->accumulateConstantOffset(DL, Offset))
- return {};
- return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
- Offset);
+
+ APInt Offset = APInt(DL.getPointerTypeSizeInBits(Addr->getType()), 0);
+ Value *Base = Addr;
+ auto *GEP = dyn_cast<GetElementPtrInst>(Addr);
+ if (GEP) {
+ LLVM_DEBUG(dbgs() << "GEP\n");
+ if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
+ LLVM_DEBUG(dbgs() << "used outside of block\n");
+ return {};
+ }
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return {};
+ Base = GEP->getPointerOperand();
+ }
+ return BCEAtom(GEP, LoadI, BaseId.getBaseId(Base), Offset);
}
// A comparison between two BCE atoms, e.g. `a == o.a` in the example at the
@@ -244,7 +246,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
auto MayClobber = [&](LoadInst *LI) {
// If a potentially clobbering instruction comes before the load,
// we can still safely sink the load.
- return !Inst->comesBefore(LI) &&
+ return (Inst->getParent() != LI->getParent() || !Inst->comesBefore(LI)) &&
isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
};
if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
@@ -270,9 +272,8 @@ void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
}
// Do the actual spliting.
- for (Instruction *Inst : reverse(OtherInsts)) {
- Inst->moveBefore(&*NewParent->begin());
- }
+ for (Instruction *Inst : reverse(OtherInsts))
+ Inst->moveBefore(*NewParent, NewParent->begin());
}
bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
@@ -368,8 +369,11 @@ Optional<BCECmpBlock> visitCmpBlock(Value *const Val, BasicBlock *const Block,
return None;
BCECmpBlock::InstructionSet BlockInsts(
- {Result->Lhs.GEP, Result->Rhs.GEP, Result->Lhs.LoadI, Result->Rhs.LoadI,
- Result->CmpI, BranchI});
+ {Result->Lhs.LoadI, Result->Rhs.LoadI, Result->CmpI, BranchI});
+ if (Result->Lhs.GEP)
+ BlockInsts.insert(Result->Lhs.GEP);
+ if (Result->Rhs.GEP)
+ BlockInsts.insert(Result->Rhs.GEP);
return BCECmpBlock(std::move(*Result), Block, BlockInsts);
}
@@ -604,8 +608,15 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
NextCmpBlock->getParent(), InsertBefore);
IRBuilder<> Builder(BB);
// Add the GEPs from the first BCECmpBlock.
- Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
- Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
+ Value *Lhs, *Rhs;
+ if (FirstCmp.Lhs().GEP)
+ Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
+ else
+ Lhs = FirstCmp.Lhs().LoadI->getPointerOperand();
+ if (FirstCmp.Rhs().GEP)
+ Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
+ else
+ Rhs = FirstCmp.Rhs().LoadI->getPointerOperand();
Value *IsEqual = nullptr;
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 734532a6670c..6383d6ea838b 100644
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -76,13 +76,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index f35c9212a6f9..876ef3c427a6 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -88,8 +88,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -1076,6 +1074,9 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
Value *Arg1, Value *Arg2,
Instruction *I) const {
auto *E = new (ExpressionAllocator) BasicExpression(2);
+ // TODO: we need to remove context instruction after Value Tracking
+ // can run without context instruction
+ const SimplifyQuery Q = SQ.getWithInstruction(I);
E->setType(T);
E->setOpcode(Opcode);
@@ -1091,7 +1092,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
E->op_push_back(lookupOperandLeader(Arg1));
E->op_push_back(lookupOperandLeader(Arg2));
- Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ);
+ Value *V = simplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), Q);
if (auto Simplified = checkExprResults(E, I, V)) {
addAdditionalUsers(Simplified, I);
return Simplified.Expr;
@@ -1147,6 +1148,9 @@ NewGVN::ExprResult NewGVN::checkExprResults(Expression *E, Instruction *I,
NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands());
+ // TODO: we need to remove context instruction after Value Tracking
+ // can run without context instruction
+ const SimplifyQuery Q = SQ.getWithInstruction(I);
bool AllConstant = setBasicExpressionInfo(I, E);
@@ -1169,13 +1173,13 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
E->setOpcode((CI->getOpcode() << 8) | Predicate);
- // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands
+ // TODO: 25% of our time is spent in simplifyCmpInst with pointer operands
assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() &&
"Wrong types on cmp instruction");
assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
E->getOperand(1)->getType() == I->getOperand(1)->getType()));
Value *V =
- SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ);
+ simplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (isa<SelectInst>(I)) {
@@ -1183,26 +1187,26 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
E->getOperand(1) == E->getOperand(2)) {
assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
E->getOperand(2)->getType() == I->getOperand(2)->getType());
- Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
- E->getOperand(2), SQ);
+ Value *V = simplifySelectInst(E->getOperand(0), E->getOperand(1),
+ E->getOperand(2), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
}
} else if (I->isBinaryOp()) {
Value *V =
- SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
+ simplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (auto *CI = dyn_cast<CastInst>(I)) {
Value *V =
- SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
+ simplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (auto *GEPI = dyn_cast<GetElementPtrInst>(I)) {
Value *V =
- SimplifyGEPInst(GEPI->getSourceElementType(), *E->op_begin(),
+ simplifyGEPInst(GEPI->getSourceElementType(), *E->op_begin(),
makeArrayRef(std::next(E->op_begin()), E->op_end()),
- GEPI->isInBounds(), SQ);
+ GEPI->isInBounds(), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (AllConstant) {
@@ -1453,10 +1457,12 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
if (Offset >= 0) {
if (auto *C = dyn_cast<Constant>(
lookupOperandLeader(DepSI->getValueOperand()))) {
- LLVM_DEBUG(dbgs() << "Coercing load from store " << *DepSI
- << " to constant " << *C << "\n");
- return createConstantExpression(
- getConstantStoreValueForLoad(C, Offset, LoadType, DL));
+ if (Constant *Res =
+ getConstantStoreValueForLoad(C, Offset, LoadType, DL)) {
+ LLVM_DEBUG(dbgs() << "Coercing load from store " << *DepSI
+ << " to constant " << *Res << "\n");
+ return createConstantExpression(Res);
+ }
}
}
} else if (auto *DepLI = dyn_cast<LoadInst>(DepInst)) {
@@ -1503,9 +1509,8 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
else if (auto *II = dyn_cast<IntrinsicInst>(DepInst)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start)
return createConstantExpression(UndefValue::get(LoadType));
- } else if (isAllocationFn(DepInst, TLI))
- if (auto *InitVal = getInitialValueOfAllocation(cast<CallBase>(DepInst),
- TLI, LoadType))
+ } else if (auto *InitVal =
+ getInitialValueOfAllocation(DepInst, TLI, LoadType))
return createConstantExpression(InitVal);
return nullptr;
@@ -3142,9 +3147,8 @@ bool NewGVN::singleReachablePHIPath(
// connected component finding in this routine, and it's probably not worth
// the complexity for the time being. So, we just keep a set of visited
// MemoryAccess and return true when we hit a cycle.
- if (Visited.count(First))
+ if (!Visited.insert(First).second)
return true;
- Visited.insert(First);
const auto *EndDef = First;
for (auto *ChainDef : optimized_def_chain(First)) {
@@ -3353,7 +3357,7 @@ void NewGVN::verifyStoreExpressions() const {
// instruction set, propagating value numbers, marking things touched, etc,
// until the set of touched instructions is completely empty.
void NewGVN::iterateTouchedInstructions() {
- unsigned int Iterations = 0;
+ uint64_t Iterations = 0;
// Figure out where touchedinstructions starts
int FirstInstr = TouchedInstructions.find_first();
// Nothing set, nothing to iterate, just return.
diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index e0d0301c1ef6..689a2a286cb9 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -125,6 +125,9 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
if (Call->isNoBuiltin() || Call->isStrictFP())
continue;
+ if (Call->isMustTailCall())
+ continue;
+
// Skip if function either has local linkage or is not a known library
// function.
LibFunc LF;
@@ -137,7 +140,7 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
case LibFunc_sqrt:
if (TTI->haveFastSqrt(Call->getType()) &&
optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
- DTU.hasValue() ? DTU.getPointer() : nullptr))
+ DTU ? DTU.getPointer() : nullptr))
break;
continue;
default:
diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index a110f7d5c241..e1cc3fc71c3e 100644
--- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -53,9 +53,9 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -65,6 +65,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#define DEBUG_TYPE "safepoint-placement"
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index c354fa177a60..da1737979305 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -42,7 +41,6 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -54,7 +52,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -183,7 +180,7 @@ void ReassociatePass::BuildRankMap(Function &F,
// we cannot move. This ensures that the ranks for these instructions are
// all different in the block.
for (Instruction &I : *BB)
- if (mayBeMemoryDependent(I))
+ if (mayHaveNonDefUseDependency(I))
ValueRankMap[&I] = ++BBRank;
}
}
@@ -1076,7 +1073,7 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
BinaryOperator *Mul =
BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
- Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
+ Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
Mul->takeName(Shl);
// Everyone now refers to the mul instruction.
diff --git a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index a49b9ad3f62b..9dc64493a9ee 100644
--- a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -24,8 +24,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index b795ad3899bc..51e4a5773f3e 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -258,6 +258,7 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
+using IsKnownBaseMapTy = MapVector<Value *, bool>;
using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
@@ -281,19 +282,29 @@ struct PartiallyConstructedSafepointRecord {
RematerializedValueMapTy RematerializedValues;
};
+struct RematerizlizationCandidateRecord {
+ // Chain from derived pointer to base.
+ SmallVector<Instruction *, 3> ChainToBase;
+ // Original base.
+ Value *RootOfChain;
+ // Cost of chain.
+ InstructionCost Cost;
+};
+using RematCandTy = MapVector<Value *, RematerizlizationCandidateRecord>;
+
} // end anonymous namespace
static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
Optional<OperandBundleUse> DeoptBundle =
Call->getOperandBundle(LLVMContext::OB_deopt);
- if (!DeoptBundle.hasValue()) {
+ if (!DeoptBundle) {
assert(AllowStatepointWithNoDeoptInfo &&
"Found non-leaf call without deopt info!");
return None;
}
- return DeoptBundle.getValue().Inputs;
+ return DeoptBundle->Inputs;
}
/// Compute the live-in set for every basic block in the function
@@ -385,45 +396,16 @@ static void analyzeParsePointLiveness(
Result.LiveSet = LiveSet;
}
-// Returns true is V is a knownBaseResult.
-static bool isKnownBaseResult(Value *V);
-
-// Returns true if V is a BaseResult that already exists in the IR, i.e. it is
-// not created by the findBasePointers algorithm.
-static bool isOriginalBaseResult(Value *V);
+/// Returns true if V is a known base.
+static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases);
-namespace {
-
-/// A single base defining value - An immediate base defining value for an
-/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
-/// For instructions which have multiple pointer [vector] inputs or that
-/// transition between vector and scalar types, there is no immediate base
-/// defining value. The 'base defining value' for 'Def' is the transitive
-/// closure of this relation stopping at the first instruction which has no
-/// immediate base defining value. The b.d.v. might itself be a base pointer,
-/// but it can also be an arbitrary derived pointer.
-struct BaseDefiningValueResult {
- /// Contains the value which is the base defining value.
- Value * const BDV;
+/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
+/// in the cache before.
+static void setKnownBase(Value *V, bool IsKnownBase,
+ IsKnownBaseMapTy &KnownBases);
- /// True if the base defining value is also known to be an actual base
- /// pointer.
- const bool IsKnownBase;
-
- BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
- : BDV(BDV), IsKnownBase(IsKnownBase) {
-#ifndef NDEBUG
- // Check consistency between new and old means of checking whether a BDV is
- // a base.
- bool MustBeBase = isKnownBaseResult(BDV);
- assert(!MustBeBase || MustBeBase == IsKnownBase);
-#endif
- }
-};
-
-} // end anonymous namespace
-
-static BaseDefiningValueResult findBaseDefiningValue(Value *I);
+static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
@@ -434,76 +416,122 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the later, the return pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
-static BaseDefiningValueResult
-findBaseDefiningValueOfVector(Value *I) {
+static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
// Each case parallels findBaseDefiningValue below, see that code for
// detailed motivation.
- if (isa<Argument>(I))
+ auto Cached = Cache.find(I);
+ if (Cached != Cache.end())
+ return Cached->second;
+
+ if (isa<Argument>(I)) {
// An incoming argument to the function is a base pointer
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (isa<Constant>(I))
+ if (isa<Constant>(I)) {
// Base of constant vector consists only of constant null pointers.
// For reasoning see similar case inside 'findBaseDefiningValue' function.
- return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
- true);
+ auto *CAZ = ConstantAggregateZero::get(I->getType());
+ Cache[I] = CAZ;
+ setKnownBase(CAZ, /* IsKnownBase */true, KnownBases);
+ return CAZ;
+ }
- if (isa<LoadInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<LoadInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (isa<InsertElementInst>(I))
+ if (isa<InsertElementInst>(I)) {
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
+ }
- if (isa<ShuffleVectorInst>(I))
+ if (isa<ShuffleVectorInst>(I)) {
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
// TODO: There a number of local optimizations which could be applied here
// for particular sufflevector patterns.
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
+ }
// The behavior of getelementptr instructions is the same for vector and
// non-vector data types.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
- return findBaseDefiningValue(GEP->getPointerOperand());
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ auto *BDV =
+ findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
+ Cache[GEP] = BDV;
+ return BDV;
+ }
+
+ // The behavior of freeze instructions is the same for vector and
+ // non-vector data types.
+ if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
+ auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
+ Cache[Freeze] = BDV;
+ return BDV;
+ }
// If the pointer comes through a bitcast of a vector of pointers to
// a vector of another type of pointer, then look through the bitcast
- if (auto *BC = dyn_cast<BitCastInst>(I))
- return findBaseDefiningValue(BC->getOperand(0));
+ if (auto *BC = dyn_cast<BitCastInst>(I)) {
+ auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases);
+ Cache[BC] = BDV;
+ return BDV;
+ }
// We assume that functions in the source language only return base
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// A PHI or Select is a base defining value. The outer findBasePointer
// algorithm is responsible for constructing a base value for this BDV.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"unknown vector instruction - no base found for vector element");
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
}
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
-static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
+static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
assert(I->getType()->isPtrOrPtrVectorTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
+ auto Cached = Cache.find(I);
+ if (Cached != Cache.end())
+ return Cached->second;
if (I->getType()->isVectorTy())
- return findBaseDefiningValueOfVector(I);
+ return findBaseDefiningValueOfVector(I, Cache, KnownBases);
- if (isa<Argument>(I))
+ if (isa<Argument>(I)) {
// An incoming argument to the function is a base pointer
// We should have never reached here if this argument isn't an gc value
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
if (isa<Constant>(I)) {
// We assume that objects with a constant base (e.g. a global) can't move
@@ -516,8 +544,10 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// "phi (const1, const2)" or "phi (const, regular gc ptr)".
// See constant.ll file for relevant test cases.
- return BaseDefiningValueResult(
- ConstantPointerNull::get(cast<PointerType>(I->getType())), true);
+ auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType()));
+ Cache[I] = CPN;
+ setKnownBase(CPN, /* IsKnownBase */true, KnownBases);
+ return CPN;
}
// inttoptrs in an integral address space are currently ill-defined. We
@@ -525,8 +555,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// constant rule above and because we don't really have a better semantic
// to give them. Note that the optimizer is always free to insert undefined
// behavior on dynamically dead paths as well.
- if (isa<IntToPtrInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<IntToPtrInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
@@ -539,16 +572,31 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// not simply a pointer cast (i.e. an inttoptr). We don't know how to
// handle int->ptr conversion.
assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
- return findBaseDefiningValue(Def);
+ auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases);
+ Cache[CI] = BDV;
+ return BDV;
}
- if (isa<LoadInst>(I))
+ if (isa<LoadInst>(I)) {
// The value loaded is an gc base itself
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
// The base of this GEP is the base
- return findBaseDefiningValue(GEP->getPointerOperand());
+ auto *BDV =
+ findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
+ Cache[GEP] = BDV;
+ return BDV;
+ }
+
+ if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
+ auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
+ Cache[Freeze] = BDV;
+ return BDV;
+ }
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
@@ -569,24 +617,32 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
llvm_unreachable(
"interaction with the gcroot mechanism is not supported");
case Intrinsic::experimental_gc_get_pointer_base:
- return findBaseDefiningValue(II->getOperand(0));
+ auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases);
+ Cache[II] = BDV;
+ return BDV;
}
}
// We assume that functions in the source language only return base
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// TODO: I have absolutely no idea how to implement this part yet. It's not
// necessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
- if (isa<AtomicCmpXchgInst>(I))
+ if (isa<AtomicCmpXchgInst>(I)) {
// A CAS is effectively a atomic store and load combined under a
// predicate. From the perspective of base pointers, we just treat it
// like a load.
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
"binary ops which don't apply to pointers");
@@ -594,8 +650,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// The aggregate ops. Aggregates can either be in the heap or on the
// stack, but in either case, this is simply a field load. As a result,
// this is a defining definition of the base just like a load is.
- if (isa<ExtractValueInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<ExtractValueInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// We should never see an insert vector since that would require we be
// tracing back a struct value not a pointer value.
@@ -606,6 +665,8 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// substituting gc.get.pointer.base() intrinsic.
bool IsKnownBase =
isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");
+ setKnownBase(I, /* IsKnownBase */IsKnownBase, KnownBases);
+ Cache[I] = I;
// An extractelement produces a base result exactly when it's input does.
// We may need to insert a parallel instruction to extract the appropriate
@@ -615,33 +676,38 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// Note: There a lot of obvious peephole cases here. This are deliberately
// handled after the main base pointer inference algorithm to make writing
// test cases to exercise that code easier.
- return BaseDefiningValueResult(I, IsKnownBase);
+ return I;
// The last two cases here don't return a base pointer. Instead, they
// return a value which dynamically selects from among several base
// derived pointers (each with it's own base potentially). It's the job of
// the caller to resolve these.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
- "missing instruction case in findBaseDefiningValing");
- return BaseDefiningValueResult(I, IsKnownBase);
+ "missing instruction case in findBaseDefiningValue");
+ return I;
}
/// Returns the base defining value for this value.
-static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
- Value *&Cached = Cache[I];
- if (!Cached) {
- Cached = findBaseDefiningValue(I).BDV;
+static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ if (Cache.find(I) == Cache.end()) {
+ auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
+ Cache[I] = BDV;
LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
- << Cached->getName() << "\n");
+ << Cache[I]->getName() << ", is known base = "
+ << KnownBases[I] << "\n");
}
assert(Cache[I] != nullptr);
- return Cached;
+ assert(KnownBases.find(Cache[I]) != KnownBases.end() &&
+ "Cached value must be present in known bases map");
+ return Cache[I];
}
/// Return a base pointer for this value if known. Otherwise, return it's
/// base defining value.
-static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
- Value *Def = findBaseDefiningValueCached(I, Cache);
+static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases);
auto Found = Cache.find(Def);
if (Found != Cache.end()) {
// Either a base-of relation, or a self reference. Caller must check.
@@ -651,6 +717,7 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
return Def;
}
+#ifndef NDEBUG
/// This value is a base pointer that is not generated by RS4GC, i.e. it already
/// exists in the code.
static bool isOriginalBaseResult(Value *V) {
@@ -659,21 +726,22 @@ static bool isOriginalBaseResult(Value *V) {
!isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
!isa<ShuffleVectorInst>(V);
}
+#endif
-/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
-/// is it known to be a base pointer? Or do we need to continue searching.
-static bool isKnownBaseResult(Value *V) {
- if (isOriginalBaseResult(V))
- return true;
- if (isa<Instruction>(V) &&
- cast<Instruction>(V)->getMetadata("is_base_value")) {
- // This is a previously inserted base phi or select. We know
- // that this is a base value.
- return true;
- }
+static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) {
+ auto It = KnownBases.find(V);
+ assert(It != KnownBases.end() && "Value not present in the map");
+ return It->second;
+}
- // We need to keep searching
- return false;
+static void setKnownBase(Value *V, bool IsKnownBase,
+ IsKnownBaseMapTy &KnownBases) {
+#ifndef NDEBUG
+ auto It = KnownBases.find(V);
+ if (It != KnownBases.end())
+ assert(It->second == IsKnownBase && "Changing already present value");
+#endif
+ KnownBases[V] = IsKnownBase;
}
// Returns true if First and Second values are both scalar or both vector.
@@ -801,10 +869,11 @@ static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
/// For gc objects, this is simply itself. On success, returns a value which is
/// the base pointer. (This is reliable and can be used for relocation.) On
/// failure, returns nullptr.
-static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
- Value *Def = findBaseOrBDV(I, Cache);
+static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ Value *Def = findBaseOrBDV(I, Cache, KnownBases);
- if (isKnownBaseResult(Def) && areBothVectorOrScalar(Def, I))
+ if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I))
return Def;
// Here's the rough algorithm:
@@ -887,8 +956,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
assert(!isOriginalBaseResult(Current) && "why did it get added?");
auto visitIncomingValue = [&](Value *InVal) {
- Value *Base = findBaseOrBDV(InVal, Cache);
- if (isKnownBaseResult(Base) && areBothVectorOrScalar(Base, InVal))
+ Value *Base = findBaseOrBDV(InVal, Cache, KnownBases);
+ if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal))
// Known bases won't need new instructions introduced and can be
// ignored safely. However, this can only be done when InVal and Base
// are both scalar or both vector. Otherwise, we need to find a
@@ -924,12 +993,16 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
for (auto Pair : States) {
Value *BDV = Pair.first;
auto canPruneInput = [&](Value *V) {
- Value *BDV = findBaseOrBDV(V, Cache);
- if (V->stripPointerCasts() != BDV)
+ // If the input of the BDV is the BDV itself we can prune it. This is
+ // only possible if the BDV is a PHI node.
+ if (V->stripPointerCasts() == BDV)
+ return true;
+ Value *VBDV = findBaseOrBDV(V, Cache, KnownBases);
+ if (V->stripPointerCasts() != VBDV)
return false;
// The assumption is that anything not in the state list is
// propagates a base pointer.
- return States.count(BDV) == 0;
+ return States.count(VBDV) == 0;
};
bool CanPrune = true;
@@ -975,13 +1048,13 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) ||
+ assert((!isKnownBase(BDV, KnownBases) ||
!areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) &&
"why did it get added?");
BDVState NewState(BDV);
visitBDVOperands(BDV, [&](Value *Op) {
- Value *BDV = findBaseOrBDV(Op, Cache);
+ Value *BDV = findBaseOrBDV(Op, Cache, KnownBases);
auto OpState = GetStateForBDV(BDV, Op);
NewState.meet(OpState);
});
@@ -1014,8 +1087,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, BaseValue)) &&
- "why did it get added?");
+ assert(
+ (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
+ "why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
@@ -1033,6 +1107,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
States[I] = BDVState(I, BDVState::Base, BaseInst);
+ setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
} else if (!isa<VectorType>(I->getType())) {
// We need to handle cases that have a vector base but the instruction is
// a scalar type (these could be phis or selects or any instruction that
@@ -1055,7 +1130,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, State.getBaseValue())) &&
+ assert((!isKnownBase(I, KnownBases) ||
+ !areBothVectorOrScalar(I, State.getBaseValue())) &&
"why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
@@ -1087,6 +1163,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Add metadata marking this as a base value
BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
States[I] = BDVState(I, BDVState::Conflict, BaseInst);
+ setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
}
#ifndef NDEBUG
@@ -1102,7 +1179,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// assured to be able to determine an instruction which produces it's base
// pointer.
auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
- Value *BDV = findBaseOrBDV(Input, Cache);
+ Value *BDV = findBaseOrBDV(Input, Cache, KnownBases);
Value *Base = nullptr;
if (!States.count(BDV)) {
assert(areBothVectorOrScalar(BDV, Input));
@@ -1129,7 +1206,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) ||
+ assert((!isKnownBase(BDV, KnownBases) ||
!areBothVectorOrScalar(BDV, State.getBaseValue())) &&
"why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
@@ -1154,13 +1231,21 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
Value *OldBase = BlockToValue[InBB];
Value *Base = getBaseForInput(InVal, nullptr);
+
+ // We can't use `stripPointerCasts` instead of this function because
+ // `stripPointerCasts` doesn't handle vectors of pointers.
+ auto StripBitCasts = [](Value *V) -> Value * {
+ while (auto *BC = dyn_cast<BitCastInst>(V))
+ V = BC->getOperand(0);
+ return V;
+ };
// In essence this assert states: the only way two values
// incoming from the same basic block may be different is by
// being different bitcasts of the same value. A cleanup
// that remains TODO is changing findBaseOrBDV to return an
// llvm::Value of the correct type (and still remain pure).
// This will remove the need to add bitcasts.
- assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
+ assert(StripBitCasts(Base) == StripBitCasts(OldBase) &&
"findBaseOrBDV should be pure!");
#endif
}
@@ -1223,8 +1308,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) || !areBothVectorOrScalar(BDV, Base)) &&
- "why did it get added?");
+ assert(
+ (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) &&
+ "why did it get added?");
LLVM_DEBUG(
dbgs() << "Updating base value cache"
@@ -1255,9 +1341,10 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// pointer was a base pointer.
static void findBasePointers(const StatepointLiveSetTy &live,
PointerToBaseTy &PointerToBase, DominatorTree *DT,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
for (Value *ptr : live) {
- Value *base = findBasePointer(ptr, DVCache);
+ Value *base = findBasePointer(ptr, DVCache, KnownBases);
assert(base && "failed to find base pointer");
PointerToBase[ptr] = base;
assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
@@ -1272,7 +1359,8 @@ static void findBasePointers(const StatepointLiveSetTy &live,
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
CallBase *Call,
PartiallyConstructedSafepointRecord &result,
- PointerToBaseTy &PointerToBase) {
+ PointerToBaseTy &PointerToBase,
+ IsKnownBaseMapTy &KnownBases) {
StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
// We assume that all pointers passed to deopt are base pointers; as an
// optimization, we can use this to avoid seperately materializing the base
@@ -1286,7 +1374,8 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
PotentiallyDerivedPointers.remove(V);
PointerToBase[V] = V;
}
- findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache);
+ findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache,
+ KnownBases);
}
/// Given an updated version of the dataflow liveness results, update the
@@ -1349,23 +1438,23 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] =
// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
- AttributeList AL) {
- if (AL.isEmpty())
- return AL;
+ AttributeList OrigAL,
+ AttributeList StatepointAL) {
+ if (OrigAL.isEmpty())
+ return StatepointAL;
// Remove the readonly, readnone, and statepoint function attributes.
- AttrBuilder FnAttrs(Ctx, AL.getFnAttrs());
+ AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
for (auto Attr : FnAttrsToStrip)
FnAttrs.removeAttribute(Attr);
- for (Attribute A : AL.getFnAttrs()) {
+ for (Attribute A : OrigAL.getFnAttrs()) {
if (isStatepointDirectiveAttr(A))
FnAttrs.removeAttribute(A);
}
// Just skip parameter and return attributes for now
- return AttributeList::get(Ctx, AttributeList::FunctionIndex,
- AttributeSet::get(Ctx, FnAttrs));
+ return StatepointAL.addFnAttributes(Ctx, FnAttrs);
}
/// Helper function to place all gc relocates necessary for the given
@@ -1570,8 +1659,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
assert(DeoptLowering.equals("live-through") && "Unsupported value!");
}
- Value *CallTarget = Call->getCalledOperand();
- if (Function *F = dyn_cast<Function>(CallTarget)) {
+ FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
+ if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
auto IID = F->getIntrinsicID();
if (IID == Intrinsic::experimental_deoptimize) {
// Calls to llvm.experimental.deoptimize are lowered to calls to the
@@ -1589,8 +1678,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// the same module. This is fine -- we assume the frontend knew what it
// was doing when generating this kind of IR.
CallTarget = F->getParent()
- ->getOrInsertFunction("__llvm_deoptimize", FTy)
- .getCallee();
+ ->getOrInsertFunction("__llvm_deoptimize", FTy);
IsDeoptimize = true;
} else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
@@ -1686,8 +1774,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
CallTarget =
F->getParent()
- ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy)
- .getCallee();
+ ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
}
}
@@ -1705,8 +1792,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// function attributes. In case if we can handle this set of attributes -
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- SPCall->setAttributes(
- legalizeCallAttributes(CI->getContext(), CI->getAttributes()));
+ SPCall->setAttributes(legalizeCallAttributes(
+ CI->getContext(), CI->getAttributes(), SPCall->getAttributes()));
Token = cast<GCStatepointInst>(SPCall);
@@ -1732,8 +1819,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// function attributes. In case if we can handle this set of attributes -
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- SPInvoke->setAttributes(
- legalizeCallAttributes(II->getContext(), II->getAttributes()));
+ SPInvoke->setAttributes(legalizeCallAttributes(
+ II->getContext(), II->getAttributes(), SPInvoke->getAttributes()));
Token = cast<GCStatepointInst>(SPInvoke);
@@ -2071,6 +2158,7 @@ static void relocationViaAlloca(
assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
"we must have the same allocas with lives");
+ (void) NumRematerializedValues;
if (!PromotableAllocas.empty()) {
// Apply mem2reg to promote alloca to SSA
PromoteMemToReg(PromotableAllocas, DT);
@@ -2221,27 +2309,25 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
return true;
}
-// From the statepoint live set pick values that are cheaper to recompute then
-// to relocate. Remove this values from the live set, rematerialize them after
-// statepoint and record them in "Info" structure. Note that similar to
-// relocated values we don't do any user adjustments here.
-static void rematerializeLiveValues(CallBase *Call,
- PartiallyConstructedSafepointRecord &Info,
- PointerToBaseTy &PointerToBase,
- TargetTransformInfo &TTI) {
+// Find derived pointers that can be recomputed cheaply enough and fill
+// RematerizationCandidates with such candidates.
+static void
+findRematerializationCandidates(PointerToBaseTy PointerToBase,
+ RematCandTy &RematerizationCandidates,
+ TargetTransformInfo &TTI) {
const unsigned int ChainLengthThreshold = 10;
- // Record values we are going to delete from this statepoint live set.
- // We can not di this in following loop due to iterator invalidation.
- SmallVector<Value *, 32> LiveValuesToBeDeleted;
+ for (auto P2B : PointerToBase) {
+ auto *Derived = P2B.first;
+ auto *Base = P2B.second;
+ // Consider only derived pointers.
+ if (Derived == Base)
+ continue;
- for (Value *LiveValue: Info.LiveSet) {
- // For each live pointer find its defining chain
+ // For each live pointer find its defining chain.
SmallVector<Instruction *, 3> ChainToBase;
- assert(PointerToBase.count(LiveValue));
Value *RootOfChain =
- findRematerializableChainToBasePointer(ChainToBase,
- LiveValue);
+ findRematerializableChainToBasePointer(ChainToBase, Derived);
// Nothing to do, or chain is too long
if ( ChainToBase.size() == 0 ||
@@ -2250,9 +2336,9 @@ static void rematerializeLiveValues(CallBase *Call,
// Handle the scenario where the RootOfChain is not equal to the
// Base Value, but they are essentially the same phi values.
- if (RootOfChain != PointerToBase[LiveValue]) {
+ if (RootOfChain != PointerToBase[Derived]) {
PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
- PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]);
+ PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
if (!OrigRootPhi || !AlternateRootPhi)
continue;
// PHI nodes that have the same incoming values, and belonging to the same
@@ -2266,33 +2352,61 @@ static void rematerializeLiveValues(CallBase *Call,
// deficiency in the findBasePointer algorithm.
if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
continue;
- // Now that the phi nodes are proved to be the same, assert that
- // findBasePointer's newly generated AlternateRootPhi is present in the
- // liveset of the call.
- assert(Info.LiveSet.count(AlternateRootPhi));
}
- // Compute cost of this chain
+ // Compute cost of this chain.
InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
// TODO: We can also account for cases when we will be able to remove some
// of the rematerialized values by later optimization passes. I.e if
// we rematerialized several intersecting chains. Or if original values
// don't have any uses besides this statepoint.
+ // Ok, there is a candidate.
+ RematerizlizationCandidateRecord Record;
+ Record.ChainToBase = ChainToBase;
+ Record.RootOfChain = RootOfChain;
+ Record.Cost = Cost;
+ RematerizationCandidates.insert({ Derived, Record });
+ }
+}
+
+// From the statepoint live set pick values that are cheaper to recompute than
+// to relocate. Remove these values from the live set, rematerialize them after
+// statepoint and record them in "Info" structure. Note that similar to
+// relocated values we don't do any user adjustments here.
+static void rematerializeLiveValues(CallBase *Call,
+ PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase,
+ RematCandTy &RematerizationCandidates,
+ TargetTransformInfo &TTI) {
+ // Record values we are going to delete from this statepoint live set.
+ // We cannot do this in the following loop due to iterator invalidation.
+ SmallVector<Value *, 32> LiveValuesToBeDeleted;
+
+ for (Value *LiveValue : Info.LiveSet) {
+ auto It = RematerizationCandidates.find(LiveValue);
+ if (It == RematerizationCandidates.end())
+ continue;
+
+ RematerizlizationCandidateRecord &Record = It->second;
+
+ InstructionCost Cost = Record.Cost;
// For invokes we need to rematerialize each chain twice - for normal and
// for unwind basic blocks. Model this by multiplying cost by two.
- if (isa<InvokeInst>(Call)) {
+ if (isa<InvokeInst>(Call))
Cost *= 2;
- }
- // If it's too expensive - skip it
+
+ // If it's too expensive - skip it.
if (Cost >= RematerializationThreshold)
continue;
// Remove value from the live set
LiveValuesToBeDeleted.push_back(LiveValue);
- // Clone instructions and record them inside "Info" structure
+ // Clone instructions and record them inside "Info" structure.
- // Walk backwards to visit top-most instructions first
+ // For each live pointer, find its defining chain.
+ SmallVector<Instruction *, 3> ChainToBase = Record.ChainToBase;
+ // Walk backwards to visit top-most instructions first.
std::reverse(ChainToBase.begin(), ChainToBase.end());
// Utility function which clones all instructions from "ChainToBase"
@@ -2352,7 +2466,7 @@ static void rematerializeLiveValues(CallBase *Call,
Instruction *InsertBefore = Call->getNextNode();
assert(InsertBefore);
Instruction *RematerializedValue = rematerializeChain(
- InsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ InsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
auto *Invoke = cast<InvokeInst>(Call);
@@ -2363,9 +2477,9 @@ static void rematerializeLiveValues(CallBase *Call,
&*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue = rematerializeChain(
- NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ NormalInsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Instruction *UnwindRematerializedValue = rematerializeChain(
- UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ UnwindInsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
@@ -2380,7 +2494,8 @@ static void rematerializeLiveValues(CallBase *Call,
static bool inlineGetBaseAndOffset(Function &F,
SmallVectorImpl<CallInst *> &Intrinsics,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
auto &Context = F.getContext();
auto &DL = F.getParent()->getDataLayout();
bool Changed = false;
@@ -2389,7 +2504,8 @@ static bool inlineGetBaseAndOffset(Function &F,
switch (Callsite->getIntrinsicID()) {
case Intrinsic::experimental_gc_get_pointer_base: {
Changed = true;
- Value *Base = findBasePointer(Callsite->getOperand(0), DVCache);
+ Value *Base =
+ findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
assert(!DVCache.count(Callsite));
auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast(
Base, Callsite->getType(), suffixed_name_or(Base, ".cast", ""));
@@ -2404,7 +2520,7 @@ static bool inlineGetBaseAndOffset(Function &F,
case Intrinsic::experimental_gc_get_pointer_offset: {
Changed = true;
Value *Derived = Callsite->getOperand(0);
- Value *Base = findBasePointer(Derived, DVCache);
+ Value *Base = findBasePointer(Derived, DVCache, KnownBases);
assert(!DVCache.count(Callsite));
unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
@@ -2431,7 +2547,8 @@ static bool inlineGetBaseAndOffset(Function &F,
static bool insertParsePoints(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
SmallVectorImpl<CallBase *> &ToUpdate,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
#ifndef NDEBUG
// Validate the input
std::set<CallBase *> Uniqued;
@@ -2487,7 +2604,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// B) Find the base pointers for each live pointer
for (size_t i = 0; i < Records.size(); i++) {
PartiallyConstructedSafepointRecord &info = Records[i];
- findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase);
+ findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
}
if (PrintBasePointers) {
errs() << "Base Pairs (w/o Relocation):\n";
@@ -2563,11 +2680,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
Holders.clear();
+ // Compute the cost of possible re-materialization of derived pointers.
+ RematCandTy RematerizationCandidates;
+ findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);
+
// In order to reduce live set of statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
for (size_t i = 0; i < Records.size(); i++)
- rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI);
+ rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
+ RematerizationCandidates, TTI);
// We need this to safely RAUW and delete call or invoke return values that
// may themselves be live over a statepoint. For details, please see usage in
@@ -2930,13 +3052,18 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// inlineGetBaseAndOffset() and insertParsePoints().
DefiningValueMapTy DVCache;
+ // Mapping between a base value and a flag indicating whether it's a known
+ // base or not.
+ IsKnownBaseMapTy KnownBases;
+
if (!Intrinsics.empty())
// Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before finding
// live references.
- MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache);
+ MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);
if (!ParsePointNeeded.empty())
- MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache);
+ MadeChange |=
+ insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);
return MadeChange;
}
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index c34da51e6dc1..2282ef636076 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -17,20 +17,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/SCCP.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
@@ -38,14 +33,13 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
@@ -59,7 +53,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/Transforms/Utils/SCCPSolver.h"
#include <cassert>
#include <utility>
#include <vector>
@@ -97,6 +91,18 @@ static bool isOverdefined(const ValueLatticeElement &LV) {
return !LV.isUnknownOrUndef() && !isConstant(LV);
}
+static bool canRemoveInstruction(Instruction *I) {
+ if (wouldInstructionBeTriviallyDead(I))
+ return true;
+
+ // Some instructions can be handled but are rejected above. Catch
+ // those cases by falling through to here.
+ // TODO: Mark globals as being constant earlier, so
+ // TODO: wouldInstructionBeTriviallyDead() knows that atomic loads
+ // TODO: are safe to remove.
+ return isa<LoadInst>(I);
+}
+
static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
Constant *Const = nullptr;
if (V->getType()->isStructTy()) {
@@ -127,7 +133,8 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
// Calls with "clang.arc.attachedcall" implicitly use the return value and
// those uses cannot be updated with a constant.
CallBase *CB = dyn_cast<CallBase>(V);
- if (CB && ((CB->isMustTailCall() && !CB->isSafeToRemove()) ||
+ if (CB && ((CB->isMustTailCall() &&
+ !canRemoveInstruction(CB)) ||
CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall))) {
Function *F = CB->getCalledFunction();
@@ -156,7 +163,7 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB,
if (Inst.getType()->isVoidTy())
continue;
if (tryToReplaceWithConstant(Solver, &Inst)) {
- if (Inst.isSafeToRemove())
+ if (canRemoveInstruction(&Inst))
Inst.eraseFromParent();
MadeChanges = true;
@@ -170,6 +177,7 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB,
continue;
if (IV.getConstantRange().isAllNonNegative()) {
auto *ZExt = new ZExtInst(ExtOp, Inst.getType(), "", &Inst);
+ ZExt->takeName(&Inst);
InsertedValues.insert(ZExt);
Inst.replaceAllUsesWith(ZExt);
Solver.removeLatticeValueFor(&Inst);
@@ -182,10 +190,14 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB,
return MadeChanges;
}
+static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
+ DomTreeUpdater &DTU,
+ BasicBlock *&NewUnreachableBB);
+
// runSCCP() - Run the Sparse Conditional Constant Propagation algorithm,
// and return true if the function was modified.
static bool runSCCP(Function &F, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI, DomTreeUpdater &DTU) {
LLVM_DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
SCCPSolver Solver(
DL, [TLI](Function &F) -> const TargetLibraryInfo & { return *TLI; },
@@ -213,13 +225,12 @@ static bool runSCCP(Function &F, const DataLayout &DL,
// as we cannot modify the CFG of the function.
SmallPtrSet<Value *, 32> InsertedValues;
+ SmallVector<BasicBlock *, 8> BlocksToErase;
for (BasicBlock &BB : F) {
if (!Solver.isBlockExecutable(&BB)) {
LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB);
-
++NumDeadBlocks;
- NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB).first;
-
+ BlocksToErase.push_back(&BB);
MadeChanges = true;
continue;
}
@@ -228,17 +239,32 @@ static bool runSCCP(Function &F, const DataLayout &DL,
NumInstRemoved, NumInstReplaced);
}
+ // Remove unreachable blocks and non-feasible edges.
+ for (BasicBlock *DeadBB : BlocksToErase)
+ NumInstRemoved += changeToUnreachable(DeadBB->getFirstNonPHI(),
+ /*PreserveLCSSA=*/false, &DTU);
+
+ BasicBlock *NewUnreachableBB = nullptr;
+ for (BasicBlock &BB : F)
+ MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU, NewUnreachableBB);
+
+ for (BasicBlock *DeadBB : BlocksToErase)
+ if (!DeadBB->hasAddressTaken())
+ DTU.deleteBB(DeadBB);
+
return MadeChanges;
}
PreservedAnalyses SCCPPass::run(Function &F, FunctionAnalysisManager &AM) {
const DataLayout &DL = F.getParent()->getDataLayout();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- if (!runSCCP(F, DL, &TLI))
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ if (!runSCCP(F, DL, &TLI, DTU))
return PreservedAnalyses::all();
auto PA = PreservedAnalyses();
- PA.preserveSet<CFGAnalyses>();
+ PA.preserve<DominatorTreeAnalysis>();
return PA;
}
@@ -261,7 +287,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
- AU.setPreservesCFG();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
// runOnFunction - Run the Sparse Conditional Constant Propagation
@@ -272,7 +298,10 @@ public:
const DataLayout &DL = F.getParent()->getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- return runSCCP(F, DL, TLI);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DomTreeUpdater DTU(DTWP ? &DTWP->getDomTree() : nullptr,
+ DomTreeUpdater::UpdateStrategy::Lazy);
+ return runSCCP(F, DL, TLI, DTU);
}
};
@@ -342,7 +371,8 @@ static void findReturnsToZap(Function &F,
}
static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
- DomTreeUpdater &DTU) {
+ DomTreeUpdater &DTU,
+ BasicBlock *&NewUnreachableBB) {
SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors;
bool HasNonFeasibleEdges = false;
for (BasicBlock *Succ : successors(BB)) {
@@ -362,7 +392,19 @@ static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
isa<IndirectBrInst>(TI)) &&
"Terminator must be a br, switch or indirectbr");
- if (FeasibleSuccessors.size() == 1) {
+ if (FeasibleSuccessors.size() == 0) {
+ // Branch on undef/poison, replace with unreachable.
+ SmallPtrSet<BasicBlock *, 8> SeenSuccs;
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ for (BasicBlock *Succ : successors(BB)) {
+ Succ->removePredecessor(BB);
+ if (SeenSuccs.insert(Succ).second)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
+ TI->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ DTU.applyUpdatesPermissive(Updates);
+ } else if (FeasibleSuccessors.size() == 1) {
// Replace with an unconditional branch to the only feasible successor.
BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin();
SmallVector<DominatorTree::UpdateType, 8> Updates;
@@ -385,6 +427,23 @@ static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
} else if (FeasibleSuccessors.size() > 1) {
SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI));
SmallVector<DominatorTree::UpdateType, 8> Updates;
+
+ // If the default destination is unfeasible it will never be taken. Replace
+ // it with a new block with a single Unreachable instruction.
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ if (!FeasibleSuccessors.contains(DefaultDest)) {
+ if (!NewUnreachableBB) {
+ NewUnreachableBB =
+ BasicBlock::Create(DefaultDest->getContext(), "default.unreachable",
+ DefaultDest->getParent(), DefaultDest);
+ new UnreachableInst(DefaultDest->getContext(), NewUnreachableBB);
+ }
+
+ SI->setDefaultDest(NewUnreachableBB);
+ Updates.push_back({DominatorTree::Delete, BB, DefaultDest});
+ Updates.push_back({DominatorTree::Insert, BB, NewUnreachableBB});
+ }
+
for (auto CI = SI->case_begin(); CI != SI->case_end();) {
if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) {
++CI;
@@ -532,11 +591,13 @@ bool llvm::runIPSCCP(
NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHI(),
/*PreserveLCSSA=*/false, &DTU);
+ BasicBlock *NewUnreachableBB = nullptr;
for (BasicBlock &BB : F)
- MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU);
+ MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU, NewUnreachableBB);
for (BasicBlock *DeadBB : BlocksToErase)
- DTU.deleteBB(DeadBB);
+ if (!DeadBB->hasAddressTaken())
+ DTU.deleteBB(DeadBB);
for (BasicBlock &BB : F) {
for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 8be8946702be..143a035749c7 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -57,11 +57,9 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -78,14 +76,12 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
-#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
@@ -1016,7 +1012,7 @@ private:
I.getParent()->getFirstInsertionPt() == I.getParent()->end())
return PI.setAborted(&I);
- // TODO: We could use SimplifyInstruction here to fold PHINodes and
+ // TODO: We could use simplifyInstruction here to fold PHINodes and
// SelectInsts. However, doing so requires to change the current
// dead-operand-tracking mechanism. For instance, suppose neither loading
// from %U nor %other traps. Then "load (select undef, %U, %other)" does not
@@ -1987,13 +1983,22 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
+ Use *U = S.getUse();
+
+ // Lifetime intrinsics operate over the whole alloca whose sizes are usually
+ // larger than other load/store slices (RelEnd > Size). But lifetime
+ // intrinsics are always promotable and should not impact other slices'
+ // promotability of the partition.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
+ if (II->isLifetimeStartOrEnd() || II->isDroppable())
+ return true;
+ }
+
// We can't reasonably handle cases where the load or store extends past
// the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
- Use *U = S.getUse();
-
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
@@ -2048,9 +2053,6 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
return false;
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
- return false;
} else {
return false;
}
@@ -2179,10 +2181,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
return V;
}
- SmallVector<int, 8> Mask;
- Mask.reserve(NumElements);
- for (unsigned i = BeginIndex; i != EndIndex; ++i)
- Mask.push_back(i);
+ auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V;
@@ -2734,10 +2733,9 @@ private:
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
V = IRB.CreateMul(
IRB.CreateZExt(V, SplatIntTy, "zext"),
- ConstantExpr::getUDiv(
- Constant::getAllOnesValue(SplatIntTy),
- ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()),
- SplatIntTy)),
+ IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
+ IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
+ SplatIntTy)),
"isplat");
return V;
}
@@ -2887,7 +2885,7 @@ private:
assert((IsDest && II.getRawDest() == OldPtr) ||
(!IsDest && II.getRawSource() == OldPtr));
- MaybeAlign SliceAlign = getSliceAlign();
+ Align SliceAlign = getSliceAlign();
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
@@ -3481,19 +3479,13 @@ private:
Type *Ty = GEPI.getSourceElementType();
Value *True = Sel->getTrueValue();
- Value *NTrue =
- IsInBounds
- ? IRB.CreateInBoundsGEP(Ty, True, Index,
- True->getName() + ".sroa.gep")
- : IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep");
+ Value *NTrue = IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep",
+ IsInBounds);
Value *False = Sel->getFalseValue();
- Value *NFalse =
- IsInBounds
- ? IRB.CreateInBoundsGEP(Ty, False, Index,
- False->getName() + ".sroa.gep")
- : IRB.CreateGEP(Ty, False, Index, False->getName() + ".sroa.gep");
+ Value *NFalse = IRB.CreateGEP(Ty, False, Index,
+ False->getName() + ".sroa.gep", IsInBounds);
Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
Sel->getName() + ".sroa.sel");
@@ -3547,10 +3539,8 @@ private:
IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));
Type *Ty = GEPI.getSourceElementType();
- NewVal = IsInBounds ? IRB.CreateInBoundsGEP(Ty, In, Index,
- In->getName() + ".sroa.gep")
- : IRB.CreateGEP(Ty, In, Index,
- In->getName() + ".sroa.gep");
+ NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",
+ IsInBounds);
}
NewPN->addIncoming(NewVal, B);
}
@@ -3972,16 +3962,15 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
for (LoadInst *LI : Loads) {
SplitLoads.clear();
- IntegerType *Ty = cast<IntegerType>(LI->getType());
- assert(Ty->getBitWidth() % 8 == 0);
- uint64_t LoadSize = Ty->getBitWidth() / 8;
- assert(LoadSize > 0 && "Cannot have a zero-sized integer load!");
-
auto &Offsets = SplitOffsetsMap[LI];
- assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
- "Slice size should always match load size exactly!");
+ unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
+ assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
+ "Load must have type size equal to store size");
+ assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
+ "Load must be >= slice size");
+
uint64_t BaseOffset = Offsets.S->beginOffset();
- assert(BaseOffset + LoadSize > BaseOffset &&
+ assert(BaseOffset + SliceSize > BaseOffset &&
"Cannot represent alloca access size using 64-bit integers!");
Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
@@ -3992,7 +3981,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
- auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
+ auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
auto AS = LI->getPointerAddressSpace();
auto *PartPtrTy = PartTy->getPointerTo(AS);
LoadInst *PLoad = IRB.CreateAlignedLoad(
@@ -4025,7 +4014,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Setup the next partition.
PartOffset = Offsets.Splits[Idx];
++Idx;
- PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset;
+ PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
}
// Now that we have the split loads, do the slow walk over all uses of the
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index f9650efc051f..008ddfc72740 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -16,16 +16,13 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Scalar.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
using namespace llvm;
@@ -76,7 +73,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopRerollLegacyPassPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnrollAndJamPass(Registry);
- initializeLoopUnswitchPass(Registry);
initializeWarnMissedTransformationsLegacyPass(Registry);
initializeLoopVersioningLICMLegacyPassPass(Registry);
initializeLoopIdiomRecognizeLegacyPassPass(Registry);
@@ -104,6 +100,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSimpleLoopUnswitchLegacyPassPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
+ initializeTLSVariableHoistLegacyPassPass(Registry);
initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReduceLegacyPassPass(Registry);
@@ -214,10 +211,6 @@ void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopUnrollAndJamPass());
}
-void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopUnswitchPass());
-}
-
void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLowerAtomicPass());
}
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index 29cea42e4a00..e2976ace3a4a 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -1,5 +1,5 @@
//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
-// instrinsics
+// intrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -24,11 +24,9 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -36,7 +34,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <algorithm>
#include <cassert>
using namespace llvm;
@@ -876,7 +873,7 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
bool ModifiedDTOnIteration = false;
MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
- DTU.hasValue() ? DTU.getPointer() : nullptr);
+ DTU ? DTU.getPointer() : nullptr);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 3606c8a4b073..08f4b2173da2 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -39,8 +39,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
@@ -52,7 +50,7 @@ using namespace llvm;
#define DEBUG_TYPE "scalarizer"
-static cl::opt<bool> ScalarizeVariableInsertExtract(
+static cl::opt<bool> ClScalarizeVariableInsertExtract(
"scalarize-variable-insert-extract", cl::init(true), cl::Hidden,
cl::desc("Allow the scalarizer pass to scalarize "
"insertelement/extractelement with variable index"));
@@ -60,9 +58,9 @@ static cl::opt<bool> ScalarizeVariableInsertExtract(
// This is disabled by default because having separate loads and stores
// makes it more likely that the -combiner-alias-analysis limits will be
// reached.
-static cl::opt<bool>
- ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden,
- cl::desc("Allow the scalarizer pass to scalarize loads and store"));
+static cl::opt<bool> ClScalarizeLoadStore(
+ "scalarize-load-store", cl::init(false), cl::Hidden,
+ cl::desc("Allow the scalarizer pass to scalarize loads and store"));
namespace {
@@ -96,7 +94,7 @@ public:
// Scatter V into Size components. If new instructions are needed,
// insert them before BBI in BB. If Cache is nonnull, use it to cache
// the results.
- Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+ Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Type *PtrElemTy,
ValueVector *cachePtr = nullptr);
// Return component I, creating a new Value for it if necessary.
@@ -109,8 +107,8 @@ private:
BasicBlock *BB;
BasicBlock::iterator BBI;
Value *V;
+ Type *PtrElemTy;
ValueVector *CachePtr;
- PointerType *PtrTy;
ValueVector Tmp;
unsigned Size;
};
@@ -188,10 +186,23 @@ struct VectorLayout {
uint64_t ElemSize = 0;
};
+template <typename T>
+T getWithDefaultOverride(const cl::opt<T> &ClOption,
+ const llvm::Optional<T> &DefaultOverride) {
+ return ClOption.getNumOccurrences() ? ClOption
+ : DefaultOverride.value_or(ClOption);
+}
+
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
- ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT)
- : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT) {
+ ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT,
+ ScalarizerPassOptions Options)
+ : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT),
+ ScalarizeVariableInsertExtract(
+ getWithDefaultOverride(ClScalarizeVariableInsertExtract,
+ Options.ScalarizeVariableInsertExtract)),
+ ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
+ Options.ScalarizeLoadStore)) {
}
bool visit(Function &F);
@@ -216,8 +227,9 @@ public:
bool visitCallInst(CallInst &ICI);
private:
- Scatterer scatter(Instruction *Point, Value *V);
+ Scatterer scatter(Instruction *Point, Value *V, Type *PtrElemTy = nullptr);
void gather(Instruction *Op, const ValueVector &CV);
+ void replaceUses(Instruction *Op, Value *CV);
bool canTransferMetadata(unsigned Kind);
void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
Optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
@@ -231,12 +243,16 @@ private:
ScatterMap Scattered;
GatherList Gathered;
+ bool Scalarized;
SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
unsigned ParallelLoopAccessMDKind;
DominatorTree *DT;
+
+ const bool ScalarizeVariableInsertExtract;
+ const bool ScalarizeLoadStore;
};
class ScalarizerLegacyPass : public FunctionPass {
@@ -265,12 +281,14 @@ INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
- ValueVector *cachePtr)
- : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+ Type *PtrElemTy, ValueVector *cachePtr)
+ : BB(bb), BBI(bbi), V(v), PtrElemTy(PtrElemTy), CachePtr(cachePtr) {
Type *Ty = V->getType();
- PtrTy = dyn_cast<PointerType>(Ty);
- if (PtrTy)
- Ty = PtrTy->getPointerElementType();
+ if (Ty->isPointerTy()) {
+ assert(cast<PointerType>(Ty)->isOpaqueOrPointeeTypeMatches(PtrElemTy) &&
+ "Pointer element type mismatch");
+ Ty = PtrElemTy;
+ }
Size = cast<FixedVectorType>(Ty)->getNumElements();
if (!CachePtr)
Tmp.resize(Size, nullptr);
@@ -287,15 +305,15 @@ Value *Scatterer::operator[](unsigned I) {
if (CV[I])
return CV[I];
IRBuilder<> Builder(BB, BBI);
- if (PtrTy) {
- Type *ElTy =
- cast<VectorType>(PtrTy->getPointerElementType())->getElementType();
+ if (PtrElemTy) {
+ Type *VectorElemTy = cast<VectorType>(PtrElemTy)->getElementType();
if (!CV[0]) {
- Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
+ Type *NewPtrTy = PointerType::get(
+ VectorElemTy, V->getType()->getPointerAddressSpace());
CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
}
if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I,
+ CV[I] = Builder.CreateConstGEP1_32(VectorElemTy, CV[0], I,
V->getName() + ".i" + Twine(I));
} else {
// Search through a chain of InsertElementInsts looking for element I.
@@ -334,7 +352,7 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) {
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, ScalarizerPassOptions());
return Impl.visit(F);
}
@@ -345,6 +363,8 @@ FunctionPass *llvm::createScalarizerPass() {
bool ScalarizerVisitor::visit(Function &F) {
assert(Gathered.empty() && Scattered.empty());
+ Scalarized = false;
+
// To ensure we replace gathered components correctly we need to do an ordered
// traversal of the basic blocks in the function.
ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
@@ -362,13 +382,14 @@ bool ScalarizerVisitor::visit(Function &F) {
// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
-Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
+Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
+ Type *PtrElemTy) {
if (Argument *VArg = dyn_cast<Argument>(V)) {
// Put the scattered form of arguments in the entry block,
// so that it can be used everywhere.
Function *F = VArg->getParent();
BasicBlock *BB = &F->getEntryBlock();
- return Scatterer(BB, BB->begin(), V, &Scattered[V]);
+ return Scatterer(BB, BB->begin(), V, PtrElemTy, &Scattered[V]);
}
if (Instruction *VOp = dyn_cast<Instruction>(V)) {
// When scalarizing PHI nodes we might try to examine/rewrite InsertElement
@@ -379,17 +400,17 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
// need to analyse them further.
if (!DT->isReachableFromEntry(VOp->getParent()))
return Scatterer(Point->getParent(), Point->getIterator(),
- UndefValue::get(V->getType()));
+ PoisonValue::get(V->getType()), PtrElemTy);
// Put the scattered form of an instruction directly after the
// instruction, skipping over PHI nodes and debug intrinsics.
BasicBlock *BB = VOp->getParent();
return Scatterer(
BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
- &Scattered[V]);
+ PtrElemTy, &Scattered[V]);
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
- return Scatterer(Point->getParent(), Point->getIterator(), V);
+ return Scatterer(Point->getParent(), Point->getIterator(), V, PtrElemTy);
}
// Replace Op with the gathered form of the components in CV. Defer the
@@ -419,6 +440,15 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
Gathered.push_back(GatherList::value_type(Op, &SV));
}
+// Replace Op with CV and collect Op as a potentially dead instruction.
+void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
+ if (CV != Op) {
+ Op->replaceAllUsesWith(CV);
+ PotentiallyDeadInstrs.emplace_back(Op);
+ Scalarized = true;
+ }
+}
+
// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
@@ -558,9 +588,11 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
if (OpI->getType()->isVectorTy()) {
Scattered[I] = scatter(&CI, OpI);
assert(Scattered[I].size() == NumElems && "mismatched call operands");
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
+ Tys.push_back(OpI->getType()->getScalarType());
} else {
ScalarOperands[I] = OpI;
- if (hasVectorInstrinsicOverloadedScalarOpd(ID, I))
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
Tys.push_back(OpI->getType());
}
}
@@ -576,7 +608,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
ScalarCallOps.clear();
for (unsigned J = 0; J != NumArgs; ++J) {
- if (hasVectorInstrinsicScalarOpd(ID, J))
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, J))
ScalarCallOps.push_back(ScalarOperands[J]);
else
ScalarCallOps.push_back(Scattered[J][Elem]);
@@ -809,7 +841,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
Value *Res = Op0[CI->getValue().getZExtValue()];
- gather(&EEI, {Res});
+ replaceUses(&EEI, Res);
return true;
}
@@ -825,7 +857,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
EEI.getName() + ".upto" + Twine(I));
}
- gather(&EEI, {Res});
+ replaceUses(&EEI, Res);
return true;
}
@@ -891,7 +923,7 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
IRBuilder<> Builder(&LI);
- Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
+ Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType());
ValueVector Res;
Res.resize(NumElems);
@@ -917,7 +949,7 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
IRBuilder<> Builder(&SI);
- Scatterer VPtr = scatter(&SI, SI.getPointerOperand());
+ Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), FullValue->getType());
Scatterer VVal = scatter(&SI, FullValue);
ValueVector Stores;
@@ -940,7 +972,7 @@ bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
bool ScalarizerVisitor::finish() {
// The presence of data in Gathered or Scattered indicates changes
// made to the Function.
- if (Gathered.empty() && Scattered.empty())
+ if (Gathered.empty() && Scattered.empty() && !Scalarized)
return false;
for (const auto &GMI : Gathered) {
Instruction *Op = GMI.first;
@@ -971,6 +1003,7 @@ bool ScalarizerVisitor::finish() {
}
Gathered.clear();
Scattered.clear();
+ Scalarized = false;
RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
@@ -982,7 +1015,7 @@ PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM)
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, Options);
bool Changed = Impl.visit(F);
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index d23925042b0a..7da5a78772ad 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -189,7 +189,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index a27da047bfd3..0535608244cc 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -28,6 +27,7 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -49,7 +49,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -81,7 +83,6 @@ static cl::opt<bool> EnableNonTrivialUnswitch(
static cl::opt<int>
UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
- cl::ZeroOrMore,
cl::desc("The cost threshold for unswitching a loop."));
static cl::opt<bool> EnableUnswitchCostMultiplier(
@@ -110,17 +111,27 @@ static cl::opt<unsigned>
"partial unswitching analysis"),
cl::init(100), cl::Hidden);
static cl::opt<bool> FreezeLoopUnswitchCond(
- "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden,
+ "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
cl::desc("If enabled, the freeze instruction will be added to condition "
"of loop unswitch to prevent miscompilation."));
+// Helper to skip (select x, true, false), which matches both a logical AND and
+// OR and can confuse code that tries to determine if \p Cond is either a
+// logical AND or OR but not both.
+static Value *skipTrivialSelect(Value *Cond) {
+ Value *CondNext;
+ while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
+ Cond = CondNext;
+ return Cond;
+}
+
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
///
/// This essentially walks from a root recursively through loop variant operands
-/// which have the exact same opcode and finds all inputs which are loop
-/// invariant. For some operations these can be re-associated and unswitched out
-/// of the loop entirely.
+/// which perform the same logical operation (AND or OR) and finds all
+/// inputs which are loop invariant. For some operations these can be
+/// re-associated and unswitched out of the loop entirely.
static TinyPtrVector<Value *>
collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
LoopInfo &LI) {
@@ -150,7 +161,7 @@ collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
}
// If not an instruction with the same opcode, nothing we can do.
- Instruction *OpI = dyn_cast<Instruction>(OpV);
+ Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
(IsRootOr && match(OpI, m_LogicalOr())))) {
@@ -202,13 +213,19 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
/// branch on a single value.
static void buildPartialUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
- BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) {
+ BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
+ Instruction *I, AssumptionCache *AC, DominatorTree &DT) {
IRBuilder<> IRB(&BB);
- Value *Cond = Direction ? IRB.CreateOr(Invariants) :
- IRB.CreateAnd(Invariants);
- if (InsertFreeze)
- Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr");
+ SmallVector<Value *> FrozenInvariants;
+ for (Value *Inv : Invariants) {
+ if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
+ Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
+ FrozenInvariants.push_back(Inv);
+ }
+
+ Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
+ : IRB.CreateAnd(FrozenInvariants);
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc);
}
@@ -442,11 +459,12 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// some input conditions to the branch.
bool FullUnswitch = false;
- if (L.isLoopInvariant(BI.getCondition())) {
- Invariants.push_back(BI.getCondition());
+ Value *Cond = skipTrivialSelect(BI.getCondition());
+ if (L.isLoopInvariant(Cond)) {
+ Invariants.push_back(Cond);
FullUnswitch = true;
} else {
- if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
+ if (auto *CondInst = dyn_cast<Instruction>(Cond))
Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
if (Invariants.empty()) {
LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
@@ -480,8 +498,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// is a graph of `or` operations, or the exit block is along the false edge
// and the condition is a graph of `and` operations.
if (!FullUnswitch) {
- if (ExitDirection ? !match(BI.getCondition(), m_LogicalOr())
- : !match(BI.getCondition(), m_LogicalAnd())) {
+ if (ExitDirection ? !match(Cond, m_LogicalOr())
+ : !match(Cond, m_LogicalAnd())) {
LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
"non-full unswitch!\n");
return false;
@@ -546,6 +564,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// its successors.
OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
BI);
+ BI.setCondition(Cond);
if (MSSAU) {
// Temporarily clone the terminator, to make MSSA update cheaper by
// separating "insert edge" updates from "remove edge" ones.
@@ -561,15 +580,16 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// Only unswitching a subset of inputs to the condition, so we will need to
// build a new branch that merges the invariant inputs.
if (ExitDirection)
- assert(match(BI.getCondition(), m_LogicalOr()) &&
+ assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&
"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!");
else
- assert(match(BI.getCondition(), m_LogicalAnd()) &&
+ assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&
"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!");
- buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
- *UnswitchedBB, *NewPH, false);
+ buildPartialUnswitchConditionalBranch(
+ *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
+ FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
}
// Update the dominator tree with the added edge.
@@ -1019,7 +1039,8 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
// Don't bother trying to unswitch past an unconditional branch or a branch
// with a constant value. These should be removed by simplifycfg prior to
// running this pass.
- if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ if (!BI->isConditional() ||
+ isa<Constant>(skipTrivialSelect(BI->getCondition())))
return Changed;
// Found a trivial condition candidate: non-foldable conditional branch. If
@@ -1663,7 +1684,7 @@ deleteDeadBlocksFromLoop(Loop &L,
// uses in other blocks.
for (auto &I : *BB)
if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.replaceAllUsesWith(PoisonValue::get(I.getType()));
BB->dropAllReferences();
}
@@ -2042,12 +2063,13 @@ static void unswitchNontrivialInvariants(
"Can only unswitch switches and conditional branch!");
bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
bool FullUnswitch =
- SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
+ SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
+ !PartiallyInvariant);
if (FullUnswitch)
assert(Invariants.size() == 1 &&
"Cannot have other invariants with full unswitching!");
else
- assert(isa<Instruction>(BI->getCondition()) &&
+ assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
"Partial unswitching requires an instruction as the condition!");
if (MSSAU && VerifyMemorySSA)
@@ -2062,14 +2084,14 @@ static void unswitchNontrivialInvariants(
bool Direction = true;
int ClonedSucc = 0;
if (!FullUnswitch) {
- Value *Cond = BI->getCondition();
+ Value *Cond = skipTrivialSelect(BI->getCondition());
(void)Cond;
assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||
PartiallyInvariant) &&
"Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.");
- if (!match(BI->getCondition(), m_LogicalOr())) {
- if (match(BI->getCondition(), m_LogicalAnd()) ||
+ if (!match(Cond, m_LogicalOr())) {
+ if (match(Cond, m_LogicalAnd()) ||
(PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
Direction = false;
ClonedSucc = 1;
@@ -2209,11 +2231,12 @@ static void unswitchNontrivialInvariants(
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ Value *Cond = skipTrivialSelect(BI->getCondition());
if (InsertFreeze) {
- auto Cond = BI->getCondition();
if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
- BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI));
+ Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI);
}
+ BI->setCondition(Cond);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
assert(SI && "Must either be a branch or switch!");
@@ -2311,9 +2334,11 @@ static void unswitchNontrivialInvariants(
if (PartiallyInvariant)
buildPartialInvariantUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
- else
- buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
- *ClonedPH, *LoopPH, InsertFreeze);
+ else {
+ buildPartialUnswitchConditionalBranch(
+ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
+ FreezeLoopUnswitchCond, BI, &AC, DT);
+ }
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
if (MSSAU) {
@@ -2745,22 +2770,16 @@ static bool unswitchBestCondition(
BI->getSuccessor(0) == BI->getSuccessor(1))
continue;
- // If BI's condition is 'select _, true, false', simplify it to confuse
- // matchers
- Value *Cond = BI->getCondition(), *CondNext;
- while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
- Cond = CondNext;
- BI->setCondition(Cond);
-
+ Value *Cond = skipTrivialSelect(BI->getCondition());
if (isa<Constant>(Cond))
continue;
- if (L.isLoopInvariant(BI->getCondition())) {
- UnswitchCandidates.push_back({BI, {BI->getCondition()}});
+ if (L.isLoopInvariant(Cond)) {
+ UnswitchCandidates.push_back({BI, {Cond}});
continue;
}
- Instruction &CondI = *cast<Instruction>(BI->getCondition());
+ Instruction &CondI = *cast<Instruction>(Cond);
if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
TinyPtrVector<Value *> Invariants =
collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
@@ -2785,8 +2804,7 @@ static bool unswitchBestCondition(
PartialIVInfo = *Info;
PartialIVCondBranch = L.getHeader()->getTerminator();
TinyPtrVector<Value *> ValsToDuplicate;
- for (auto *Inst : Info->InstToDuplicate)
- ValsToDuplicate.push_back(Inst);
+ llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
UnswitchCandidates.push_back(
{L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
}
@@ -2902,10 +2920,11 @@ static bool unswitchBestCondition(
// its cost.
if (!FullUnswitch) {
auto &BI = cast<BranchInst>(TI);
- if (match(BI.getCondition(), m_LogicalAnd())) {
+ Value *Cond = skipTrivialSelect(BI.getCondition());
+ if (match(Cond, m_LogicalAnd())) {
if (SuccBB == BI.getSuccessor(1))
continue;
- } else if (match(BI.getCondition(), m_LogicalOr())) {
+ } else if (match(Cond, m_LogicalOr())) {
if (SuccBB == BI.getSuccessor(0))
continue;
} else if ((PartialIVInfo.KnownValue->isOneValue() &&
@@ -2947,8 +2966,9 @@ static bool unswitchBestCondition(
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
InstructionCost CandidateCost = ComputeUnswitchedCost(
- TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
- Invariants[0] == BI->getCondition()));
+ TI, /*FullUnswitch*/ !BI ||
+ (Invariants.size() == 1 &&
+ Invariants[0] == skipTrivialSelect(BI->getCondition())));
// Calculate cost multiplier which is a tool to limit potentially
// exponential behavior of loop-unswitch.
if (EnableUnswitchCostMultiplier) {
@@ -3131,8 +3151,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
- UnswitchCB, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
DestroyLoopCB))
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ee17da1875e5..fb2d812a186d 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -31,19 +31,16 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <utility>
@@ -59,6 +56,11 @@ static cl::opt<bool> UserKeepLoops(
"keep-loops", cl::Hidden, cl::init(true),
cl::desc("Preserve canonical loop structure (default = true)"));
+static cl::opt<bool> UserSwitchRangeToICmp(
+ "switch-range-to-icmp", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Convert switches into an integer range comparison (default = false)"));
+
static cl::opt<bool> UserSwitchToLookup(
"switch-to-lookup", cl::Hidden, cl::init(false),
cl::desc("Convert switches to lookup tables (default = false)"));
@@ -311,6 +313,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.BonusInstThreshold = UserBonusInstThreshold;
if (UserForwardSwitchCond.getNumOccurrences())
Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;
+ if (UserSwitchRangeToICmp.getNumOccurrences())
+ Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp;
if (UserSwitchToLookup.getNumOccurrences())
Options.ConvertSwitchToLookupTable = UserSwitchToLookup;
if (UserKeepLoops.getNumOccurrences())
@@ -337,6 +341,8 @@ void SimplifyCFGPass::printPipeline(
OS << "<";
OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
+ OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-")
+ << "switch-range-to-icmp;";
OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
<< "switch-to-lookup;";
OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp
index 8600aacdb056..e8fde53005f0 100644
--- a/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -15,12 +15,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -48,7 +43,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
}
if (Inst->isTerminator() || isa<PHINode>(Inst) || Inst->isEHPad() ||
- Inst->mayThrow())
+ Inst->mayThrow() || !Inst->willReturn())
return false;
if (auto *Call = dyn_cast<CallBase>(Inst)) {
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 06169a7834f6..9ac4608134c2 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -63,10 +63,10 @@
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
@@ -275,7 +275,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
});
}
- // Usially debug label instrinsic corresponds to label in LLVM IR. In these
+ // Usually debug label intrinsic corresponds to label in LLVM IR. In these
// cases we should not move it here.
// TODO: Possible special processing needed to detect it is related to a
// hoisted instruction.
@@ -301,7 +301,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
if (TotalSpeculationCost > SpecExecMaxSpeculationCost)
return false; // too much to hoist
} else {
- // Debug info instrinsics should not be counted for threshold.
+ // Debug info intrinsics should not be counted for threshold.
if (!isa<DbgInfoIntrinsic>(I))
NotHoistedInstCount++;
if (NotHoistedInstCount > SpecExecMaxNotHoisted)
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index b47378808216..70df0cec0dca 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -68,7 +68,6 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -683,24 +682,16 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
- if (InBounds)
- Reduced =
- Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
- else
- Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
+ Reduced =
+ Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds);
Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
} else {
// C = gep Basis, Bump
// Canonicalize bump to pointer size.
Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
- if (InBounds)
- Reduced = Builder.CreateInBoundsGEP(
- cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
- Basis.Ins, Bump);
- else
- Reduced = Builder.CreateGEP(
- cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
- Basis.Ins, Bump);
+ Reduced = Builder.CreateGEP(
+ cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
+ Basis.Ins, Bump, "", InBounds);
}
break;
}
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index b3a445368537..f6525ad7de9b 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -18,10 +18,8 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
-#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -33,7 +31,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
@@ -41,7 +38,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -72,6 +68,11 @@ static cl::opt<bool>
cl::desc("Allow relaxed uniform region checks"),
cl::init(true));
+static cl::opt<unsigned>
+ ReorderNodeSize("structurizecfg-node-reorder-size",
+ cl::desc("Limit region size for reordering nodes"),
+ cl::init(100), cl::Hidden);
+
// Definition of the complex types used in this pass.
using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -266,6 +267,8 @@ class StructurizeCFG {
void orderNodes();
+ void reorderNodes();
+
void analyzeLoops(RegionNode *N);
Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
@@ -424,6 +427,57 @@ void StructurizeCFG::orderNodes() {
}
}
+/// Change the node ordering to decrease the range of live values, especially
+/// the values that capture the control flow path for branches. We do this
+/// by moving blocks with a single predecessor and successor to appear after
+/// their predecessor. The motivation is to move some loop exit blocks into a loop.
+/// In cases where a loop has a large number of exit blocks, this reduces the
+/// amount of values needed across the loop boundary.
+void StructurizeCFG::reorderNodes() {
+ SmallVector<RegionNode *, 8> NewOrder;
+ DenseMap<BasicBlock *, unsigned> MoveTo;
+ BitVector Moved(Order.size());
+
+ // The benefits of reordering nodes occur for large regions.
+ if (Order.size() <= ReorderNodeSize)
+ return;
+
+ // The algorithm works with two passes over Order. The first pass identifies
+ // the blocks to move and the position to move them to. The second pass
+ // creates the new order based upon this information. We move blocks with
+ // a single predecessor and successor. If there are multiple candidates then
+ // maintain the original order.
+ BBSet Seen;
+ for (int I = Order.size() - 1; I >= 0; --I) {
+ auto *BB = Order[I]->getEntry();
+ Seen.insert(BB);
+ auto *Pred = BB->getSinglePredecessor();
+ auto *Succ = BB->getSingleSuccessor();
+ // Consider only those basic blocks that have a predecessor in Order and a
+ // successor that exits the region. The region may contain subregions that
+ // have been structurized and are not included in Order.
+ if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() &&
+ !MoveTo.count(Pred)) {
+ MoveTo[Pred] = I;
+ Moved.set(I);
+ }
+ }
+
+ // If no blocks have been moved then the original order is good.
+ if (!Moved.count())
+ return;
+
+ for (size_t I = 0, E = Order.size(); I < E; ++I) {
+ auto *BB = Order[I]->getEntry();
+ if (MoveTo.count(BB))
+ NewOrder.push_back(Order[MoveTo[BB]]);
+ if (!Moved[I])
+ NewOrder.push_back(Order[I]);
+ }
+
+ Order.assign(NewOrder);
+}
+
/// Determine the end of the loops
void StructurizeCFG::analyzeLoops(RegionNode *N) {
if (N->isSubRegion()) {
@@ -685,7 +739,7 @@ void StructurizeCFG::simplifyAffectedPhis() {
Q.DT = DT;
for (WeakVH VH : AffectedPhis) {
if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
- if (auto NewValue = SimplifyInstruction(Phi, Q)) {
+ if (auto NewValue = simplifyInstruction(Phi, Q)) {
Phi->replaceAllUsesWith(NewValue);
Phi->eraseFromParent();
Changed = true;
@@ -1085,12 +1139,13 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
ParentRegion = R;
orderNodes();
+ reorderNodes();
collectInfos();
createFlow();
insertConditions(false);
insertConditions(true);
- simplifyConditions();
setPhiValues();
+ simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
diff --git a/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp b/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
new file mode 100644
index 000000000000..16b3483f9687
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
@@ -0,0 +1,306 @@
+//===- TLSVariableHoist.cpp -------- Remove Redundant TLS Loads ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies/eliminates redundant TLS loads if the related option is set.
+// The example: Please refer to the comment at the head of TLSVariableHoist.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/TLSVariableHoist.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+using namespace tlshoist;
+
+#define DEBUG_TYPE "tlshoist"
+
+static cl::opt<bool> TLSLoadHoist(
+ "tls-load-hoist", cl::init(false), cl::Hidden,
+ cl::desc("hoist the TLS loads in PIC model to eliminate redundant "
+ "TLS address calculation."));
+
+namespace {
+
+/// The TLS Variable hoist pass.
+class TLSVariableHoistLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ TLSVariableHoistLegacyPass() : FunctionPass(ID) {
+ initializeTLSVariableHoistLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &Fn) override;
+
+ StringRef getPassName() const override { return "TLS Variable Hoist"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+
+private:
+ TLSVariableHoistPass Impl;
+};
+
+} // end anonymous namespace
+
+char TLSVariableHoistLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(TLSVariableHoistLegacyPass, "tlshoist",
+ "TLS Variable Hoist", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(TLSVariableHoistLegacyPass, "tlshoist",
+ "TLS Variable Hoist", false, false)
+
+FunctionPass *llvm::createTLSVariableHoistPass() {
+ return new TLSVariableHoistLegacyPass();
+}
+
+/// Perform the TLS Variable Hoist optimization for the given function.
+bool TLSVariableHoistLegacyPass::runOnFunction(Function &Fn) {
+ if (skipFunction(Fn))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** Begin TLS Variable Hoist **********\n");
+ LLVM_DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
+
+ bool MadeChange =
+ Impl.runImpl(Fn, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo());
+
+ if (MadeChange) {
+ LLVM_DEBUG(dbgs() << "********** Function after TLS Variable Hoist: "
+ << Fn.getName() << '\n');
+ LLVM_DEBUG(dbgs() << Fn);
+ }
+ LLVM_DEBUG(dbgs() << "********** End TLS Variable Hoist **********\n");
+
+ return MadeChange;
+}
+
+void TLSVariableHoistPass::collectTLSCandidate(Instruction *Inst) {
+ // Skip all cast instructions. They are visited indirectly later on.
+ if (Inst->isCast())
+ return;
+
+ // Scan all operands.
+ for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
+ auto *GV = dyn_cast<GlobalVariable>(Inst->getOperand(Idx));
+ if (!GV || !GV->isThreadLocal())
+ continue;
+
+ // Add Candidate to TLSCandMap (GV --> Candidate).
+ TLSCandMap[GV].addUser(Inst, Idx);
+ }
+}
+
+void TLSVariableHoistPass::collectTLSCandidates(Function &Fn) {
+ // First, quickly check if there is TLS Variable.
+ Module *M = Fn.getParent();
+
+ bool HasTLS = llvm::any_of(
+ M->globals(), [](GlobalVariable &GV) { return GV.isThreadLocal(); });
+
+ // If none, return directly.
+ if (!HasTLS)
+ return;
+
+ TLSCandMap.clear();
+
+ // Then, collect TLS Variable info.
+ for (BasicBlock &BB : Fn) {
+ // Ignore unreachable basic blocks.
+ if (!DT->isReachableFromEntry(&BB))
+ continue;
+
+ for (Instruction &Inst : BB)
+ collectTLSCandidate(&Inst);
+ }
+}
+
+static bool oneUseOutsideLoop(tlshoist::TLSCandidate &Cand, LoopInfo *LI) {
+ if (Cand.Users.size() != 1)
+ return false;
+
+ BasicBlock *BB = Cand.Users[0].Inst->getParent();
+ if (LI->getLoopFor(BB))
+ return false;
+
+ return true;
+}
+
+Instruction *TLSVariableHoistPass::getNearestLoopDomInst(BasicBlock *BB,
+ Loop *L) {
+ assert(L && "Unexcepted Loop status!");
+
+ // Get the outermost loop.
+ while (Loop *Parent = L->getParentLoop())
+ L = Parent;
+
+ BasicBlock *PreHeader = L->getLoopPreheader();
+
+ // There is a unique predecessor outside the loop.
+ if (PreHeader)
+ return PreHeader->getTerminator();
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Dom = Header;
+ for (BasicBlock *PredBB : predecessors(Header))
+ Dom = DT->findNearestCommonDominator(Dom, PredBB);
+
+ assert(Dom && "Not find dominator BB!");
+ Instruction *Term = Dom->getTerminator();
+
+ return Term;
+}
+
+Instruction *TLSVariableHoistPass::getDomInst(Instruction *I1,
+ Instruction *I2) {
+ if (!I1)
+ return I2;
+ if (DT->dominates(I1, I2))
+ return I1;
+ if (DT->dominates(I2, I1))
+ return I2;
+
+ // If there is no dominance relation, use common dominator.
+ BasicBlock *DomBB =
+ DT->findNearestCommonDominator(I1->getParent(), I2->getParent());
+
+ Instruction *Dom = DomBB->getTerminator();
+ assert(Dom && "Common dominator not found!");
+
+ return Dom;
+}
+
+BasicBlock::iterator TLSVariableHoistPass::findInsertPos(Function &Fn,
+ GlobalVariable *GV,
+ BasicBlock *&PosBB) {
+ tlshoist::TLSCandidate &Cand = TLSCandMap[GV];
+
+ // We should hoist the TLS use out of the loop, so choose the nearest instruction
+ // that dominates the loop and the enclosing loops (if they exist).
+ Instruction *LastPos = nullptr;
+ for (auto &User : Cand.Users) {
+ BasicBlock *BB = User.Inst->getParent();
+ Instruction *Pos = User.Inst;
+ if (Loop *L = LI->getLoopFor(BB)) {
+ Pos = getNearestLoopDomInst(BB, L);
+ assert(Pos && "Not find insert position out of loop!");
+ }
+ Pos = getDomInst(LastPos, Pos);
+ LastPos = Pos;
+ }
+
+ assert(LastPos && "Unexpected insert position!");
+ BasicBlock *Parent = LastPos->getParent();
+ PosBB = Parent;
+ return LastPos->getIterator();
+}
+
+// Generate a bitcast (no type change) to replace the uses of TLS Candidate.
+Instruction *TLSVariableHoistPass::genBitCastInst(Function &Fn,
+ GlobalVariable *GV) {
+ BasicBlock *PosBB = &Fn.getEntryBlock();
+ BasicBlock::iterator Iter = findInsertPos(Fn, GV, PosBB);
+ Type *Ty = GV->getType();
+ auto *CastInst = new BitCastInst(GV, Ty, "tls_bitcast");
+ PosBB->getInstList().insert(Iter, CastInst);
+ return CastInst;
+}
+
+bool TLSVariableHoistPass::tryReplaceTLSCandidate(Function &Fn,
+ GlobalVariable *GV) {
+
+ tlshoist::TLSCandidate &Cand = TLSCandMap[GV];
+
+ // If it is used only once and not inside a loop, there is no need to replace it.
+ if (oneUseOutsideLoop(Cand, LI))
+ return false;
+
+ // Generate a bitcast (no type change)
+ auto *CastInst = genBitCastInst(Fn, GV);
+
+ // to replace the uses of TLS Candidate
+ for (auto &User : Cand.Users)
+ User.Inst->setOperand(User.OpndIdx, CastInst);
+
+ return true;
+}
+
+bool TLSVariableHoistPass::tryReplaceTLSCandidates(Function &Fn) {
+ if (TLSCandMap.empty())
+ return false;
+
+ bool Replaced = false;
+ for (auto &GV2Cand : TLSCandMap) {
+ GlobalVariable *GV = GV2Cand.first;
+ Replaced |= tryReplaceTLSCandidate(Fn, GV);
+ }
+
+ return Replaced;
+}
+
+/// Optimize expensive TLS variables in the given function.
+bool TLSVariableHoistPass::runImpl(Function &Fn, DominatorTree &DT,
+ LoopInfo &LI) {
+ if (Fn.hasOptNone())
+ return false;
+
+ if (!TLSLoadHoist && !Fn.getAttributes().hasFnAttr("tls-load-hoist"))
+ return false;
+
+ this->LI = &LI;
+ this->DT = &DT;
+ assert(this->LI && this->DT && "Unexcepted requirement!");
+
+ // Collect all TLS variable candidates.
+ collectTLSCandidates(Fn);
+
+ bool MadeChange = tryReplaceTLSCandidates(Fn);
+
+ return MadeChange;
+}
+
+PreservedAnalyses TLSVariableHoistPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ if (!runImpl(F, DT, LI))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 3bcf92e28a21..27c04177e894 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -53,11 +53,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -76,14 +73,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "tailcallelim"
@@ -248,10 +243,10 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
isa<PseudoProbeInst>(&I))
continue;
- // Special-case operand bundle "clang.arc.attachedcall".
+ // Special-case operand bundles "clang.arc.attachedcall" and "ptrauth".
bool IsNoTail =
CI->isNoTailCall() || CI->hasOperandBundlesOtherThan(
- LLVMContext::OB_clang_arc_attachedcall);
+ {LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_ptrauth});
if (!IsNoTail && CI->doesNotAccessMemory()) {
// A call to a readnone function whose arguments are all things computed
@@ -531,7 +526,7 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
}
// If the function doen't return void, create the RetPN and RetKnownPN PHI
- // nodes to track our return value. We initialize RetPN with undef and
+ // nodes to track our return value. We initialize RetPN with poison and
// RetKnownPN with false since we can't know our return value at function
// entry.
Type *RetType = F.getReturnType();
@@ -540,7 +535,7 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
RetPN = PHINode::Create(RetType, 2, "ret.tr", InsertPos);
RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr", InsertPos);
- RetPN->addIncoming(UndefValue::get(RetType), NewEntry);
+ RetPN->addIncoming(PoisonValue::get(RetType), NewEntry);
RetKnownPN->addIncoming(ConstantInt::getFalse(BoolType), NewEntry);
}
@@ -734,7 +729,7 @@ void TailRecursionEliminator::cleanupAndFinalize() {
// call.
for (PHINode *PN : ArgumentPHIs) {
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
+ if (Value *PNV = simplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 80a7d3a43ad6..8367e61c1a47 100644
--- a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -61,7 +61,7 @@ static void warnAboutLeftoverTransformations(Loop *L,
<< "loop not vectorized: the optimizer was unable to perform the "
"requested transformation; the transformation might be disabled "
"or specified as part of an unsupported transformation ordering");
- else if (InterleaveCount.getValueOr(0) != 1)
+ else if (InterleaveCount.value_or(0) != 1)
ORE->emit(
DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
"FailedRequestedInterleaving",