summaryrefslogtreecommitdiff
path: root/lib/Analysis
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Analysis')
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp9
-rw-r--r--lib/Analysis/BranchProbabilityInfo.cpp11
-rw-r--r--lib/Analysis/CallGraph.cpp34
-rw-r--r--lib/Analysis/ConstantFolding.cpp80
-rw-r--r--lib/Analysis/DemandedBits.cpp10
-rw-r--r--lib/Analysis/InlineCost.cpp2
-rw-r--r--lib/Analysis/InstructionSimplify.cpp167
-rw-r--r--lib/Analysis/ModuleSummaryAnalysis.cpp3
-rw-r--r--lib/Analysis/OptimizationDiagnosticInfo.cpp2
-rw-r--r--lib/Analysis/ProfileSummaryInfo.cpp13
-rw-r--r--lib/Analysis/ScalarEvolution.cpp159
-rw-r--r--lib/Analysis/TargetLibraryInfo.cpp112
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp13
-rw-r--r--lib/Analysis/ValueTracking.cpp276
-rw-r--r--lib/Analysis/VectorUtils.cpp1
15 files changed, 569 insertions, 323 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 537823020301..a33c01a0e461 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -17,13 +17,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -36,6 +36,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include <algorithm>
#define DEBUG_TYPE "basicaa"
@@ -1283,9 +1284,9 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// give up if we can't determine conditions that hold for every cycle:
const Value *V = DecompGEP1.VarIndices[i].V;
- bool SignKnownZero, SignKnownOne;
- ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
- 0, &AC, nullptr, DT);
+ KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT);
+ bool SignKnownZero = Known.isNonNegative();
+ bool SignKnownOne = Known.isNegative();
// Zero-extension widens the variable, and so forces the sign
// bit to zero.
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 0dc4475ca0e2..db87b17c1567 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -301,6 +301,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
WeightSum += Weights[i];
}
}
+ assert(WeightSum <= UINT32_MAX &&
+ "Expected weights to scale down to 32 bits");
if (WeightSum == 0 || ReachableIdxs.size() == 0) {
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
@@ -328,21 +330,14 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
// the difference between reachable blocks.
if (ToDistribute > BranchProbability::getZero()) {
BranchProbability PerEdge = ToDistribute / ReachableIdxs.size();
- for (auto i : ReachableIdxs) {
+ for (auto i : ReachableIdxs)
BP[i] += PerEdge;
- ToDistribute -= PerEdge;
- }
- // Tail goes to the first reachable edge.
- BP[ReachableIdxs[0]] += ToDistribute;
}
}
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
setEdgeProbability(BB, i, BP[i]);
- assert(WeightSum <= UINT32_MAX &&
- "Expected weights to scale down to 32 bits");
-
return true;
}
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 6942176ae6ae..ff5242f69a1b 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -21,23 +21,18 @@ using namespace llvm;
//
CallGraph::CallGraph(Module &M)
- : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)),
+ : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)),
CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) {
// Add every function to the call graph.
for (Function &F : M)
addToCallGraph(&F);
-
- // If we didn't find a main function, use the external call graph node
- if (!Root)
- Root = ExternalCallingNode;
}
CallGraph::CallGraph(CallGraph &&Arg)
- : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root),
+ : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)),
ExternalCallingNode(Arg.ExternalCallingNode),
CallsExternalNode(std::move(Arg.CallsExternalNode)) {
Arg.FunctionMap.clear();
- Arg.Root = nullptr;
Arg.ExternalCallingNode = nullptr;
}
@@ -57,21 +52,9 @@ CallGraph::~CallGraph() {
void CallGraph::addToCallGraph(Function *F) {
CallGraphNode *Node = getOrInsertFunction(F);
- // If this function has external linkage, anything could call it.
- if (!F->hasLocalLinkage()) {
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // Found the entry point?
- if (F->getName() == "main") {
- if (Root) // Found multiple external mains? Don't pick one.
- Root = ExternalCallingNode;
- else
- Root = Node; // Found a main, keep track of it!
- }
- }
-
- // If this function has its address taken, anything could call it.
- if (F->hasAddressTaken())
+ // If this function has external linkage or has its address taken, anything
+ // could call it.
+ if (!F->hasLocalLinkage() || F->hasAddressTaken())
ExternalCallingNode->addCalledFunction(CallSite(), Node);
// If this function is not defined in this translation unit, it could call
@@ -96,13 +79,6 @@ void CallGraph::addToCallGraph(Function *F) {
}
void CallGraph::print(raw_ostream &OS) const {
- OS << "CallGraph Root is: ";
- if (Function *F = Root->getFunction())
- OS << F->getName() << "\n";
- else {
- OS << "<<null function: 0x" << Root << ">>\n";
- }
-
// Print in a deterministic order by sorting CallGraphNodes by name. We do
// this here to avoid slowing down the non-printing fast path.
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 130e917e49d7..0ca712bbfe70 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1438,6 +1438,36 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
case 't':
return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf";
+ case '_':
+
+ // Check for various function names that get used for the math functions
+ // when the header files are preprocessed with the macro
+ // __FINITE_MATH_ONLY__ enabled.
+ // The '12' here is the length of the shortest name that can match.
+ // We need to check the size before looking at Name[1] and Name[2]
+ // so we may as well check a limit that will eliminate mismatches.
+ if (Name.size() < 12 || Name[1] != '_')
+ return false;
+ switch (Name[2]) {
+ default:
+ return false;
+ case 'a':
+ return Name == "__acos_finite" || Name == "__acosf_finite" ||
+ Name == "__asin_finite" || Name == "__asinf_finite" ||
+ Name == "__atan2_finite" || Name == "__atan2f_finite";
+ case 'c':
+ return Name == "__cosh_finite" || Name == "__coshf_finite";
+ case 'e':
+ return Name == "__exp_finite" || Name == "__expf_finite" ||
+ Name == "__exp2_finite" || Name == "__exp2f_finite";
+ case 'l':
+ return Name == "__log_finite" || Name == "__logf_finite" ||
+ Name == "__log10_finite" || Name == "__log10f_finite";
+ case 'p':
+ return Name == "__pow_finite" || Name == "__powf_finite";
+ case 's':
+ return Name == "__sinh_finite" || Name == "__sinhf_finite";
+ }
}
}
@@ -1637,13 +1667,21 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (!TLI)
return nullptr;
- switch (Name[0]) {
+ char NameKeyChar = Name[0];
+ if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
+ NameKeyChar = Name[2];
+
+ switch (NameKeyChar) {
case 'a':
if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
- (Name == "acosf" && TLI->has(LibFunc_acosf)))
+ (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
+ (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
+ (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
return ConstantFoldFP(acos, V, Ty);
else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
- (Name == "asinf" && TLI->has(LibFunc_asinf)))
+ (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
+ (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
+ (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
return ConstantFoldFP(asin, V, Ty);
else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
(Name == "atanf" && TLI->has(LibFunc_atanf)))
@@ -1657,15 +1695,21 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
(Name == "cosf" && TLI->has(LibFunc_cosf)))
return ConstantFoldFP(cos, V, Ty);
else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
- (Name == "coshf" && TLI->has(LibFunc_coshf)))
+ (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
+ (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
+ (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
return ConstantFoldFP(cosh, V, Ty);
break;
case 'e':
if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
- (Name == "expf" && TLI->has(LibFunc_expf)))
+ (Name == "expf" && TLI->has(LibFunc_expf)) ||
+ (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
+ (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
return ConstantFoldFP(exp, V, Ty);
if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
- (Name == "exp2f" && TLI->has(LibFunc_exp2f)))
+ (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
+ (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
+ (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
@@ -1680,10 +1724,18 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
break;
case 'l':
if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
- (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)))
+ (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
+ (Name == "__log_finite" && V > 0 &&
+ TLI->has(LibFunc_log_finite)) ||
+ (Name == "__logf_finite" && V > 0 &&
+ TLI->has(LibFunc_logf_finite)))
return ConstantFoldFP(log, V, Ty);
else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
- (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)))
+ (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
+ (Name == "__log10_finite" && V > 0 &&
+ TLI->has(LibFunc_log10_finite)) ||
+ (Name == "__log10f_finite" && V > 0 &&
+ TLI->has(LibFunc_log10f_finite)))
return ConstantFoldFP(log10, V, Ty);
break;
case 'r':
@@ -1695,7 +1747,9 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
(Name == "sinf" && TLI->has(LibFunc_sinf)))
return ConstantFoldFP(sin, V, Ty);
else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
- (Name == "sinhf" && TLI->has(LibFunc_sinhf)))
+ (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
+ (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
+ (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
return ConstantFoldFP(sinh, V, Ty);
else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
(Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
@@ -1813,13 +1867,17 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (!TLI)
return nullptr;
if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
- (Name == "powf" && TLI->has(LibFunc_powf)))
+ (Name == "powf" && TLI->has(LibFunc_powf)) ||
+ (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
+ (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
(Name == "fmodf" && TLI->has(LibFunc_fmodf)))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
- (Name == "atan2f" && TLI->has(LibFunc_atan2f)))
+ (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
+ (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
+ (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 9f5dc5318239..8f808f3e7871 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -86,13 +86,11 @@ void DemandedBits::determineLiveOperandBits(
[&](unsigned BitWidth, const Value *V1, const Value *V2) {
const DataLayout &DL = I->getModule()->getDataLayout();
Known = KnownBits(BitWidth);
- computeKnownBits(const_cast<Value *>(V1), Known, DL, 0,
- &AC, UserI, &DT);
+ computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT);
if (V2) {
Known2 = KnownBits(BitWidth);
- computeKnownBits(const_cast<Value *>(V2), Known2, DL,
- 0, &AC, UserI, &DT);
+ computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT);
}
};
@@ -118,7 +116,7 @@ void DemandedBits::determineLiveOperandBits(
// known to be one.
ComputeKnownBits(BitWidth, I, nullptr);
AB = APInt::getHighBitsSet(BitWidth,
- std::min(BitWidth, Known.One.countLeadingZeros()+1));
+ std::min(BitWidth, Known.countMaxLeadingZeros()+1));
}
break;
case Intrinsic::cttz:
@@ -128,7 +126,7 @@ void DemandedBits::determineLiveOperandBits(
// known to be one.
ComputeKnownBits(BitWidth, I, nullptr);
AB = APInt::getLowBitsSet(BitWidth,
- std::min(BitWidth, Known.One.countTrailingZeros()+1));
+ std::min(BitWidth, Known.countMaxTrailingZeros()+1));
}
break;
}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 100a591e452c..44c14cb17c22 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -63,7 +63,7 @@ static cl::opt<bool>
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
- "inlinecold-threshold", cl::Hidden, cl::init(225),
+ "inlinecold-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining functions with cold attribute"));
static cl::opt<int>
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 4a713f441ce8..5728887cc1e9 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1317,7 +1317,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
// If all valid bits in the shift amount are known zero, the first operand is
// unchanged.
unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth);
- if (Known.Zero.countTrailingOnes() >= NumValidShiftBits)
+ if (Known.countMinTrailingZeros() >= NumValidShiftBits)
return Op0;
return nullptr;
@@ -1536,7 +1536,7 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1,
auto Range0 = ConstantRange::makeExactICmpRegion(Cmp0->getPredicate(), *C0);
auto Range1 = ConstantRange::makeExactICmpRegion(Cmp1->getPredicate(), *C1);
- // For and-of-comapares, check if the intersection is empty:
+ // For and-of-compares, check if the intersection is empty:
// (icmp X, C0) && (icmp X, C1) --> empty set --> false
if (IsAnd && Range0.intersectWith(Range1).isEmptySet())
return getFalse(Cmp0->getType());
@@ -1870,6 +1870,24 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
return Op0;
+ // (A & B) | (~A ^ B) -> (~A ^ B)
+ // (B & A) | (~A ^ B) -> (~A ^ B)
+ // (A & B) | (B ^ ~A) -> (B ^ ~A)
+ // (B & A) | (B ^ ~A) -> (B ^ ~A)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
+ match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
+ return Op1;
+
+ // (~A ^ B) | (A & B) -> (~A ^ B)
+ // (~A ^ B) | (B & A) -> (~A ^ B)
+ // (B ^ ~A) | (A & B) -> (B ^ ~A)
+ // (B ^ ~A) | (B & A) -> (B ^ ~A)
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
+ match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
+ return Op0;
+
if (Value *V = simplifyAndOrOfICmps(Op0, Op1, false))
return V;
@@ -2286,7 +2304,6 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
Type *ITy = GetCompareTy(LHS); // The return type.
- bool LHSKnownNonNegative, LHSKnownNegative;
switch (Pred) {
default:
llvm_unreachable("Unknown ICmp predicate!");
@@ -2304,39 +2321,41 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
- case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
+ case ICmpInst::ICMP_SLT: {
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNegative())
return getTrue(ITy);
- if (LHSKnownNonNegative)
+ if (LHSKnown.isNonNegative())
return getFalse(ITy);
break;
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
+ }
+ case ICmpInst::ICMP_SLE: {
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNegative())
return getTrue(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (LHSKnown.isNonNegative() &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
+ }
+ case ICmpInst::ICMP_SGE: {
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNegative())
return getFalse(ITy);
- if (LHSKnownNonNegative)
+ if (LHSKnown.isNonNegative())
return getTrue(ITy);
break;
- case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
+ }
+ case ICmpInst::ICMP_SGT: {
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNegative())
return getFalse(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (LHSKnown.isNonNegative() &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
}
+ }
return nullptr;
}
@@ -2535,6 +2554,9 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
+/// TODO: A large part of this logic is duplicated in InstCombine's
+/// foldICmpBinOp(). We should be able to share that and avoid the code
+/// duplication.
static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
@@ -2616,15 +2638,11 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return getTrue(ITy);
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
- bool RHSKnownNonNegative, RHSKnownNegative;
- bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, Q.DL, 0,
- Q.AC, Q.CxtI, Q.DT);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (RHSKnownNonNegative && YKnownNegative)
+ KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (RHSKnown.isNonNegative() && YKnown.isNegative())
return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy);
- if (RHSKnownNegative || YKnownNonNegative)
+ if (RHSKnown.isNegative() || YKnown.isNonNegative())
return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy);
}
}
@@ -2636,15 +2654,11 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return getFalse(ITy);
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) {
- bool LHSKnownNonNegative, LHSKnownNegative;
- bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0,
- Q.AC, Q.CxtI, Q.DT);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNonNegative && YKnownNegative)
+ KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.isNonNegative() && YKnown.isNegative())
return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy);
- if (LHSKnownNegative || YKnownNonNegative)
+ if (LHSKnown.isNegative() || YKnown.isNonNegative())
return Pred == ICmpInst::ICMP_SGT ? getFalse(ITy) : getTrue(ITy);
}
}
@@ -2691,28 +2705,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// icmp pred (urem X, Y), Y
if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
- bool KnownNonNegative, KnownNegative;
switch (Pred) {
default:
break;
case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (!KnownNonNegative)
+ case ICmpInst::ICMP_SGE: {
+ KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
+ }
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (!KnownNonNegative)
+ case ICmpInst::ICMP_SLE: {
+ KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
+ }
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2722,28 +2735,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// icmp pred X, (urem Y, X)
if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
- bool KnownNonNegative, KnownNegative;
switch (Pred) {
default:
break;
case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (!KnownNonNegative)
+ case ICmpInst::ICMP_SGE: {
+ KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
+ }
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (!KnownNonNegative)
+ case ICmpInst::ICMP_SLE: {
+ KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
+ }
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2815,10 +2827,19 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
break;
case Instruction::UDiv:
case Instruction::LShr:
- if (ICmpInst::isSigned(Pred))
+ if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact())
break;
- LLVM_FALLTHROUGH;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), Q, MaxRecurse - 1))
+ return V;
+ break;
case Instruction::SDiv:
+ if (!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact())
+ break;
+ if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ RBO->getOperand(0), Q, MaxRecurse - 1))
+ return V;
+ break;
case Instruction::AShr:
if (!LBO->isExact() || !RBO->isExact())
break;
@@ -4034,24 +4055,21 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
/// match a root vector source operand that contains that element in the same
/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
- Constant *Mask, Value *RootVec, int RootElt,
+ int MaskVal, Value *RootVec,
unsigned MaxRecurse) {
if (!MaxRecurse--)
return nullptr;
// Bail out if any mask value is undefined. That kind of shuffle may be
// simplified further based on demanded bits or other folds.
- int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt);
if (MaskVal == -1)
return nullptr;
// The mask value chooses which source operand we need to look at next.
- Value *SourceOp;
int InVecNumElts = Op0->getType()->getVectorNumElements();
- if (MaskVal < InVecNumElts) {
- RootElt = MaskVal;
- SourceOp = Op0;
- } else {
+ int RootElt = MaskVal;
+ Value *SourceOp = Op0;
+ if (MaskVal >= InVecNumElts) {
RootElt = MaskVal - InVecNumElts;
SourceOp = Op1;
}
@@ -4061,7 +4079,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
return foldIdentityShuffles(
DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
- SourceShuf->getMask(), RootVec, RootElt, MaxRecurse);
+ SourceShuf->getMaskValue(RootElt), RootVec, MaxRecurse);
}
// TODO: Look through bitcasts? What if the bitcast changes the vector element
@@ -4126,17 +4144,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
// second one.
if (Op0Const && !Op1Const) {
std::swap(Op0, Op1);
- for (int &Idx : Indices) {
- if (Idx == -1)
- continue;
- Idx = Idx < (int)InVecNumElts ? Idx + InVecNumElts : Idx - InVecNumElts;
- assert(Idx >= 0 && Idx < (int)InVecNumElts * 2 &&
- "shufflevector mask index out of range");
- }
- Mask = ConstantDataVector::get(
- Mask->getContext(),
- makeArrayRef(reinterpret_cast<uint32_t *>(Indices.data()),
- MaskNumElts));
+ ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts);
}
// A shuffle of a splat is always the splat itself. Legal if the shuffle's
@@ -4160,7 +4168,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
for (unsigned i = 0; i != MaskNumElts; ++i) {
// Note that recursion is limited for each vector element, so if any element
// exceeds the limit, this will fail to simplify.
- RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse);
+ RootVec =
+ foldIdentityShuffles(i, Op0, Op1, Indices[i], RootVec, MaxRecurse);
// We can't replace a widening/narrowing shuffle with one of its operands.
if (!RootVec || RootVec->getType() != RetTy)
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 99f900ae3932..26706f5509ba 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -232,7 +232,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
// We should have named any anonymous globals
assert(CalledFunction->hasName());
- auto ScaledCount = ProfileSummaryInfo::getProfileCount(&I, BFI);
+ auto ScaledCount = PSI->getProfileCount(&I, BFI);
auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
: CalleeInfo::HotnessType::Unknown;
@@ -330,6 +330,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
const Module &M,
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
ProfileSummaryInfo *PSI) {
+ assert(PSI);
ModuleSummaryIndex Index;
// Identify the local values in the llvm.used and llvm.compiler.used sets,
diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp
index 73245981b022..e38e530c052d 100644
--- a/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -101,7 +101,7 @@ void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
// These are read-only for now.
DiagnosticLocation DL = OptDiag->getLocation();
StringRef FN =
- GlobalValue::getRealLinkageName(OptDiag->getFunction().getName());
+ GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
StringRef PassName(OptDiag->PassName);
io.mapRequired("Pass", PassName);
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 1a53a8ed4283..502f4205b689 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -75,11 +75,14 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
return None;
assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
"We can only get profile count for call/invoke instruction.");
- // Check if there is a profile metadata on the instruction. If it is present,
- // determine hotness solely based on that.
- uint64_t TotalCount;
- if (Inst->extractProfTotalWeight(TotalCount))
- return TotalCount;
+ if (computeSummary() && Summary->getKind() == ProfileSummary::PSK_Sample) {
+ // In sample PGO mode, check if there is a profile metadata on the
+ // instruction. If it is present, determine hotness solely based on that,
+ // since the sampled entry count may not be accurate.
+ uint64_t TotalCount;
+ if (Inst->extractProfTotalWeight(TotalCount))
+ return TotalCount;
+ }
if (BFI)
return BFI->getBlockProfileCount(Inst->getParent());
return None;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 01dca0793145..800354d2f5b4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -584,7 +584,7 @@ CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
static int CompareSCEVComplexity(
SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV,
const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
- unsigned Depth = 0) {
+ DominatorTree &DT, unsigned Depth = 0) {
// Fast-path: SCEVs are uniqued so we can do a quick equality check.
if (LHS == RHS)
return 0;
@@ -629,9 +629,16 @@ static int CompareSCEVComplexity(
const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
- // Compare addrec loop depths.
+ // If there is a dominance relationship between the loops, sort by the
+ // dominance. Otherwise, sort by depth. We require such order in getAddExpr.
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
if (LLoop != RLoop) {
+ const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
+ assert(LHead != RHead && "Two loops share the same header?");
+ if (DT.dominates(LHead, RHead))
+ return 1;
+ else if (DT.dominates(RHead, LHead))
+ return -1;
unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
if (LDepth != RDepth)
return (int)LDepth - (int)RDepth;
@@ -645,7 +652,7 @@ static int CompareSCEVComplexity(
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i),
- RA->getOperand(i), Depth + 1);
+ RA->getOperand(i), DT, Depth + 1);
if (X != 0)
return X;
}
@@ -669,7 +676,7 @@ static int CompareSCEVComplexity(
if (i >= RNumOps)
return 1;
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i),
- RC->getOperand(i), Depth + 1);
+ RC->getOperand(i), DT, Depth + 1);
if (X != 0)
return X;
}
@@ -683,10 +690,10 @@ static int CompareSCEVComplexity(
// Lexicographically compare udiv expressions.
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(),
- Depth + 1);
+ DT, Depth + 1);
if (X != 0)
return X;
- X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(),
+ X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT,
Depth + 1);
if (X == 0)
EqCacheSCEV.insert({LHS, RHS});
@@ -701,7 +708,7 @@ static int CompareSCEVComplexity(
// Compare cast expressions by operand.
int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(),
- RC->getOperand(), Depth + 1);
+ RC->getOperand(), DT, Depth + 1);
if (X == 0)
EqCacheSCEV.insert({LHS, RHS});
return X;
@@ -724,7 +731,7 @@ static int CompareSCEVComplexity(
/// land in memory.
///
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
- LoopInfo *LI) {
+ LoopInfo *LI, DominatorTree &DT) {
if (Ops.size() < 2) return; // Noop
SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache;
@@ -732,15 +739,16 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
- if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0)
+ if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0)
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
std::stable_sort(Ops.begin(), Ops.end(),
- [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) {
- return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0;
+ [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) {
+ return
+ CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0;
});
// Now that we are sorted by complexity, group elements of the same
@@ -2186,7 +2194,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI);
+ GroupByComplexity(Ops, &LI, DT);
Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
@@ -2492,7 +2500,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// added together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx)
+ ++OtherIdx) {
+ // We expect the AddRecExpr's to be sorted in reverse dominance order,
+ // so that the 1st found AddRecExpr is dominated by all others.
+ assert(DT.dominates(
+ cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
+ AddRec->getLoop()->getHeader()) &&
+ "AddRecExprs are not sorted in reverse dominance order?");
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
// Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
@@ -2518,6 +2532,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
+ }
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
@@ -2614,7 +2629,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI);
+ GroupByComplexity(Ops, &LI, DT);
Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
@@ -3211,7 +3226,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI);
+ GroupByComplexity(Ops, &LI, DT);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -3312,7 +3327,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
#endif
// Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI);
+ GroupByComplexity(Ops, &LI, DT);
// If there are any constants, fold them together.
unsigned Idx = 0;
@@ -4636,7 +4651,7 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
KnownBits Known(BitWidth);
computeKnownBits(U->getValue(), Known, getDataLayout(), 0, &AC,
nullptr, &DT);
- return Known.Zero.countTrailingOnes();
+ return Known.countMinTrailingZeros();
}
// SCEVUDivExpr
@@ -5955,6 +5970,30 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
return false;
}
+ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
+ : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {}
+
+ScalarEvolution::ExitLimit::ExitLimit(
+ const SCEV *E, const SCEV *M, bool MaxOrZero,
+ ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
+ : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
+ assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
+ !isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
+ "Exact is not allowed to be less precise than Max");
+ for (auto *PredSet : PredSetList)
+ for (auto *P : *PredSet)
+ addPredicate(P);
+}
+
+ScalarEvolution::ExitLimit::ExitLimit(
+ const SCEV *E, const SCEV *M, bool MaxOrZero,
+ const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
+ : ExitLimit(E, M, MaxOrZero, {&PredSet}) {}
+
+ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
+ bool MaxOrZero)
+ : ExitLimit(E, M, MaxOrZero, None) {}
+
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
@@ -6637,13 +6676,12 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
// {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
// bitwidth(K) iterations.
Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
- bool KnownZero, KnownOne;
- ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr,
- Predecessor->getTerminator(), &DT);
+ KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr,
+ Predecessor->getTerminator(), &DT);
auto *Ty = cast<IntegerType>(RHS->getType());
- if (KnownZero)
+ if (Known.isNonNegative())
StableValue = ConstantInt::get(Ty, 0);
- else if (KnownOne)
+ else if (Known.isNegative())
StableValue = ConstantInt::get(Ty, -1, true);
else
return getCouldNotCompute();
@@ -7377,48 +7415,49 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
const APInt &N = NC->getAPInt();
APInt Two(BitWidth, 2);
- {
- using namespace APIntOps;
- const APInt& C = L;
- // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
- // The B coefficient is M-N/2
- APInt B(M);
- B -= N.sdiv(Two);
+ // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
- // The A coefficient is N/2
- APInt A(N.sdiv(Two));
+ // The A coefficient is N/2
+ APInt A = N.sdiv(Two);
- // Compute the B^2-4ac term.
- APInt SqrtTerm(B);
- SqrtTerm *= B;
- SqrtTerm -= 4 * (A * C);
+ // The B coefficient is M-N/2
+ APInt B = M;
+ B -= A; // A is the same as N/2.
- if (SqrtTerm.isNegative()) {
- // The loop is provably infinite.
- return None;
- }
+ // The C coefficient is L.
+ const APInt& C = L;
- // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
- // integer value or else APInt::sqrt() will assert.
- APInt SqrtVal(SqrtTerm.sqrt());
+ // Compute the B^2-4ac term.
+ APInt SqrtTerm = B;
+ SqrtTerm *= B;
+ SqrtTerm -= 4 * (A * C);
- // Compute the two solutions for the quadratic formula.
- // The divisions must be performed as signed divisions.
- APInt NegB(-B);
- APInt TwoA(A << 1);
- if (TwoA.isMinValue())
- return None;
+ if (SqrtTerm.isNegative()) {
+ // The loop is provably infinite.
+ return None;
+ }
+
+ // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
+ // integer value or else APInt::sqrt() will assert.
+ APInt SqrtVal = SqrtTerm.sqrt();
+
+ // Compute the two solutions for the quadratic formula.
+ // The divisions must be performed as signed divisions.
+ APInt NegB = -std::move(B);
+ APInt TwoA = std::move(A);
+ TwoA <<= 1;
+ if (TwoA.isNullValue())
+ return None;
- LLVMContext &Context = SE.getContext();
+ LLVMContext &Context = SE.getContext();
- ConstantInt *Solution1 =
- ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
- ConstantInt *Solution2 =
- ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution1 =
+ ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
+ ConstantInt *Solution2 =
+ ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
- return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
- cast<SCEVConstant>(SE.getConstant(Solution2)));
- } // end APIntOps namespace
+ return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
+ cast<SCEVConstant>(SE.getConstant(Solution2)));
}
ScalarEvolution::ExitLimit
@@ -8976,7 +9015,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
.getSignedMax();
// SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
- return (std::move(MaxValue) - std::move(MaxStrideMinusOne)).slt(MaxRHS);
+ return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS);
}
APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
@@ -8985,7 +9024,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
.getUnsignedMax();
// UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
- return (std::move(MaxValue) - std::move(MaxStrideMinusOne)).ult(MaxRHS);
+ return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS);
}
bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
@@ -9002,7 +9041,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
.getSignedMax();
// SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
- return (std::move(MinValue) + std::move(MaxStrideMinusOne)).sgt(MinRHS);
+ return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS);
}
APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
@@ -9011,7 +9050,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
.getUnsignedMax();
// UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
- return (std::move(MinValue) + std::move(MaxStrideMinusOne)).ugt(MinRHS);
+ return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS);
}
const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 848e1b4717b5..3cf1bbc5daa5 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -241,6 +241,50 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_tanhf);
}
+ // These definitions are due to math-finite.h header on Linux
+ TLI.setUnavailable(LibFunc_acos_finite);
+ TLI.setUnavailable(LibFunc_acosf_finite);
+ TLI.setUnavailable(LibFunc_acosl_finite);
+ TLI.setUnavailable(LibFunc_acosh_finite);
+ TLI.setUnavailable(LibFunc_acoshf_finite);
+ TLI.setUnavailable(LibFunc_acoshl_finite);
+ TLI.setUnavailable(LibFunc_asin_finite);
+ TLI.setUnavailable(LibFunc_asinf_finite);
+ TLI.setUnavailable(LibFunc_asinl_finite);
+ TLI.setUnavailable(LibFunc_atan2_finite);
+ TLI.setUnavailable(LibFunc_atan2f_finite);
+ TLI.setUnavailable(LibFunc_atan2l_finite);
+ TLI.setUnavailable(LibFunc_atanh_finite);
+ TLI.setUnavailable(LibFunc_atanhf_finite);
+ TLI.setUnavailable(LibFunc_atanhl_finite);
+ TLI.setUnavailable(LibFunc_cosh_finite);
+ TLI.setUnavailable(LibFunc_coshf_finite);
+ TLI.setUnavailable(LibFunc_coshl_finite);
+ TLI.setUnavailable(LibFunc_exp10_finite);
+ TLI.setUnavailable(LibFunc_exp10f_finite);
+ TLI.setUnavailable(LibFunc_exp10l_finite);
+ TLI.setUnavailable(LibFunc_exp2_finite);
+ TLI.setUnavailable(LibFunc_exp2f_finite);
+ TLI.setUnavailable(LibFunc_exp2l_finite);
+ TLI.setUnavailable(LibFunc_exp_finite);
+ TLI.setUnavailable(LibFunc_expf_finite);
+ TLI.setUnavailable(LibFunc_expl_finite);
+ TLI.setUnavailable(LibFunc_log10_finite);
+ TLI.setUnavailable(LibFunc_log10f_finite);
+ TLI.setUnavailable(LibFunc_log10l_finite);
+ TLI.setUnavailable(LibFunc_log2_finite);
+ TLI.setUnavailable(LibFunc_log2f_finite);
+ TLI.setUnavailable(LibFunc_log2l_finite);
+ TLI.setUnavailable(LibFunc_log_finite);
+ TLI.setUnavailable(LibFunc_logf_finite);
+ TLI.setUnavailable(LibFunc_logl_finite);
+ TLI.setUnavailable(LibFunc_pow_finite);
+ TLI.setUnavailable(LibFunc_powf_finite);
+ TLI.setUnavailable(LibFunc_powl_finite);
+ TLI.setUnavailable(LibFunc_sinh_finite);
+ TLI.setUnavailable(LibFunc_sinhf_finite);
+ TLI.setUnavailable(LibFunc_sinhl_finite);
+
// Win32 does *not* provide provide these functions, but they are
// generally available on POSIX-compliant systems:
TLI.setUnavailable(LibFunc_access);
@@ -496,7 +540,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
// Check for \01 prefix that is used to mangle __asm declarations and
// strip it if present.
- return GlobalValue::getRealLinkageName(funcName);
+ return GlobalValue::dropLLVMManglingEscape(funcName);
}
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
@@ -1004,22 +1048,34 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 1 && FTy.getParamType(0)->isFloatingPointTy());
case LibFunc_acos:
+ case LibFunc_acos_finite:
case LibFunc_acosf:
+ case LibFunc_acosf_finite:
case LibFunc_acosh:
+ case LibFunc_acosh_finite:
case LibFunc_acoshf:
+ case LibFunc_acoshf_finite:
case LibFunc_acoshl:
+ case LibFunc_acoshl_finite:
case LibFunc_acosl:
+ case LibFunc_acosl_finite:
case LibFunc_asin:
+ case LibFunc_asin_finite:
case LibFunc_asinf:
+ case LibFunc_asinf_finite:
case LibFunc_asinh:
case LibFunc_asinhf:
case LibFunc_asinhl:
case LibFunc_asinl:
+ case LibFunc_asinl_finite:
case LibFunc_atan:
case LibFunc_atanf:
case LibFunc_atanh:
+ case LibFunc_atanh_finite:
case LibFunc_atanhf:
+ case LibFunc_atanhf_finite:
case LibFunc_atanhl:
+ case LibFunc_atanhl_finite:
case LibFunc_atanl:
case LibFunc_cbrt:
case LibFunc_cbrtf:
@@ -1030,18 +1086,30 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_cos:
case LibFunc_cosf:
case LibFunc_cosh:
+ case LibFunc_cosh_finite:
case LibFunc_coshf:
+ case LibFunc_coshf_finite:
case LibFunc_coshl:
+ case LibFunc_coshl_finite:
case LibFunc_cosl:
case LibFunc_exp10:
+ case LibFunc_exp10_finite:
case LibFunc_exp10f:
+ case LibFunc_exp10f_finite:
case LibFunc_exp10l:
+ case LibFunc_exp10l_finite:
case LibFunc_exp2:
+ case LibFunc_exp2_finite:
case LibFunc_exp2f:
+ case LibFunc_exp2f_finite:
case LibFunc_exp2l:
+ case LibFunc_exp2l_finite:
case LibFunc_exp:
+ case LibFunc_exp_finite:
case LibFunc_expf:
+ case LibFunc_expf_finite:
case LibFunc_expl:
+ case LibFunc_expl_finite:
case LibFunc_expm1:
case LibFunc_expm1f:
case LibFunc_expm1l:
@@ -1052,20 +1120,29 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_floorf:
case LibFunc_floorl:
case LibFunc_log10:
+ case LibFunc_log10_finite:
case LibFunc_log10f:
+ case LibFunc_log10f_finite:
case LibFunc_log10l:
+ case LibFunc_log10l_finite:
case LibFunc_log1p:
case LibFunc_log1pf:
case LibFunc_log1pl:
case LibFunc_log2:
+ case LibFunc_log2_finite:
case LibFunc_log2f:
+ case LibFunc_log2f_finite:
case LibFunc_log2l:
+ case LibFunc_log2l_finite:
case LibFunc_log:
+ case LibFunc_log_finite:
case LibFunc_logb:
case LibFunc_logbf:
case LibFunc_logbl:
case LibFunc_logf:
+ case LibFunc_logf_finite:
case LibFunc_logl:
+ case LibFunc_logl_finite:
case LibFunc_nearbyint:
case LibFunc_nearbyintf:
case LibFunc_nearbyintl:
@@ -1078,8 +1155,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_sin:
case LibFunc_sinf:
case LibFunc_sinh:
+ case LibFunc_sinh_finite:
case LibFunc_sinhf:
+ case LibFunc_sinhf_finite:
case LibFunc_sinhl:
+ case LibFunc_sinhl_finite:
case LibFunc_sinl:
case LibFunc_sqrt:
case LibFunc_sqrt_finite:
@@ -1100,8 +1180,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getReturnType() == FTy.getParamType(0));
case LibFunc_atan2:
+ case LibFunc_atan2_finite:
case LibFunc_atan2f:
+ case LibFunc_atan2f_finite:
case LibFunc_atan2l:
+ case LibFunc_atan2l_finite:
case LibFunc_fmin:
case LibFunc_fminf:
case LibFunc_fminl:
@@ -1115,8 +1198,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_copysignf:
case LibFunc_copysignl:
case LibFunc_pow:
+ case LibFunc_pow_finite:
case LibFunc_powf:
+ case LibFunc_powf_finite:
case LibFunc_powl:
+ case LibFunc_powl_finite:
return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() &&
FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getReturnType() == FTy.getParamType(1));
@@ -1294,6 +1380,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
{"powf", "__svml_powf8", 8},
{"powf", "__svml_powf16", 16},
+ { "__pow_finite", "__svml_pow2", 2 },
+ { "__pow_finite", "__svml_pow4", 4 },
+ { "__pow_finite", "__svml_pow8", 8 },
+
+ { "__powf_finite", "__svml_powf4", 4 },
+ { "__powf_finite", "__svml_powf8", 8 },
+ { "__powf_finite", "__svml_powf16", 16 },
+
{"llvm.pow.f64", "__svml_pow2", 2},
{"llvm.pow.f64", "__svml_pow4", 4},
{"llvm.pow.f64", "__svml_pow8", 8},
@@ -1310,6 +1404,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
{"expf", "__svml_expf8", 8},
{"expf", "__svml_expf16", 16},
+ { "__exp_finite", "__svml_exp2", 2 },
+ { "__exp_finite", "__svml_exp4", 4 },
+ { "__exp_finite", "__svml_exp8", 8 },
+
+ { "__expf_finite", "__svml_expf4", 4 },
+ { "__expf_finite", "__svml_expf8", 8 },
+ { "__expf_finite", "__svml_expf16", 16 },
+
{"llvm.exp.f64", "__svml_exp2", 2},
{"llvm.exp.f64", "__svml_exp4", 4},
{"llvm.exp.f64", "__svml_exp8", 8},
@@ -1326,6 +1428,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
{"logf", "__svml_logf8", 8},
{"logf", "__svml_logf16", 16},
+ { "__log_finite", "__svml_log2", 2 },
+ { "__log_finite", "__svml_log4", 4 },
+ { "__log_finite", "__svml_log8", 8 },
+
+ { "__logf_finite", "__svml_logf4", 4 },
+ { "__logf_finite", "__svml_logf8", 8 },
+ { "__logf_finite", "__svml_logf16", 16 },
+
{"llvm.log.f64", "__svml_log2", 2},
{"llvm.log.f64", "__svml_log4", 4},
{"llvm.log.f64", "__svml_log8", 8},
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 26d606cce9bb..8a5d10473662 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -279,6 +279,10 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return TTIImpl->getRegisterBitWidth(Vector);
}
+unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
+ return TTIImpl->getMinVectorRegisterBitWidth();
+}
+
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
return TTIImpl->shouldConsiderAddressTypePromotion(
@@ -500,6 +504,15 @@ unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
+bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
+ Type *Ty, ReductionFlags Flags) const {
+ return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
+}
+
+bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
+ return TTIImpl->shouldExpandReduction(II);
+}
+
TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index a7f3ff672aef..cba7363a0afa 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -88,9 +88,8 @@ struct Query {
/// classic case of this is assume(x = y), which will attempt to determine
/// bits in x from bits in y, which will attempt to determine bits in y from
/// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
- /// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
- /// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so
- /// on.
+ /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
+ /// (all of which can call computeKnownBits), and so on.
std::array<const Value *, MaxDepth> Excluded;
unsigned NumExcluded;
@@ -143,6 +142,16 @@ void llvm::computeKnownBits(const Value *V, KnownBits &Known,
Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
}
+static KnownBits computeKnownBits(const Value *V, unsigned Depth,
+ const Query &Q);
+
+KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
+}
+
bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI,
@@ -159,16 +168,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
-static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
- unsigned Depth, const Query &Q);
-
-void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout &DL, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- ::ComputeSignBit(V, KnownZero, KnownOne, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
-}
static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q);
@@ -194,9 +193,8 @@ bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- bool NonNegative, Negative;
- ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
- return NonNegative;
+ KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ return Known.isNonNegative();
}
bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
@@ -214,9 +212,8 @@ bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- bool NonNegative, Negative;
- ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
- return Negative;
+ KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ return Known.isNegative();
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
@@ -342,10 +339,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- unsigned TrailZ = Known.Zero.countTrailingOnes() +
- Known2.Zero.countTrailingOnes();
- unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() +
- Known2.Zero.countLeadingOnes(),
+ unsigned TrailZ = Known.countMinTrailingZeros() +
+ Known2.countMinTrailingZeros();
+ unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
+ Known2.countMinLeadingZeros(),
BitWidth) - BitWidth;
TrailZ = std::min(TrailZ, BitWidth);
@@ -750,8 +747,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero.
- Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes());
- // assume(v <_u c)
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
+ // assume(v <_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_ULT &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
@@ -761,9 +758,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
// Whatever high bits in c are zero are known to be zero (if c is a power
// of 2, then one more).
if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
- Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()+1);
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
else
- Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes());
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
}
}
@@ -916,7 +913,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
m_Value(Y))))) {
Known2.resetAll();
computeKnownBits(Y, Known2, Depth + 1, Q);
- if (Known2.One.countTrailingOnes() > 0)
+ if (Known2.countMinTrailingOnes() > 0)
Known.Zero.setBit(0);
}
break;
@@ -953,14 +950,13 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
- unsigned LeadZ = Known2.Zero.countLeadingOnes();
+ unsigned LeadZ = Known2.countMinLeadingZeros();
Known2.resetAll();
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
- unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros();
- if (RHSUnknownLeadingOnes != BitWidth)
- LeadZ = std::min(BitWidth,
- LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+ unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
+ if (RHSMaxLeadingZeros != BitWidth)
+ LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
Known.Zero.setHighBits(LeadZ);
break;
@@ -983,8 +979,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
if (Known.isNegative() && Known2.isNegative())
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
- MaxHighOnes = std::max(Known.One.countLeadingOnes(),
- Known2.One.countLeadingOnes());
+ MaxHighOnes =
+ std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
// If either side is non-negative, the result is non-negative.
else if (Known.isNonNegative() || Known2.isNonNegative())
MaxHighZeros = 1;
@@ -993,8 +989,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
if (Known.isNonNegative() && Known2.isNonNegative())
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
- MaxHighZeros = std::max(Known.Zero.countLeadingOnes(),
- Known2.Zero.countLeadingOnes());
+ MaxHighZeros = std::max(Known.countMinLeadingZeros(),
+ Known2.countMinLeadingZeros());
// If either side is negative, the result is negative.
else if (Known.isNegative() || Known2.isNegative())
MaxHighOnes = 1;
@@ -1002,12 +998,12 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
MaxHighOnes =
- std::max(Known.One.countLeadingOnes(), Known2.One.countLeadingOnes());
+ std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
} else if (SPF == SPF_UMIN) {
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
MaxHighZeros =
- std::max(Known.Zero.countLeadingOnes(), Known2.Zero.countLeadingOnes());
+ std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
}
// Only known if known in both the LHS and RHS.
@@ -1185,8 +1181,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
- unsigned Leaders = std::max(Known.Zero.countLeadingOnes(),
- Known2.Zero.countLeadingOnes());
+ unsigned Leaders =
+ std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
Known.resetAll();
Known.Zero.setHighBits(Leaders);
break;
@@ -1207,7 +1203,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// to determine if we can prove known low zero bits.
KnownBits LocalKnown(BitWidth);
computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
- unsigned TrailZ = LocalKnown.Zero.countTrailingOnes();
+ unsigned TrailZ = LocalKnown.countMinTrailingZeros();
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
@@ -1241,7 +1237,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
computeKnownBits(Index, LocalKnown, Depth + 1, Q);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
- LocalKnown.Zero.countTrailingOnes()));
+ LocalKnown.countMinTrailingZeros()));
}
}
@@ -1286,8 +1282,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
KnownBits Known3(Known);
computeKnownBits(L, Known3, Depth + 1, Q);
- Known.Zero.setLowBits(std::min(Known2.Zero.countTrailingOnes(),
- Known3.Zero.countTrailingOnes()));
+ Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
+ Known3.countMinTrailingZeros()));
if (DontImproveNonNegativePhiBits)
break;
@@ -1386,12 +1382,25 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Known.Zero |= Known2.Zero.byteSwap();
Known.One |= Known2.One.byteSwap();
break;
- case Intrinsic::ctlz:
+ case Intrinsic::ctlz: {
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleLZ = Known2.One.countLeadingZeros();
+ // If this call is undefined for 0, the result will be less than 2^n.
+ if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
+ PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
+ unsigned LowBits = Log2_32(PossibleLZ)+1;
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
case Intrinsic::cttz: {
- unsigned LowBits = Log2_32(BitWidth)+1;
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleTZ = Known2.One.countTrailingZeros();
// If this call is undefined for 0, the result will be less than 2^n.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
- LowBits -= 1;
+ PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
+ unsigned LowBits = Log2_32(PossibleTZ)+1;
Known.Zero.setBitsFrom(LowBits);
break;
}
@@ -1399,7 +1408,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// We can bound the space the count needs. Also, bits known to be zero
// can't contribute to the population.
- unsigned BitsPossiblySet = BitWidth - Known2.Zero.countPopulation();
+ unsigned BitsPossiblySet = Known2.countMaxPopulation();
unsigned LowBits = Log2_32(BitsPossiblySet)+1;
Known.Zero.setBitsFrom(LowBits);
// TODO: we could bound KnownOne using the lower bound on the number
@@ -1450,6 +1459,14 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
}
/// Determine which bits of V are known to be either zero or one and return
+/// them.
+KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
+ KnownBits Known(getBitWidth(V->getType(), Q.DL));
+ computeKnownBits(V, Known, Depth, Q);
+ return Known;
+}
+
+/// Determine which bits of V are known to be either zero or one and return
/// them in the Known bit set.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
@@ -1568,16 +1585,6 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
-/// Determine whether the sign bit is known to be zero or one.
-/// Convenience wrapper around computeKnownBits.
-void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
- unsigned Depth, const Query &Q) {
- KnownBits Bits(getBitWidth(V->getType(), Q.DL));
- computeKnownBits(V, Bits, Depth, Q);
- KnownOne = Bits.isNegative();
- KnownZero = Bits.isNonNegative();
-}
-
/// Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
@@ -1842,24 +1849,20 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (BO->isExact())
return isKnownNonZero(X, Depth, Q);
- bool XKnownNonNegative, XKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q);
- if (XKnownNegative)
+ KnownBits Known = computeKnownBits(X, Depth, Q);
+ if (Known.isNegative())
return true;
// If the shifter operand is a constant, and all of the bits shifted
// out are known to be zero, and X is known non-zero then at least one
// non-zero bit must remain.
if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
- KnownBits Known(BitWidth);
- computeKnownBits(X, Known, Depth, Q);
-
auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
// Is there a known one in the portion not shifted out?
- if (Known.One.countLeadingZeros() < BitWidth - ShiftVal)
+ if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
return true;
// Are all the bits to be shifted out known zero?
- if (Known.Zero.countTrailingOnes() >= ShiftVal)
+ if (Known.countMinTrailingZeros() >= ShiftVal)
return isKnownNonZero(X, Depth, Q);
}
}
@@ -1869,39 +1872,34 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
- bool XKnownNonNegative, XKnownNegative;
- bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Depth, Q);
+ KnownBits XKnown = computeKnownBits(X, Depth, Q);
+ KnownBits YKnown = computeKnownBits(Y, Depth, Q);
// If X and Y are both non-negative (as signed values) then their sum is not
// zero unless both X and Y are zero.
- if (XKnownNonNegative && YKnownNonNegative)
+ if (XKnown.isNonNegative() && YKnown.isNonNegative())
if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q))
return true;
// If X and Y are both negative (as signed values) then their sum is not
// zero unless both X and Y equal INT_MIN.
- if (XKnownNegative && YKnownNegative) {
- KnownBits Known(BitWidth);
+ if (XKnown.isNegative() && YKnown.isNegative()) {
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- computeKnownBits(X, Known, Depth, Q);
- if (Known.One.intersects(Mask))
+ if (XKnown.One.intersects(Mask))
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- computeKnownBits(Y, Known, Depth, Q);
- if (Known.One.intersects(Mask))
+ if (YKnown.One.intersects(Mask))
return true;
}
// The sum of a non-negative number and a power of two is not zero.
- if (XKnownNonNegative &&
+ if (XKnown.isNonNegative() &&
isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
return true;
- if (YKnownNonNegative &&
+ if (YKnown.isNonNegative() &&
isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
return true;
}
@@ -2276,14 +2274,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// If we know that the sign bit is either zero or one, determine the number of
// identical bits in the top of the input value.
- if (Known.isNonNegative())
- return std::max(FirstAnswer, Known.Zero.countLeadingOnes());
-
- if (Known.isNegative())
- return std::max(FirstAnswer, Known.One.countLeadingOnes());
-
- // computeKnownBits gave us no extra information about the top bits.
- return FirstAnswer;
+ return std::max(FirstAnswer, Known.countMinSignBits());
}
/// This function computes the integer multiple of Base that equals V.
@@ -3441,8 +3432,8 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
// Note that underestimating the number of zero bits gives a more
// conservative answer.
- unsigned ZeroBits = LHSKnown.Zero.countLeadingOnes() +
- RHSKnown.Zero.countLeadingOnes();
+ unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
+ RHSKnown.countMinLeadingZeros();
// First handle the easy case: if we have enough zero bits there's
// definitely no overflow.
if (ZeroBits >= BitWidth)
@@ -3475,21 +3466,17 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- bool LHSKnownNonNegative, LHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
- AC, CxtI, DT);
- if (LHSKnownNonNegative || LHSKnownNegative) {
- bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
- AC, CxtI, DT);
-
- if (LHSKnownNegative && RHSKnownNegative) {
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+
+ if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
// The sign bit is set in both cases: this MUST overflow.
// Create a simple add instruction, and insert it into the struct.
return OverflowResult::AlwaysOverflows;
}
- if (LHSKnownNonNegative && RHSKnownNonNegative) {
+ if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
// The sign bit is clear in both cases: this CANNOT overflow.
// Create a simple add instruction, and insert it into the struct.
return OverflowResult::NeverOverflows;
@@ -3499,6 +3486,51 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
return OverflowResult::MayOverflow;
}
+/// \brief Return true if we can prove that adding the two values of the
+/// knownbits will not overflow.
+/// Otherwise return false.
+static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
+ const KnownBits &RHSKnown) {
+ // Addition of two 2's complement numbers having opposite signs will never
+ // overflow.
+ if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) ||
+ (LHSKnown.isNonNegative() && RHSKnown.isNegative()))
+ return true;
+
+ // If either of the values is known to be non-negative, adding them can only
+ // overflow if the second is also non-negative, so we can assume that.
+ // Two non-negative numbers will only overflow if there is a carry to the
+ // sign bit, so we can check if even when the values are as big as possible
+ // there is no overflow to the sign bit.
+ if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
+ APInt MaxLHS = ~LHSKnown.Zero;
+ MaxLHS.clearSignBit();
+ APInt MaxRHS = ~RHSKnown.Zero;
+ MaxRHS.clearSignBit();
+ APInt Result = std::move(MaxLHS) + std::move(MaxRHS);
+ return Result.isSignBitClear();
+ }
+
+ // If either of the values is known to be negative, adding them can only
+ // overflow if the second is also negative, so we can assume that.
+ // Two negative number will only overflow if there is no carry to the sign
+ // bit, so we can check if even when the values are as small as possible
+ // there is overflow to the sign bit.
+ if (LHSKnown.isNegative() || RHSKnown.isNegative()) {
+ APInt MinLHS = LHSKnown.One;
+ MinLHS.clearSignBit();
+ APInt MinRHS = RHSKnown.One;
+ MinRHS.clearSignBit();
+ APInt Result = std::move(MinLHS) + std::move(MinRHS);
+ return Result.isSignBitSet();
+ }
+
+ // If we reached here it means that we know nothing about the sign bits.
+ // In this case we can't know if there will be an overflow, since by
+ // changing the sign bits any two values can be made to overflow.
+ return false;
+}
+
static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
const Value *RHS,
const AddOperator *Add,
@@ -3510,18 +3542,29 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
return OverflowResult::NeverOverflows;
}
- bool LHSKnownNonNegative, LHSKnownNegative;
- bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
- AC, CxtI, DT);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
- AC, CxtI, DT);
+ // If LHS and RHS each have at least two sign bits, the addition will look
+ // like
+ //
+ // XX..... +
+ // YY.....
+ //
+ // If the carry into the most significant position is 0, X and Y can't both
+ // be 1 and therefore the carry out of the addition is also 0.
+ //
+ // If the carry into the most significant position is 1, X and Y can't both
+ // be 0 and therefore the carry out of the addition is also 1.
+ //
+ // Since the carry into the most significant position is always equal to
+ // the carry out of the addition, there is no signed overflow.
+ if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
+ ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
+ return OverflowResult::NeverOverflows;
+
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
- if ((LHSKnownNonNegative && RHSKnownNegative) ||
- (LHSKnownNegative && RHSKnownNonNegative)) {
- // The sign bits are opposite: this CANNOT overflow.
+ if (checkRippleForSignedAdd(LHSKnown, RHSKnown))
return OverflowResult::NeverOverflows;
- }
// The remaining code needs Add to be available. Early returns if not so.
if (!Add)
@@ -3532,14 +3575,13 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
// @llvm.assume'ed non-negative rather than proved so from analyzing its
// operands.
bool LHSOrRHSKnownNonNegative =
- (LHSKnownNonNegative || RHSKnownNonNegative);
- bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative);
+ (LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
+ bool LHSOrRHSKnownNegative =
+ (LHSKnown.isNegative() || RHSKnown.isNegative());
if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
- bool AddKnownNonNegative, AddKnownNegative;
- ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL,
- /*Depth=*/0, AC, CxtI, DT);
- if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) ||
- (AddKnownNegative && LHSOrRHSKnownNegative)) {
+ KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
+ if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
+ (AddKnown.isNegative() && LHSOrRHSKnownNegative)) {
return OverflowResult::NeverOverflows;
}
}
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 722f17a8067e..2d2249da4e13 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
using namespace llvm;
using namespace llvm::PatternMatch;