Diffstat (limited to 'lib/Analysis')
 lib/Analysis/BasicAliasAnalysis.cpp         |   9
 lib/Analysis/BranchProbabilityInfo.cpp      |  11
 lib/Analysis/CallGraph.cpp                  |  34
 lib/Analysis/ConstantFolding.cpp            |  80
 lib/Analysis/DemandedBits.cpp               |  10
 lib/Analysis/InlineCost.cpp                 |   2
 lib/Analysis/InstructionSimplify.cpp        | 167
 lib/Analysis/ModuleSummaryAnalysis.cpp      |   3
 lib/Analysis/OptimizationDiagnosticInfo.cpp |   2
 lib/Analysis/ProfileSummaryInfo.cpp         |  13
 lib/Analysis/ScalarEvolution.cpp            | 159
 lib/Analysis/TargetLibraryInfo.cpp          | 112
 lib/Analysis/TargetTransformInfo.cpp        |  13
 lib/Analysis/ValueTracking.cpp              | 276
 lib/Analysis/VectorUtils.cpp                |   1
 15 files changed, 569 insertions(+), 323 deletions(-)
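
Note: the change repeated across most of these files is mechanical -- the old ComputeSignBit(V, KnownZero, KnownOne, ...) helper and direct APInt queries such as Known.Zero.countTrailingOnes() are replaced by the KnownBits struct returned from the computeKnownBits() overload added in ValueTracking.cpp. The following is only an illustrative sketch of the new call pattern (the helper name is hypothetical, not part of the patch):

// Illustrative only -- not part of this commit. Shows the KnownBits-based
// sign-bit query that replaces ComputeSignBit() throughout this diff.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool isSignBitKnownZero(const Value *V, const DataLayout &DL,
                               AssumptionCache *AC, const Instruction *CxtI,
                               const DominatorTree *DT) {
  // One call now yields both the known-zero and known-one bit masks.
  KnownBits Known = computeKnownBits(V, DL, /*Depth=*/0, AC, CxtI, DT);
  // Sign bit known clear <=> value known non-negative; Known.isNegative()
  // and Known.countMinTrailingZeros() cover the other converted call sites.
  return Known.isNonNegative();
}
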
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 537823020301..a33c01a0e461 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -17,13 +17,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -36,6 +36,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include <algorithm> #define DEBUG_TYPE "basicaa" @@ -1283,9 +1284,9 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // give up if we can't determine conditions that hold for every cycle: const Value *V = DecompGEP1.VarIndices[i].V; - bool SignKnownZero, SignKnownOne; - ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL, - 0, &AC, nullptr, DT); + KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT); + bool SignKnownZero = Known.isNonNegative(); + bool SignKnownOne = Known.isNegative(); // Zero-extension widens the variable, and so forces the sign // bit to zero. diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 0dc4475ca0e2..db87b17c1567 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -301,6 +301,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { WeightSum += Weights[i]; } } + assert(WeightSum <= UINT32_MAX && + "Expected weights to scale down to 32 bits"); if (WeightSum == 0 || ReachableIdxs.size() == 0) { for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) @@ -328,21 +330,14 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { // the difference between reachable blocks. if (ToDistribute > BranchProbability::getZero()) { BranchProbability PerEdge = ToDistribute / ReachableIdxs.size(); - for (auto i : ReachableIdxs) { + for (auto i : ReachableIdxs) BP[i] += PerEdge; - ToDistribute -= PerEdge; - } - // Tail goes to the first reachable edge. - BP[ReachableIdxs[0]] += ToDistribute; } } for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) setEdgeProbability(BB, i, BP[i]); - assert(WeightSum <= UINT32_MAX && - "Expected weights to scale down to 32 bits"); - return true; } diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index 6942176ae6ae..ff5242f69a1b 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -21,23 +21,18 @@ using namespace llvm; // CallGraph::CallGraph(Module &M) - : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), + : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)), CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) { // Add every function to the call graph. 
for (Function &F : M) addToCallGraph(&F); - - // If we didn't find a main function, use the external call graph node - if (!Root) - Root = ExternalCallingNode; } CallGraph::CallGraph(CallGraph &&Arg) - : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root), + : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), ExternalCallingNode(Arg.ExternalCallingNode), CallsExternalNode(std::move(Arg.CallsExternalNode)) { Arg.FunctionMap.clear(); - Arg.Root = nullptr; Arg.ExternalCallingNode = nullptr; } @@ -57,21 +52,9 @@ CallGraph::~CallGraph() { void CallGraph::addToCallGraph(Function *F) { CallGraphNode *Node = getOrInsertFunction(F); - // If this function has external linkage, anything could call it. - if (!F->hasLocalLinkage()) { - ExternalCallingNode->addCalledFunction(CallSite(), Node); - - // Found the entry point? - if (F->getName() == "main") { - if (Root) // Found multiple external mains? Don't pick one. - Root = ExternalCallingNode; - else - Root = Node; // Found a main, keep track of it! - } - } - - // If this function has its address taken, anything could call it. - if (F->hasAddressTaken()) + // If this function has external linkage or has its address taken, anything + // could call it. + if (!F->hasLocalLinkage() || F->hasAddressTaken()) ExternalCallingNode->addCalledFunction(CallSite(), Node); // If this function is not defined in this translation unit, it could call @@ -96,13 +79,6 @@ void CallGraph::addToCallGraph(Function *F) { } void CallGraph::print(raw_ostream &OS) const { - OS << "CallGraph Root is: "; - if (Function *F = Root->getFunction()) - OS << F->getName() << "\n"; - else { - OS << "<<null function: 0x" << Root << ">>\n"; - } - // Print in a deterministic order by sorting CallGraphNodes by name. We do // this here to avoid slowing down the non-printing fast path. diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 130e917e49d7..0ca712bbfe70 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1438,6 +1438,36 @@ bool llvm::canConstantFoldCallTo(const Function *F) { Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; case 't': return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; + case '_': + + // Check for various function names that get used for the math functions + // when the header files are preprocessed with the macro + // __FINITE_MATH_ONLY__ enabled. + // The '12' here is the length of the shortest name that can match. + // We need to check the size before looking at Name[1] and Name[2] + // so we may as well check a limit that will eliminate mismatches. 
+ if (Name.size() < 12 || Name[1] != '_') + return false; + switch (Name[2]) { + default: + return false; + case 'a': + return Name == "__acos_finite" || Name == "__acosf_finite" || + Name == "__asin_finite" || Name == "__asinf_finite" || + Name == "__atan2_finite" || Name == "__atan2f_finite"; + case 'c': + return Name == "__cosh_finite" || Name == "__coshf_finite"; + case 'e': + return Name == "__exp_finite" || Name == "__expf_finite" || + Name == "__exp2_finite" || Name == "__exp2f_finite"; + case 'l': + return Name == "__log_finite" || Name == "__logf_finite" || + Name == "__log10_finite" || Name == "__log10f_finite"; + case 'p': + return Name == "__pow_finite" || Name == "__powf_finite"; + case 's': + return Name == "__sinh_finite" || Name == "__sinhf_finite"; + } } } @@ -1637,13 +1667,21 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, if (!TLI) return nullptr; - switch (Name[0]) { + char NameKeyChar = Name[0]; + if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_') + NameKeyChar = Name[2]; + + switch (NameKeyChar) { case 'a': if ((Name == "acos" && TLI->has(LibFunc_acos)) || - (Name == "acosf" && TLI->has(LibFunc_acosf))) + (Name == "acosf" && TLI->has(LibFunc_acosf)) || + (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) || + (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite))) return ConstantFoldFP(acos, V, Ty); else if ((Name == "asin" && TLI->has(LibFunc_asin)) || - (Name == "asinf" && TLI->has(LibFunc_asinf))) + (Name == "asinf" && TLI->has(LibFunc_asinf)) || + (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) || + (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite))) return ConstantFoldFP(asin, V, Ty); else if ((Name == "atan" && TLI->has(LibFunc_atan)) || (Name == "atanf" && TLI->has(LibFunc_atanf))) @@ -1657,15 +1695,21 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, (Name == "cosf" && TLI->has(LibFunc_cosf))) return ConstantFoldFP(cos, V, Ty); else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || - (Name == "coshf" && TLI->has(LibFunc_coshf))) + (Name == "coshf" && TLI->has(LibFunc_coshf)) || + (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) || + (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite))) return ConstantFoldFP(cosh, V, Ty); break; case 'e': if ((Name == "exp" && TLI->has(LibFunc_exp)) || - (Name == "expf" && TLI->has(LibFunc_expf))) + (Name == "expf" && TLI->has(LibFunc_expf)) || + (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) || + (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite))) return ConstantFoldFP(exp, V, Ty); if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || - (Name == "exp2f" && TLI->has(LibFunc_exp2f))) + (Name == "exp2f" && TLI->has(LibFunc_exp2f)) || + (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) || + (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite))) // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. 
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); @@ -1680,10 +1724,18 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, break; case 'l': if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || - (Name == "logf" && V > 0 && TLI->has(LibFunc_logf))) + (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) || + (Name == "__log_finite" && V > 0 && + TLI->has(LibFunc_log_finite)) || + (Name == "__logf_finite" && V > 0 && + TLI->has(LibFunc_logf_finite))) return ConstantFoldFP(log, V, Ty); else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || - (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f))) + (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) || + (Name == "__log10_finite" && V > 0 && + TLI->has(LibFunc_log10_finite)) || + (Name == "__log10f_finite" && V > 0 && + TLI->has(LibFunc_log10f_finite))) return ConstantFoldFP(log10, V, Ty); break; case 'r': @@ -1695,7 +1747,9 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, (Name == "sinf" && TLI->has(LibFunc_sinf))) return ConstantFoldFP(sin, V, Ty); else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || - (Name == "sinhf" && TLI->has(LibFunc_sinhf))) + (Name == "sinhf" && TLI->has(LibFunc_sinhf)) || + (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) || + (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite))) return ConstantFoldFP(sinh, V, Ty); else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) @@ -1813,13 +1867,17 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, if (!TLI) return nullptr; if ((Name == "pow" && TLI->has(LibFunc_pow)) || - (Name == "powf" && TLI->has(LibFunc_powf))) + (Name == "powf" && TLI->has(LibFunc_powf)) || + (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) || + (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite))) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || (Name == "fmodf" && TLI->has(LibFunc_fmodf))) return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || - (Name == "atan2f" && TLI->has(LibFunc_atan2f))) + (Name == "atan2f" && TLI->has(LibFunc_atan2f)) || + (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) || + (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite))) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 9f5dc5318239..8f808f3e7871 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -86,13 +86,11 @@ void DemandedBits::determineLiveOperandBits( [&](unsigned BitWidth, const Value *V1, const Value *V2) { const DataLayout &DL = I->getModule()->getDataLayout(); Known = KnownBits(BitWidth); - computeKnownBits(const_cast<Value *>(V1), Known, DL, 0, - &AC, UserI, &DT); + computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT); if (V2) { Known2 = KnownBits(BitWidth); - computeKnownBits(const_cast<Value *>(V2), Known2, DL, - 0, &AC, UserI, &DT); + computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT); } }; @@ -118,7 +116,7 @@ void DemandedBits::determineLiveOperandBits( // known to be one. 
ComputeKnownBits(BitWidth, I, nullptr); AB = APInt::getHighBitsSet(BitWidth, - std::min(BitWidth, Known.One.countLeadingZeros()+1)); + std::min(BitWidth, Known.countMaxLeadingZeros()+1)); } break; case Intrinsic::cttz: @@ -128,7 +126,7 @@ void DemandedBits::determineLiveOperandBits( // known to be one. ComputeKnownBits(BitWidth, I, nullptr); AB = APInt::getLowBitsSet(BitWidth, - std::min(BitWidth, Known.One.countTrailingZeros()+1)); + std::min(BitWidth, Known.countMaxTrailingZeros()+1)); } break; } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 100a591e452c..44c14cb17c22 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -63,7 +63,7 @@ static cl::opt<bool> // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. static cl::opt<int> ColdThreshold( - "inlinecold-threshold", cl::Hidden, cl::init(225), + "inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt<int> diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 4a713f441ce8..5728887cc1e9 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1317,7 +1317,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // If all valid bits in the shift amount are known zero, the first operand is // unchanged. unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth); - if (Known.Zero.countTrailingOnes() >= NumValidShiftBits) + if (Known.countMinTrailingZeros() >= NumValidShiftBits) return Op0; return nullptr; @@ -1536,7 +1536,7 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1, auto Range0 = ConstantRange::makeExactICmpRegion(Cmp0->getPredicate(), *C0); auto Range1 = ConstantRange::makeExactICmpRegion(Cmp1->getPredicate(), *C1); - // For and-of-comapares, check if the intersection is empty: + // For and-of-compares, check if the intersection is empty: // (icmp X, C0) && (icmp X, C1) --> empty set --> false if (IsAnd && Range0.intersectWith(Range1).isEmptySet()) return getFalse(Cmp0->getType()); @@ -1870,6 +1870,24 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) return Op0; + // (A & B) | (~A ^ B) -> (~A ^ B) + // (B & A) | (~A ^ B) -> (~A ^ B) + // (A & B) | (B ^ ~A) -> (B ^ ~A) + // (B & A) | (B ^ ~A) -> (B ^ ~A) + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) + return Op1; + + // (~A ^ B) | (A & B) -> (~A ^ B) + // (~A ^ B) | (B & A) -> (~A ^ B) + // (B ^ ~A) | (A & B) -> (B ^ ~A) + // (B ^ ~A) | (B & A) -> (B ^ ~A) + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) + return Op0; + if (Value *V = simplifyAndOrOfICmps(Op0, Op1, false)) return V; @@ -2286,7 +2304,6 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, return nullptr; Type *ITy = GetCompareTy(LHS); // The return type. 
- bool LHSKnownNonNegative, LHSKnownNegative; switch (Pred) { default: llvm_unreachable("Unknown ICmp predicate!"); @@ -2304,39 +2321,41 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; - case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + case ICmpInst::ICMP_SLT: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getTrue(ITy); - if (LHSKnownNonNegative) + if (LHSKnown.isNonNegative()) return getFalse(ITy); break; - case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SLE: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (LHSKnown.isNonNegative() && + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; - case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SGE: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getFalse(ITy); - if (LHSKnownNonNegative) + if (LHSKnown.isNonNegative()) return getTrue(ITy); break; - case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SGT: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (LHSKnown.isNonNegative() && + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; } + } return nullptr; } @@ -2535,6 +2554,9 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, return nullptr; } +/// TODO: A large part of this logic is duplicated in InstCombine's +/// foldICmpBinOp(). We should be able to share that and avoid the code +/// duplication. static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -2616,15 +2638,11 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return getTrue(ITy); if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { - bool RHSKnownNonNegative, RHSKnownNegative; - bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, Q.DL, 0, - Q.AC, Q.CxtI, Q.DT); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (RHSKnownNonNegative && YKnownNegative) + KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (RHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); - if (RHSKnownNegative || YKnownNonNegative) + if (RHSKnown.isNegative() || YKnown.isNonNegative()) return Pred == ICmpInst::ICMP_SLT ? 
getFalse(ITy) : getTrue(ITy); } } @@ -2636,15 +2654,11 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return getFalse(ITy); if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) { - bool LHSKnownNonNegative, LHSKnownNegative; - bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, - Q.AC, Q.CxtI, Q.DT); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNonNegative && YKnownNegative) + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy); - if (LHSKnownNegative || YKnownNonNegative) + if (LHSKnown.isNegative() || YKnown.isNonNegative()) return Pred == ICmpInst::ICMP_SGT ? getFalse(ITy) : getTrue(ITy); } } @@ -2691,28 +2705,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { - bool KnownNonNegative, KnownNegative; switch (Pred) { default: break; case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SGE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return getFalse(ITy); case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SLE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2722,28 +2735,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // icmp pred X, (urem Y, X) if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { - bool KnownNonNegative, KnownNegative; switch (Pred) { default: break; case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SGE: { + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return getTrue(ITy); case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SLE: { + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2815,10 +2827,19 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, break; case Instruction::UDiv: case Instruction::LShr: - if (ICmpInst::isSigned(Pred)) + if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact()) break; - LLVM_FALLTHROUGH; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), Q, MaxRecurse - 1)) + return V; + break; case Instruction::SDiv: + if 
(!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact()) + break; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), Q, MaxRecurse - 1)) + return V; + break; case Instruction::AShr: if (!LBO->isExact() || !RBO->isExact()) break; @@ -4034,24 +4055,21 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, /// match a root vector source operand that contains that element in the same /// vector lane (ie, the same mask index), so we can eliminate the shuffle(s). static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, - Constant *Mask, Value *RootVec, int RootElt, + int MaskVal, Value *RootVec, unsigned MaxRecurse) { if (!MaxRecurse--) return nullptr; // Bail out if any mask value is undefined. That kind of shuffle may be // simplified further based on demanded bits or other folds. - int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt); if (MaskVal == -1) return nullptr; // The mask value chooses which source operand we need to look at next. - Value *SourceOp; int InVecNumElts = Op0->getType()->getVectorNumElements(); - if (MaskVal < InVecNumElts) { - RootElt = MaskVal; - SourceOp = Op0; - } else { + int RootElt = MaskVal; + Value *SourceOp = Op0; + if (MaskVal >= InVecNumElts) { RootElt = MaskVal - InVecNumElts; SourceOp = Op1; } @@ -4061,7 +4079,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) { return foldIdentityShuffles( DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1), - SourceShuf->getMask(), RootVec, RootElt, MaxRecurse); + SourceShuf->getMaskValue(RootElt), RootVec, MaxRecurse); } // TODO: Look through bitcasts? What if the bitcast changes the vector element @@ -4126,17 +4144,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, // second one. if (Op0Const && !Op1Const) { std::swap(Op0, Op1); - for (int &Idx : Indices) { - if (Idx == -1) - continue; - Idx = Idx < (int)InVecNumElts ? Idx + InVecNumElts : Idx - InVecNumElts; - assert(Idx >= 0 && Idx < (int)InVecNumElts * 2 && - "shufflevector mask index out of range"); - } - Mask = ConstantDataVector::get( - Mask->getContext(), - makeArrayRef(reinterpret_cast<uint32_t *>(Indices.data()), - MaskNumElts)); + ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts); } // A shuffle of a splat is always the splat itself. Legal if the shuffle's @@ -4160,7 +4168,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, for (unsigned i = 0; i != MaskNumElts; ++i) { // Note that recursion is limited for each vector element, so if any element // exceeds the limit, this will fail to simplify. - RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse); + RootVec = + foldIdentityShuffles(i, Op0, Op1, Indices[i], RootVec, MaxRecurse); // We can't replace a widening/narrowing shuffle with one of its operands. if (!RootVec || RootVec->getType() != RetTy) diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 99f900ae3932..26706f5509ba 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -232,7 +232,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } // We should have named any anonymous globals assert(CalledFunction->hasName()); - auto ScaledCount = ProfileSummaryInfo::getProfileCount(&I, BFI); + auto ScaledCount = PSI->getProfileCount(&I, BFI); auto Hotness = ScaledCount ? 
getHotness(ScaledCount.getValue(), PSI) : CalleeInfo::HotnessType::Unknown; @@ -330,6 +330,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( const Module &M, std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback, ProfileSummaryInfo *PSI) { + assert(PSI); ModuleSummaryIndex Index; // Identify the local values in the llvm.used and llvm.compiler.used sets, diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp index 73245981b022..e38e530c052d 100644 --- a/lib/Analysis/OptimizationDiagnosticInfo.cpp +++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp @@ -101,7 +101,7 @@ void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping( // These are read-only for now. DiagnosticLocation DL = OptDiag->getLocation(); StringRef FN = - GlobalValue::getRealLinkageName(OptDiag->getFunction().getName()); + GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName()); StringRef PassName(OptDiag->PassName); io.mapRequired("Pass", PassName); diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp index 1a53a8ed4283..502f4205b689 100644 --- a/lib/Analysis/ProfileSummaryInfo.cpp +++ b/lib/Analysis/ProfileSummaryInfo.cpp @@ -75,11 +75,14 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst, return None; assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && "We can only get profile count for call/invoke instruction."); - // Check if there is a profile metadata on the instruction. If it is present, - // determine hotness solely based on that. - uint64_t TotalCount; - if (Inst->extractProfTotalWeight(TotalCount)) - return TotalCount; + if (computeSummary() && Summary->getKind() == ProfileSummary::PSK_Sample) { + // In sample PGO mode, check if there is a profile metadata on the + // instruction. If it is present, determine hotness solely based on that, + // since the sampled entry count may not be accurate. + uint64_t TotalCount; + if (Inst->extractProfTotalWeight(TotalCount)) + return TotalCount; + } if (BFI) return BFI->getBlockProfileCount(Inst->getParent()); return None; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 01dca0793145..800354d2f5b4 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -584,7 +584,7 @@ CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache, static int CompareSCEVComplexity( SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, - unsigned Depth = 0) { + DominatorTree &DT, unsigned Depth = 0) { // Fast-path: SCEVs are uniqued so we can do a quick equality check. if (LHS == RHS) return 0; @@ -629,9 +629,16 @@ static int CompareSCEVComplexity( const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); - // Compare addrec loop depths. + // If there is a dominance relationship between the loops, sort by the + // dominance. Otherwise, sort by depth. We require such order in getAddExpr. 
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); if (LLoop != RLoop) { + const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader(); + assert(LHead != RHead && "Two loops share the same header?"); + if (DT.dominates(LHead, RHead)) + return 1; + else if (DT.dominates(RHead, LHead)) + return -1; unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth(); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; @@ -645,7 +652,7 @@ static int CompareSCEVComplexity( // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i), - RA->getOperand(i), Depth + 1); + RA->getOperand(i), DT, Depth + 1); if (X != 0) return X; } @@ -669,7 +676,7 @@ static int CompareSCEVComplexity( if (i >= RNumOps) return 1; int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i), - RC->getOperand(i), Depth + 1); + RC->getOperand(i), DT, Depth + 1); if (X != 0) return X; } @@ -683,10 +690,10 @@ static int CompareSCEVComplexity( // Lexicographically compare udiv expressions. int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(), - Depth + 1); + DT, Depth + 1); if (X != 0) return X; - X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), + X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT, Depth + 1); if (X == 0) EqCacheSCEV.insert({LHS, RHS}); @@ -701,7 +708,7 @@ static int CompareSCEVComplexity( // Compare cast expressions by operand. int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(), - RC->getOperand(), Depth + 1); + RC->getOperand(), DT, Depth + 1); if (X == 0) EqCacheSCEV.insert({LHS, RHS}); return X; @@ -724,7 +731,7 @@ static int CompareSCEVComplexity( /// land in memory. /// static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, - LoopInfo *LI) { + LoopInfo *LI, DominatorTree &DT) { if (Ops.size() < 2) return; // Noop SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache; @@ -732,15 +739,16 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; - if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0) + if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. std::stable_sort(Ops.begin(), Ops.end(), - [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) { - return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0; + [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) { + return + CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0; }); // Now that we are sorted by complexity, group elements of the same @@ -2186,7 +2194,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); @@ -2492,7 +2500,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // added together. If so, we can fold them. for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); - ++OtherIdx) + ++OtherIdx) { + // We expect the AddRecExpr's to be sorted in reverse dominance order, + // so that the 1st found AddRecExpr is dominated by all others. 
+ assert(DT.dominates( + cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(), + AddRec->getLoop()->getHeader()) && + "AddRecExprs are not sorted in reverse dominance order?"); if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L> SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), @@ -2518,6 +2532,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } + } // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -2614,7 +2629,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); @@ -3211,7 +3226,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3312,7 +3327,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; @@ -4636,7 +4651,7 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { KnownBits Known(BitWidth); computeKnownBits(U->getValue(), Known, getDataLayout(), 0, &AC, nullptr, &DT); - return Known.Zero.countTrailingOnes(); + return Known.countMinTrailingZeros(); } // SCEVUDivExpr @@ -5955,6 +5970,30 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, return false; } +ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) + : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {} + +ScalarEvolution::ExitLimit::ExitLimit( + const SCEV *E, const SCEV *M, bool MaxOrZero, + ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList) + : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { + assert((isa<SCEVCouldNotCompute>(ExactNotTaken) || + !isa<SCEVCouldNotCompute>(MaxNotTaken)) && + "Exact is not allowed to be less precise than Max"); + for (auto *PredSet : PredSetList) + for (auto *P : *PredSet) + addPredicate(P); +} + +ScalarEvolution::ExitLimit::ExitLimit( + const SCEV *E, const SCEV *M, bool MaxOrZero, + const SmallPtrSetImpl<const SCEVPredicate *> &PredSet) + : ExitLimit(E, M, MaxOrZero, {&PredSet}) {} + +ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, + bool MaxOrZero) + : ExitLimit(E, M, MaxOrZero, None) {} + /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -6637,13 +6676,12 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most // bitwidth(K) iterations. 
Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); - bool KnownZero, KnownOne; - ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr, - Predecessor->getTerminator(), &DT); + KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr, + Predecessor->getTerminator(), &DT); auto *Ty = cast<IntegerType>(RHS->getType()); - if (KnownZero) + if (Known.isNonNegative()) StableValue = ConstantInt::get(Ty, 0); - else if (KnownOne) + else if (Known.isNegative()) StableValue = ConstantInt::get(Ty, -1, true); else return getCouldNotCompute(); @@ -7377,48 +7415,49 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { const APInt &N = NC->getAPInt(); APInt Two(BitWidth, 2); - { - using namespace APIntOps; - const APInt& C = L; - // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C - // The B coefficient is M-N/2 - APInt B(M); - B -= N.sdiv(Two); + // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C - // The A coefficient is N/2 - APInt A(N.sdiv(Two)); + // The A coefficient is N/2 + APInt A = N.sdiv(Two); - // Compute the B^2-4ac term. - APInt SqrtTerm(B); - SqrtTerm *= B; - SqrtTerm -= 4 * (A * C); + // The B coefficient is M-N/2 + APInt B = M; + B -= A; // A is the same as N/2. - if (SqrtTerm.isNegative()) { - // The loop is provably infinite. - return None; - } + // The C coefficient is L. + const APInt& C = L; - // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest - // integer value or else APInt::sqrt() will assert. - APInt SqrtVal(SqrtTerm.sqrt()); + // Compute the B^2-4ac term. + APInt SqrtTerm = B; + SqrtTerm *= B; + SqrtTerm -= 4 * (A * C); - // Compute the two solutions for the quadratic formula. - // The divisions must be performed as signed divisions. - APInt NegB(-B); - APInt TwoA(A << 1); - if (TwoA.isMinValue()) - return None; + if (SqrtTerm.isNegative()) { + // The loop is provably infinite. + return None; + } + + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest + // integer value or else APInt::sqrt() will assert. + APInt SqrtVal = SqrtTerm.sqrt(); + + // Compute the two solutions for the quadratic formula. + // The divisions must be performed as signed divisions. + APInt NegB = -std::move(B); + APInt TwoA = std::move(A); + TwoA <<= 1; + if (TwoA.isNullValue()) + return None; - LLVMContext &Context = SE.getContext(); + LLVMContext &Context = SE.getContext(); - ConstantInt *Solution1 = - ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); - ConstantInt *Solution2 = - ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); + ConstantInt *Solution1 = + ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = + ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); - return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)), - cast<SCEVConstant>(SE.getConstant(Solution2))); - } // end APIntOps namespace + return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)), + cast<SCEVConstant>(SE.getConstant(Solution2))); } ScalarEvolution::ExitLimit @@ -8976,7 +9015,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, .getSignedMax(); // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! 
- return (std::move(MaxValue) - std::move(MaxStrideMinusOne)).slt(MaxRHS); + return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS); } APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax(); @@ -8985,7 +9024,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, .getUnsignedMax(); // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! - return (std::move(MaxValue) - std::move(MaxStrideMinusOne)).ult(MaxRHS); + return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS); } bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, @@ -9002,7 +9041,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, .getSignedMax(); // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! - return (std::move(MinValue) + std::move(MaxStrideMinusOne)).sgt(MinRHS); + return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS); } APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin(); @@ -9011,7 +9050,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, .getUnsignedMax(); // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! - return (std::move(MinValue) + std::move(MaxStrideMinusOne)).ugt(MinRHS); + return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); } const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 848e1b4717b5..3cf1bbc5daa5 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -241,6 +241,50 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_tanhf); } + // These definitions are due to math-finite.h header on Linux + TLI.setUnavailable(LibFunc_acos_finite); + TLI.setUnavailable(LibFunc_acosf_finite); + TLI.setUnavailable(LibFunc_acosl_finite); + TLI.setUnavailable(LibFunc_acosh_finite); + TLI.setUnavailable(LibFunc_acoshf_finite); + TLI.setUnavailable(LibFunc_acoshl_finite); + TLI.setUnavailable(LibFunc_asin_finite); + TLI.setUnavailable(LibFunc_asinf_finite); + TLI.setUnavailable(LibFunc_asinl_finite); + TLI.setUnavailable(LibFunc_atan2_finite); + TLI.setUnavailable(LibFunc_atan2f_finite); + TLI.setUnavailable(LibFunc_atan2l_finite); + TLI.setUnavailable(LibFunc_atanh_finite); + TLI.setUnavailable(LibFunc_atanhf_finite); + TLI.setUnavailable(LibFunc_atanhl_finite); + TLI.setUnavailable(LibFunc_cosh_finite); + TLI.setUnavailable(LibFunc_coshf_finite); + TLI.setUnavailable(LibFunc_coshl_finite); + TLI.setUnavailable(LibFunc_exp10_finite); + TLI.setUnavailable(LibFunc_exp10f_finite); + TLI.setUnavailable(LibFunc_exp10l_finite); + TLI.setUnavailable(LibFunc_exp2_finite); + TLI.setUnavailable(LibFunc_exp2f_finite); + TLI.setUnavailable(LibFunc_exp2l_finite); + TLI.setUnavailable(LibFunc_exp_finite); + TLI.setUnavailable(LibFunc_expf_finite); + TLI.setUnavailable(LibFunc_expl_finite); + TLI.setUnavailable(LibFunc_log10_finite); + TLI.setUnavailable(LibFunc_log10f_finite); + TLI.setUnavailable(LibFunc_log10l_finite); + TLI.setUnavailable(LibFunc_log2_finite); + TLI.setUnavailable(LibFunc_log2f_finite); + TLI.setUnavailable(LibFunc_log2l_finite); + TLI.setUnavailable(LibFunc_log_finite); + TLI.setUnavailable(LibFunc_logf_finite); + TLI.setUnavailable(LibFunc_logl_finite); + TLI.setUnavailable(LibFunc_pow_finite); + TLI.setUnavailable(LibFunc_powf_finite); + TLI.setUnavailable(LibFunc_powl_finite); + TLI.setUnavailable(LibFunc_sinh_finite); + TLI.setUnavailable(LibFunc_sinhf_finite); + 
TLI.setUnavailable(LibFunc_sinhl_finite); + // Win32 does *not* provide provide these functions, but they are // generally available on POSIX-compliant systems: TLI.setUnavailable(LibFunc_access); @@ -496,7 +540,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) { // Check for \01 prefix that is used to mangle __asm declarations and // strip it if present. - return GlobalValue::getRealLinkageName(funcName); + return GlobalValue::dropLLVMManglingEscape(funcName); } bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, @@ -1004,22 +1048,34 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 1 && FTy.getParamType(0)->isFloatingPointTy()); case LibFunc_acos: + case LibFunc_acos_finite: case LibFunc_acosf: + case LibFunc_acosf_finite: case LibFunc_acosh: + case LibFunc_acosh_finite: case LibFunc_acoshf: + case LibFunc_acoshf_finite: case LibFunc_acoshl: + case LibFunc_acoshl_finite: case LibFunc_acosl: + case LibFunc_acosl_finite: case LibFunc_asin: + case LibFunc_asin_finite: case LibFunc_asinf: + case LibFunc_asinf_finite: case LibFunc_asinh: case LibFunc_asinhf: case LibFunc_asinhl: case LibFunc_asinl: + case LibFunc_asinl_finite: case LibFunc_atan: case LibFunc_atanf: case LibFunc_atanh: + case LibFunc_atanh_finite: case LibFunc_atanhf: + case LibFunc_atanhf_finite: case LibFunc_atanhl: + case LibFunc_atanhl_finite: case LibFunc_atanl: case LibFunc_cbrt: case LibFunc_cbrtf: @@ -1030,18 +1086,30 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosh: + case LibFunc_cosh_finite: case LibFunc_coshf: + case LibFunc_coshf_finite: case LibFunc_coshl: + case LibFunc_coshl_finite: case LibFunc_cosl: case LibFunc_exp10: + case LibFunc_exp10_finite: case LibFunc_exp10f: + case LibFunc_exp10f_finite: case LibFunc_exp10l: + case LibFunc_exp10l_finite: case LibFunc_exp2: + case LibFunc_exp2_finite: case LibFunc_exp2f: + case LibFunc_exp2f_finite: case LibFunc_exp2l: + case LibFunc_exp2l_finite: case LibFunc_exp: + case LibFunc_exp_finite: case LibFunc_expf: + case LibFunc_expf_finite: case LibFunc_expl: + case LibFunc_expl_finite: case LibFunc_expm1: case LibFunc_expm1f: case LibFunc_expm1l: @@ -1052,20 +1120,29 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_floorf: case LibFunc_floorl: case LibFunc_log10: + case LibFunc_log10_finite: case LibFunc_log10f: + case LibFunc_log10f_finite: case LibFunc_log10l: + case LibFunc_log10l_finite: case LibFunc_log1p: case LibFunc_log1pf: case LibFunc_log1pl: case LibFunc_log2: + case LibFunc_log2_finite: case LibFunc_log2f: + case LibFunc_log2f_finite: case LibFunc_log2l: + case LibFunc_log2l_finite: case LibFunc_log: + case LibFunc_log_finite: case LibFunc_logb: case LibFunc_logbf: case LibFunc_logbl: case LibFunc_logf: + case LibFunc_logf_finite: case LibFunc_logl: + case LibFunc_logl_finite: case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: @@ -1078,8 +1155,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinh: + case LibFunc_sinh_finite: case LibFunc_sinhf: + case LibFunc_sinhf_finite: case LibFunc_sinhl: + case LibFunc_sinhl_finite: case LibFunc_sinl: case LibFunc_sqrt: case LibFunc_sqrt_finite: @@ -1100,8 +1180,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getReturnType() == FTy.getParamType(0)); case LibFunc_atan2: + case 
LibFunc_atan2_finite: case LibFunc_atan2f: + case LibFunc_atan2f_finite: case LibFunc_atan2l: + case LibFunc_atan2l_finite: case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: @@ -1115,8 +1198,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_copysignf: case LibFunc_copysignl: case LibFunc_pow: + case LibFunc_pow_finite: case LibFunc_powf: + case LibFunc_powf_finite: case LibFunc_powl: + case LibFunc_powl_finite: return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() && FTy.getReturnType() == FTy.getParamType(0) && FTy.getReturnType() == FTy.getParamType(1)); @@ -1294,6 +1380,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( {"powf", "__svml_powf8", 8}, {"powf", "__svml_powf16", 16}, + { "__pow_finite", "__svml_pow2", 2 }, + { "__pow_finite", "__svml_pow4", 4 }, + { "__pow_finite", "__svml_pow8", 8 }, + + { "__powf_finite", "__svml_powf4", 4 }, + { "__powf_finite", "__svml_powf8", 8 }, + { "__powf_finite", "__svml_powf16", 16 }, + {"llvm.pow.f64", "__svml_pow2", 2}, {"llvm.pow.f64", "__svml_pow4", 4}, {"llvm.pow.f64", "__svml_pow8", 8}, @@ -1310,6 +1404,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( {"expf", "__svml_expf8", 8}, {"expf", "__svml_expf16", 16}, + { "__exp_finite", "__svml_exp2", 2 }, + { "__exp_finite", "__svml_exp4", 4 }, + { "__exp_finite", "__svml_exp8", 8 }, + + { "__expf_finite", "__svml_expf4", 4 }, + { "__expf_finite", "__svml_expf8", 8 }, + { "__expf_finite", "__svml_expf16", 16 }, + {"llvm.exp.f64", "__svml_exp2", 2}, {"llvm.exp.f64", "__svml_exp4", 4}, {"llvm.exp.f64", "__svml_exp8", 8}, @@ -1326,6 +1428,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( {"logf", "__svml_logf8", 8}, {"logf", "__svml_logf16", 16}, + { "__log_finite", "__svml_log2", 2 }, + { "__log_finite", "__svml_log4", 4 }, + { "__log_finite", "__svml_log8", 8 }, + + { "__logf_finite", "__svml_logf4", 4 }, + { "__logf_finite", "__svml_logf8", 8 }, + { "__logf_finite", "__svml_logf16", 16 }, + {"llvm.log.f64", "__svml_log2", 2}, {"llvm.log.f64", "__svml_log4", 4}, {"llvm.log.f64", "__svml_log8", 8}, diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 26d606cce9bb..8a5d10473662 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -279,6 +279,10 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } +unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const { + return TTIImpl->getMinVectorRegisterBitWidth(); +} + bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( @@ -500,6 +504,15 @@ unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF, return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } +bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, + Type *Ty, ReductionFlags Flags) const { + return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); +} + +bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { + return TTIImpl->shouldExpandReduction(II); +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a7f3ff672aef..cba7363a0afa 100644 --- a/lib/Analysis/ValueTracking.cpp 
+++ b/lib/Analysis/ValueTracking.cpp @@ -88,9 +88,8 @@ struct Query { /// classic case of this is assume(x = y), which will attempt to determine /// bits in x from bits in y, which will attempt to determine bits in y from /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call - /// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and - /// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so - /// on. + /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo + /// (all of which can call computeKnownBits), and so on. std::array<const Value *, MaxDepth> Excluded; unsigned NumExcluded; @@ -143,6 +142,16 @@ void llvm::computeKnownBits(const Value *V, KnownBits &Known, Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } +static KnownBits computeKnownBits(const Value *V, unsigned Depth, + const Query &Q); + +KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); +} + bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, @@ -159,16 +168,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } -static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - unsigned Depth, const Query &Q); - -void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - ::ComputeSignBit(V, KnownZero, KnownOne, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT)); -} static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const Query &Q); @@ -194,9 +193,8 @@ bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - bool NonNegative, Negative; - ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); - return NonNegative; + KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); + return Known.isNonNegative(); } bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, @@ -214,9 +212,8 @@ bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - bool NonNegative, Negative; - ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); - return Negative; + KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); + return Known.isNegative(); } static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q); @@ -342,10 +339,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. 
- unsigned TrailZ = Known.Zero.countTrailingOnes() + - Known2.Zero.countTrailingOnes(); - unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() + - Known2.Zero.countLeadingOnes(), + unsigned TrailZ = Known.countMinTrailingZeros() + + Known2.countMinTrailingZeros(); + unsigned LeadZ = std::max(Known.countMinLeadingZeros() + + Known2.countMinLeadingZeros(), BitWidth) - BitWidth; TrailZ = std::min(TrailZ, BitWidth); @@ -750,8 +747,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero. - Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()); - // assume(v <_u c) + Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); + // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { @@ -761,9 +758,9 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) - Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()+1); + Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1); else - Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()); + Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); } } @@ -916,7 +913,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, m_Value(Y))))) { Known2.resetAll(); computeKnownBits(Y, Known2, Depth + 1, Q); - if (Known2.One.countTrailingOnes() > 0) + if (Known2.countMinTrailingOnes() > 0) Known.Zero.setBit(0); } break; @@ -953,14 +950,13 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); - unsigned LeadZ = Known2.Zero.countLeadingOnes(); + unsigned LeadZ = Known2.countMinLeadingZeros(); Known2.resetAll(); computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros(); - if (RHSUnknownLeadingOnes != BitWidth) - LeadZ = std::min(BitWidth, - LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros(); + if (RHSMaxLeadingZeros != BitWidth) + LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); Known.Zero.setHighBits(LeadZ); break; @@ -983,8 +979,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, if (Known.isNegative() && Known2.isNegative()) // We can derive a lower bound on the result by taking the max of the // leading one bits. - MaxHighOnes = std::max(Known.One.countLeadingOnes(), - Known2.One.countLeadingOnes()); + MaxHighOnes = + std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes()); // If either side is non-negative, the result is non-negative. else if (Known.isNonNegative() || Known2.isNonNegative()) MaxHighZeros = 1; @@ -993,8 +989,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, if (Known.isNonNegative() && Known2.isNonNegative()) // We can derive an upper bound on the result by taking the max of the // leading zero bits. 
- MaxHighZeros = std::max(Known.Zero.countLeadingOnes(), - Known2.Zero.countLeadingOnes()); + MaxHighZeros = std::max(Known.countMinLeadingZeros(), + Known2.countMinLeadingZeros()); // If either side is negative, the result is negative. else if (Known.isNegative() || Known2.isNegative()) MaxHighOnes = 1; @@ -1002,12 +998,12 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // We can derive a lower bound on the result by taking the max of the // leading one bits. MaxHighOnes = - std::max(Known.One.countLeadingOnes(), Known2.One.countLeadingOnes()); + std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes()); } else if (SPF == SPF_UMIN) { // We can derive an upper bound on the result by taking the max of the // leading zero bits. MaxHighZeros = - std::max(Known.Zero.countLeadingOnes(), Known2.Zero.countLeadingOnes()); + std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); } // Only known if known in both the LHS and RHS. @@ -1185,8 +1181,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); - unsigned Leaders = std::max(Known.Zero.countLeadingOnes(), - Known2.Zero.countLeadingOnes()); + unsigned Leaders = + std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); Known.resetAll(); Known.Zero.setHighBits(Leaders); break; @@ -1207,7 +1203,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // to determine if we can prove known low zero bits. KnownBits LocalKnown(BitWidth); computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q); - unsigned TrailZ = LocalKnown.Zero.countTrailingOnes(); + unsigned TrailZ = LocalKnown.countMinTrailingZeros(); gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { @@ -1241,7 +1237,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, computeKnownBits(Index, LocalKnown, Depth + 1, Q); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + - LocalKnown.Zero.countTrailingOnes())); + LocalKnown.countMinTrailingZeros())); } } @@ -1286,8 +1282,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, KnownBits Known3(Known); computeKnownBits(L, Known3, Depth + 1, Q); - Known.Zero.setLowBits(std::min(Known2.Zero.countTrailingOnes(), - Known3.Zero.countTrailingOnes())); + Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), + Known3.countMinTrailingZeros())); if (DontImproveNonNegativePhiBits) break; @@ -1386,12 +1382,25 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Known.Zero |= Known2.Zero.byteSwap(); Known.One |= Known2.One.byteSwap(); break; - case Intrinsic::ctlz: + case Intrinsic::ctlz: { + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + // If we have a known 1, its position is our upper bound. + unsigned PossibleLZ = Known2.One.countLeadingZeros(); + // If this call is undefined for 0, the result will be less than 2^n. + if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) + PossibleLZ = std::min(PossibleLZ, BitWidth - 1); + unsigned LowBits = Log2_32(PossibleLZ)+1; + Known.Zero.setBitsFrom(LowBits); + break; + } case Intrinsic::cttz: { - unsigned LowBits = Log2_32(BitWidth)+1; + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + // If we have a known 1, its position is our upper bound. 
+ unsigned PossibleTZ = Known2.One.countTrailingZeros(); // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) - LowBits -= 1; + PossibleTZ = std::min(PossibleTZ, BitWidth - 1); + unsigned LowBits = Log2_32(PossibleTZ)+1; Known.Zero.setBitsFrom(LowBits); break; } @@ -1399,7 +1408,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // We can bound the space the count needs. Also, bits known to be zero // can't contribute to the population. - unsigned BitsPossiblySet = BitWidth - Known2.Zero.countPopulation(); + unsigned BitsPossiblySet = Known2.countMaxPopulation(); unsigned LowBits = Log2_32(BitsPossiblySet)+1; Known.Zero.setBitsFrom(LowBits); // TODO: we could bound KnownOne using the lower bound on the number @@ -1450,6 +1459,14 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, } /// Determine which bits of V are known to be either zero or one and return +/// them. +KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) { + KnownBits Known(getBitWidth(V->getType(), Q.DL)); + computeKnownBits(V, Known, Depth, Q); + return Known; +} + +/// Determine which bits of V are known to be either zero or one and return /// them in the Known bit set. /// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that @@ -1568,16 +1585,6 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } -/// Determine whether the sign bit is known to be zero or one. -/// Convenience wrapper around computeKnownBits. -void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - unsigned Depth, const Query &Q) { - KnownBits Bits(getBitWidth(V->getType(), Q.DL)); - computeKnownBits(V, Bits, Depth, Q); - KnownOne = Bits.isNegative(); - KnownZero = Bits.isNonNegative(); -} - /// Return true if the given value is known to have exactly one /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer @@ -1842,24 +1849,20 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (BO->isExact()) return isKnownNonZero(X, Depth, Q); - bool XKnownNonNegative, XKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q); - if (XKnownNegative) + KnownBits Known = computeKnownBits(X, Depth, Q); + if (Known.isNegative()) return true; // If the shifter operand is a constant, and all of the bits shifted // out are known to be zero, and X is known non-zero then at least one // non-zero bit must remain. if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) { - KnownBits Known(BitWidth); - computeKnownBits(X, Known, Depth, Q); - auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); // Is there a known one in the portion not shifted out? - if (Known.One.countLeadingZeros() < BitWidth - ShiftVal) + if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal) return true; // Are all the bits to be shifted out known zero? - if (Known.Zero.countTrailingOnes() >= ShiftVal) + if (Known.countMinTrailingZeros() >= ShiftVal) return isKnownNonZero(X, Depth, Q); } } @@ -1869,39 +1872,34 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } // X + Y. 
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { - bool XKnownNonNegative, XKnownNegative; - bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Depth, Q); + KnownBits XKnown = computeKnownBits(X, Depth, Q); + KnownBits YKnown = computeKnownBits(Y, Depth, Q); // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. - if (XKnownNonNegative && YKnownNonNegative) + if (XKnown.isNonNegative() && YKnown.isNonNegative()) if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q)) return true; // If X and Y are both negative (as signed values) then their sum is not // zero unless both X and Y equal INT_MIN. - if (XKnownNegative && YKnownNegative) { - KnownBits Known(BitWidth); + if (XKnown.isNegative() && YKnown.isNegative()) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - computeKnownBits(X, Known, Depth, Q); - if (Known.One.intersects(Mask)) + if (XKnown.One.intersects(Mask)) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - computeKnownBits(Y, Known, Depth, Q); - if (Known.One.intersects(Mask)) + if (YKnown.One.intersects(Mask)) return true; } // The sum of a non-negative number and a power of two is not zero. - if (XKnownNonNegative && + if (XKnown.isNonNegative() && isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q)) return true; - if (YKnownNonNegative && + if (YKnown.isNonNegative() && isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q)) return true; } @@ -2276,14 +2274,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // If we know that the sign bit is either zero or one, determine the number of // identical bits in the top of the input value. - if (Known.isNonNegative()) - return std::max(FirstAnswer, Known.Zero.countLeadingOnes()); - - if (Known.isNegative()) - return std::max(FirstAnswer, Known.One.countLeadingOnes()); - - // computeKnownBits gave us no extra information about the top bits. - return FirstAnswer; + return std::max(FirstAnswer, Known.countMinSignBits()); } /// This function computes the integer multiple of Base that equals V. @@ -3441,8 +3432,8 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); // Note that underestimating the number of zero bits gives a more // conservative answer. - unsigned ZeroBits = LHSKnown.Zero.countLeadingOnes() + - RHSKnown.Zero.countLeadingOnes(); + unsigned ZeroBits = LHSKnown.countMinLeadingZeros() + + RHSKnown.countMinLeadingZeros(); // First handle the easy case: if we have enough zero bits there's // definitely no overflow. 
if (ZeroBits >= BitWidth) @@ -3475,21 +3466,17 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - bool LHSKnownNonNegative, LHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, - AC, CxtI, DT); - if (LHSKnownNonNegative || LHSKnownNegative) { - bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, - AC, CxtI, DT); - - if (LHSKnownNegative && RHSKnownNegative) { + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); + if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) { + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); + + if (LHSKnown.isNegative() && RHSKnown.isNegative()) { // The sign bit is set in both cases: this MUST overflow. // Create a simple add instruction, and insert it into the struct. return OverflowResult::AlwaysOverflows; } - if (LHSKnownNonNegative && RHSKnownNonNegative) { + if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) { // The sign bit is clear in both cases: this CANNOT overflow. // Create a simple add instruction, and insert it into the struct. return OverflowResult::NeverOverflows; @@ -3499,6 +3486,51 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, return OverflowResult::MayOverflow; } +/// \brief Return true if we can prove that adding the two values of the +/// knownbits will not overflow. +/// Otherwise return false. +static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, + const KnownBits &RHSKnown) { + // Addition of two 2's complement numbers having opposite signs will never + // overflow. + if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) || + (LHSKnown.isNonNegative() && RHSKnown.isNegative())) + return true; + + // If either of the values is known to be non-negative, adding them can only + // overflow if the second is also non-negative, so we can assume that. + // Two non-negative numbers will only overflow if there is a carry to the + // sign bit, so we can check if even when the values are as big as possible + // there is no overflow to the sign bit. + if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { + APInt MaxLHS = ~LHSKnown.Zero; + MaxLHS.clearSignBit(); + APInt MaxRHS = ~RHSKnown.Zero; + MaxRHS.clearSignBit(); + APInt Result = std::move(MaxLHS) + std::move(MaxRHS); + return Result.isSignBitClear(); + } + + // If either of the values is known to be negative, adding them can only + // overflow if the second is also negative, so we can assume that. + // Two negative number will only overflow if there is no carry to the sign + // bit, so we can check if even when the values are as small as possible + // there is overflow to the sign bit. + if (LHSKnown.isNegative() || RHSKnown.isNegative()) { + APInt MinLHS = LHSKnown.One; + MinLHS.clearSignBit(); + APInt MinRHS = RHSKnown.One; + MinRHS.clearSignBit(); + APInt Result = std::move(MinLHS) + std::move(MinRHS); + return Result.isSignBitSet(); + } + + // If we reached here it means that we know nothing about the sign bits. + // In this case we can't know if there will be an overflow, since by + // changing the sign bits any two values can be made to overflow. 
+ return false; +} + static OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const AddOperator *Add, @@ -3510,18 +3542,29 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, return OverflowResult::NeverOverflows; } - bool LHSKnownNonNegative, LHSKnownNegative; - bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, - AC, CxtI, DT); - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, - AC, CxtI, DT); + // If LHS and RHS each have at least two sign bits, the addition will look + // like + // + // XX..... + + // YY..... + // + // If the carry into the most significant position is 0, X and Y can't both + // be 1 and therefore the carry out of the addition is also 0. + // + // If the carry into the most significant position is 1, X and Y can't both + // be 0 and therefore the carry out of the addition is also 1. + // + // Since the carry into the most significant position is always equal to + // the carry out of the addition, there is no signed overflow. + if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + return OverflowResult::NeverOverflows; + + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); - if ((LHSKnownNonNegative && RHSKnownNegative) || - (LHSKnownNegative && RHSKnownNonNegative)) { - // The sign bits are opposite: this CANNOT overflow. + if (checkRippleForSignedAdd(LHSKnown, RHSKnown)) return OverflowResult::NeverOverflows; - } // The remaining code needs Add to be available. Early returns if not so. if (!Add) @@ -3532,14 +3575,13 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, // @llvm.assume'ed non-negative rather than proved so from analyzing its // operands. bool LHSOrRHSKnownNonNegative = - (LHSKnownNonNegative || RHSKnownNonNegative); - bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative); + (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()); + bool LHSOrRHSKnownNegative = + (LHSKnown.isNegative() || RHSKnown.isNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { - bool AddKnownNonNegative, AddKnownNegative; - ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL, - /*Depth=*/0, AC, CxtI, DT); - if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) || - (AddKnownNegative && LHSOrRHSKnownNegative)) { + KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); + if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || + (AddKnown.isNegative() && LHSOrRHSKnownNegative)) { return OverflowResult::NeverOverflows; } } diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 722f17a8067e..2d2249da4e13 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" using namespace llvm; using namespace llvm::PatternMatch; |
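
Note on the API change running through the ValueTracking.cpp hunks above: hand-rolled APInt idioms such as Known.Zero.countTrailingOnes() are replaced by named KnownBits helpers, and the removed ComputeSignBit wrapper is replaced by the new value-returning computeKnownBits overload together with isNonNegative()/isNegative(). The sketch below is a deliberately simplified, self-contained 64-bit model of that interface; the struct, its fixed width, and the loop-based helper bodies are illustrative assumptions, not LLVM's llvm::KnownBits from llvm/Support/KnownBits.h.

#include <cassert>
#include <cstdint>
#include <iostream>

// Simplified stand-in for the KnownBits idea: a set bit in Zero means
// "known to be 0", a set bit in One means "known to be 1".
struct KnownBitsModel {
  static constexpr unsigned BitWidth = 64;
  uint64_t Zero = 0; // bits known to be 0
  uint64_t One = 0;  // bits known to be 1

  // Sign-bit queries that replace the removed ComputeSignBit() wrapper.
  bool isNonNegative() const { return (Zero >> (BitWidth - 1)) & 1; }
  bool isNegative() const { return (One >> (BitWidth - 1)) & 1; }

  // Named helpers the patch switches to, and the old APInt idioms they cover:
  //   countMinTrailingZeros()  was  Zero.countTrailingOnes()
  //   countMinLeadingZeros()   was  Zero.countLeadingOnes()
  //   countMaxLeadingZeros()   was  One.countLeadingZeros()
  unsigned countMinTrailingZeros() const { return trailingOnes(Zero); }
  unsigned countMinLeadingZeros() const { return leadingOnes(Zero); }
  unsigned countMaxLeadingZeros() const { return leadingOnes(~One); }

private:
  static unsigned trailingOnes(uint64_t V) {
    unsigned N = 0;
    while (N < BitWidth && ((V >> N) & 1))
      ++N;
    return N;
  }
  static unsigned leadingOnes(uint64_t V) {
    unsigned N = 0;
    while (N < BitWidth && ((V >> (BitWidth - 1 - N)) & 1))
      ++N;
    return N;
  }
};

int main() {
  // A value whose only unknown bits are bits 2 and 3 (so it is one of
  // 0, 4, 8, 12): everything else is known zero.
  KnownBitsModel K;
  K.Zero = ~uint64_t(12); // 12 == 0b1100
  assert(K.countMinTrailingZeros() == 2); // the value is a multiple of 4
  assert(K.countMinLeadingZeros() == 60); // at most 4 significant bits
  assert(K.isNonNegative());              // sign bit known clear
  std::cout << "min trailing zeros: " << K.countMinTrailingZeros() << '\n';
  return 0;
}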
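
The new cttz/ctlz handling bounds the result from the operand's known-one bits: if some bit is known to be 1, the position of the lowest (for cttz) or highest (for ctlz) such bit caps the count, so only the low Log2_32(bound)+1 bits of the result can ever be set. The standalone check below illustrates that reasoning for cttz; the 32-bit width, the made-up known-one mask, the helper names, and the sampling stride are all assumptions made for the example, not LLVM code.

#include <cassert>
#include <cstdint>

// Position of the lowest set bit (the argument is assumed non-zero).
static unsigned cttz32(uint32_t V) {
  unsigned N = 0;
  while (!((V >> N) & 1))
    ++N;
  return N;
}

// floor(log2(V)); returns 0 for V == 0, matching how the bound is used.
static unsigned log2_32(uint32_t V) {
  unsigned R = 0;
  while (V >>= 1)
    ++R;
  return R;
}

int main() {
  const uint32_t KnownOne = 1u << 5; // assume bit 5 of the operand is known 1

  // cttz(x) can be at most 5 for any x compatible with that fact, so the
  // result fits in Log2_32(5) + 1 = 3 bits and bits [3, 31] are known zero
  // (this mirrors Known.Zero.setBitsFrom(LowBits) in the patch).
  const unsigned PossibleTZ = cttz32(KnownOne);
  const unsigned LowBits = log2_32(PossibleTZ) + 1;
  assert(PossibleTZ == 5 && LowBits == 3);

  // Spot-check a sampling of compatible values (stride chosen arbitrarily).
  for (uint64_t X = 0; X <= UINT32_MAX; X += 977) {
    uint32_t V = uint32_t(X) | KnownOne; // force the known-1 bit
    unsigned TZ = cttz32(V);
    assert(TZ <= PossibleTZ && (TZ >> LowBits) == 0);
  }
  return 0;
}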
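
computeOverflowForUnsignedMul now sums the operands' minimum leading-zero counts and declares the multiply safe once that sum reaches the bit width: if LHS < 2^(W-Z1) and RHS < 2^(W-Z2) with Z1 + Z2 >= W, the product stays below 2^W. The exhaustive 10-bit check below covers only that easy case (the patch also handles the Z1 + Z2 == W-1 near miss, which is not modelled here); the width and helper are assumptions made for the example.

#include <cassert>
#include <cstdint>

// Leading zero bits of V within an artificial W-bit width.
static unsigned leadingZeros(uint32_t V, unsigned W) {
  unsigned N = 0;
  for (int Bit = int(W) - 1; Bit >= 0 && !((V >> Bit) & 1); --Bit)
    ++N;
  return N;
}

int main() {
  const unsigned W = 10; // small width so the check can be exhaustive
  const uint32_t Limit = 1u << W;

  for (uint32_t A = 0; A < Limit; ++A)
    for (uint32_t B = 0; B < Limit; ++B) {
      unsigned Z1 = leadingZeros(A, W);
      unsigned Z2 = leadingZeros(B, W);
      if (Z1 + Z2 >= W) {
        // A < 2^(W-Z1) and B < 2^(W-Z2), so A*B < 2^(2W-Z1-Z2) <= 2^W.
        assert(uint64_t(A) * B < Limit && "enough leading zeros: no overflow");
      }
    }
  return 0;
}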
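
The rewritten computeOverflowForSignedAdd first proves "never overflows" when both addends have more than one sign bit, per the carry argument in the added comment, and only then falls back to checkRippleForSignedAdd, which bounds each operand's magnitude from its known bits. The small exhaustive test below verifies only the two-sign-bits claim, over all pairs of 8-bit values; the helper numSignBits8 is a hypothetical stand-in for ComputeNumSignBits, written just for this check.

#include <cassert>
#include <cstdint>

// Number of identical leading bits of an 8-bit value, counting the sign bit
// itself: an 8-bit analogue of ComputeNumSignBits.
static unsigned numSignBits8(int8_t V) {
  const unsigned U = uint8_t(V); // two's complement bit pattern
  const unsigned Sign = (U >> 7) & 1;
  unsigned N = 1;
  for (int Bit = 6; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B)
      if (numSignBits8(int8_t(A)) > 1 && numSignBits8(int8_t(B)) > 1) {
        const int Sum = A + B; // exact sum, no 8-bit wrapping
        assert(Sum >= -128 && Sum <= 127 && "two sign bits: no signed overflow");
      }
  return 0;
}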