diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-14 15:37:50 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-14 15:37:50 +0000 |
commit | 581a6d8501ff5614297da837b81ed3b6956361ea (patch) | |
tree | 985ee91d0ca1d3e6506ac5ff7e37f5b67adfec09 /lib/Analysis | |
parent | 909545a822eef491158f831688066f0ec2866938 (diff) |
Diffstat (limited to 'lib/Analysis')
-rw-r--r-- | lib/Analysis/AssumptionCache.cpp | 109 | ||||
-rw-r--r-- | lib/Analysis/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/Analysis/CostModel.cpp | 5 | ||||
-rw-r--r-- | lib/Analysis/IVUsers.cpp | 19 | ||||
-rw-r--r-- | lib/Analysis/InlineCost.cpp | 41 | ||||
-rw-r--r-- | lib/Analysis/InstructionSimplify.cpp | 80 | ||||
-rw-r--r-- | lib/Analysis/LazyValueInfo.cpp | 2 | ||||
-rw-r--r-- | lib/Analysis/LoopAccessAnalysis.cpp | 42 | ||||
-rw-r--r-- | lib/Analysis/LoopAnalysisManager.cpp | 160 | ||||
-rw-r--r-- | lib/Analysis/LoopInfo.cpp | 7 | ||||
-rw-r--r-- | lib/Analysis/LoopPass.cpp | 12 | ||||
-rw-r--r-- | lib/Analysis/LoopPassManager.cpp | 59 | ||||
-rw-r--r-- | lib/Analysis/MemoryDependenceAnalysis.cpp | 67 | ||||
-rw-r--r-- | lib/Analysis/ScalarEvolution.cpp | 45 | ||||
-rw-r--r-- | lib/Analysis/TargetTransformInfo.cpp | 5 | ||||
-rw-r--r-- | lib/Analysis/ValueTracking.cpp | 76 |
16 files changed, 507 insertions, 224 deletions
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp index 3c518034ba62d..aa55d79b761ee 100644 --- a/lib/Analysis/AssumptionCache.cpp +++ b/lib/Analysis/AssumptionCache.cpp @@ -24,6 +24,109 @@ using namespace llvm; using namespace llvm::PatternMatch; +SmallVector<WeakVH, 1> &AssumptionCache::getAffectedValues(Value *V) { + // Try using find_as first to avoid creating extra value handles just for the + // purpose of doing the lookup. + auto AVI = AffectedValues.find_as(V); + if (AVI != AffectedValues.end()) + return AVI->second; + + auto AVIP = AffectedValues.insert({ + AffectedValueCallbackVH(V, this), SmallVector<WeakVH, 1>()}); + return AVIP.first->second; +} + +void AssumptionCache::updateAffectedValues(CallInst *CI) { + // Note: This code must be kept in-sync with the code in + // computeKnownBitsFromAssume in ValueTracking. + + SmallVector<Value *, 16> Affected; + auto AddAffected = [&Affected](Value *V) { + if (isa<Argument>(V)) { + Affected.push_back(V); + } else if (auto *I = dyn_cast<Instruction>(V)) { + Affected.push_back(I); + + if (I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt) { + auto *Op = I->getOperand(0); + if (isa<Instruction>(Op) || isa<Argument>(Op)) + Affected.push_back(Op); + } + } + }; + + Value *Cond = CI->getArgOperand(0), *A, *B; + AddAffected(Cond); + + CmpInst::Predicate Pred; + if (match(Cond, m_ICmp(Pred, m_Value(A), m_Value(B)))) { + AddAffected(A); + AddAffected(B); + + if (Pred == ICmpInst::ICMP_EQ) { + // For equality comparisons, we handle the case of bit inversion. + auto AddAffectedFromEq = [&AddAffected](Value *V) { + Value *A; + if (match(V, m_Not(m_Value(A)))) { + AddAffected(A); + V = A; + } + + Value *B; + ConstantInt *C; + // (A & B) or (A | B) or (A ^ B). + if (match(V, + m_CombineOr(m_And(m_Value(A), m_Value(B)), + m_CombineOr(m_Or(m_Value(A), m_Value(B)), + m_Xor(m_Value(A), m_Value(B)))))) { + AddAffected(A); + AddAffected(B); + // (A << C) or (A >>_s C) or (A >>_u C) where C is some constant. + } else if (match(V, + m_CombineOr(m_Shl(m_Value(A), m_ConstantInt(C)), + m_CombineOr(m_LShr(m_Value(A), m_ConstantInt(C)), + m_AShr(m_Value(A), + m_ConstantInt(C)))))) { + AddAffected(A); + } + }; + + AddAffectedFromEq(A); + AddAffectedFromEq(B); + } + } + + for (auto &AV : Affected) { + auto &AVV = getAffectedValues(AV); + if (std::find(AVV.begin(), AVV.end(), CI) == AVV.end()) + AVV.push_back(CI); + } +} + +void AssumptionCache::AffectedValueCallbackVH::deleted() { + auto AVI = AC->AffectedValues.find(getValPtr()); + if (AVI != AC->AffectedValues.end()) + AC->AffectedValues.erase(AVI); + // 'this' now dangles! +} + +void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { + if (!isa<Instruction>(NV) && !isa<Argument>(NV)) + return; + + // Any assumptions that affected this value now affect the new value. + + auto &NAVV = AC->getAffectedValues(NV); + auto AVI = AC->AffectedValues.find(getValPtr()); + if (AVI == AC->AffectedValues.end()) + return; + + for (auto &A : AVI->second) + if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end()) + NAVV.push_back(A); +} + void AssumptionCache::scanFunction() { assert(!Scanned && "Tried to scan the function twice!"); assert(AssumeHandles.empty() && "Already have assumes when scanning!"); @@ -37,6 +140,10 @@ void AssumptionCache::scanFunction() { // Mark the scan as complete. Scanned = true; + + // Update affected values. + for (auto &A : AssumeHandles) + updateAffectedValues(cast<CallInst>(A)); } void AssumptionCache::registerAssumption(CallInst *CI) { @@ -72,6 +179,8 @@ void AssumptionCache::registerAssumption(CallInst *CI) { "Cache contains multiple copies of a call!"); } #endif + + updateAffectedValues(CI); } AnalysisKey AssumptionAnalysis::Key; diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 08d50c29dfc87..d53364373d7bc 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -44,10 +44,10 @@ add_llvm_library(LLVMAnalysis Lint.cpp Loads.cpp LoopAccessAnalysis.cpp + LoopAnalysisManager.cpp LoopUnrollAnalyzer.cpp LoopInfo.cpp LoopPass.cpp - LoopPassManager.cpp MemDepPrinter.cpp MemDerefPrinter.cpp MemoryBuiltins.cpp diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 67d1773f0811b..6b77397956cda 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -438,8 +438,11 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { getOperandInfo(I->getOperand(0)); TargetTransformInfo::OperandValueKind Op2VK = getOperandInfo(I->getOperand(1)); + SmallVector<const Value*, 2> Operands(I->operand_values()); return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, - Op2VK); + Op2VK, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, + Operands); } case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 76e2561b9da39..a661b0101e6a6 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -16,8 +16,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -36,20 +36,9 @@ using namespace llvm; AnalysisKey IVUsersAnalysis::Key; -IVUsers IVUsersAnalysis::run(Loop &L, LoopAnalysisManager &AM) { - const auto &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function *F = L.getHeader()->getParent(); - - return IVUsers(&L, FAM.getCachedResult<AssumptionAnalysis>(*F), - FAM.getCachedResult<LoopAnalysis>(*F), - FAM.getCachedResult<DominatorTreeAnalysis>(*F), - FAM.getCachedResult<ScalarEvolutionAnalysis>(*F)); -} - -PreservedAnalyses IVUsersPrinterPass::run(Loop &L, LoopAnalysisManager &AM) { - AM.getResult<IVUsersAnalysis>(L).print(OS); - return PreservedAnalyses::all(); +IVUsers IVUsersAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + return IVUsers(&L, &AR.AC, &AR.LI, &AR.DT, &AR.SE); } char IVUsersWrapperPass::ID = 0; diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 9b9faacd354c3..4109049ecabcf 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -636,30 +636,27 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { else if (Caller->optForSize()) Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); - bool HotCallsite = false; - uint64_t TotalWeight; - if (PSI && CS.getInstruction()->extractProfTotalWeight(TotalWeight) && - PSI->isHotCount(TotalWeight)) { - HotCallsite = true; + // Adjust the threshold based on inlinehint attribute and profile based + // hotness information if the caller does not have MinSize attribute. + if (!Caller->optForMinSize()) { + if (Callee.hasFnAttribute(Attribute::InlineHint)) + Threshold = MaxIfValid(Threshold, Params.HintThreshold); + if (PSI) { + uint64_t TotalWeight; + if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) && + PSI->isHotCount(TotalWeight)) { + Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold); + } else if (PSI->isFunctionEntryHot(&Callee)) { + // If callsite hotness can not be determined, we may still know + // that the callee is hot and treat it as a weaker hint for threshold + // increase. + Threshold = MaxIfValid(Threshold, Params.HintThreshold); + } else if (PSI->isFunctionEntryCold(&Callee)) { + Threshold = MinIfValid(Threshold, Params.ColdThreshold); + } + } } - // Listen to the inlinehint attribute or profile based hotness information - // when it would increase the threshold and the caller does not need to - // minimize its size. - bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) || - (PSI && PSI->isFunctionEntryHot(&Callee)); - if (InlineHint && !Caller->optForMinSize()) - Threshold = MaxIfValid(Threshold, Params.HintThreshold); - - if (HotCallsite && !Caller->optForMinSize()) - Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold); - - bool ColdCallee = PSI && PSI->isFunctionEntryCold(&Callee); - // For cold callees, use the ColdThreshold knob if it is available and reduces - // the threshold. - if (ColdCallee) - Threshold = MinIfValid(Threshold, Params.ColdThreshold); - // Finally, take the target-specific inlining threshold multiplier into // account. Threshold *= TTI.getInliningThresholdMultiplier(); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 8da2f0981d0ca..796e6e444980d 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -3583,7 +3583,7 @@ static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X, *Y == *C) return TrueWhenUnset ? TrueVal : FalseVal; } - + return nullptr; } @@ -3595,7 +3595,7 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal, unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); if (!BitWidth) return nullptr; - + APInt MinSignedValue; Value *X; if (match(CmpLHS, m_Trunc(m_Value(X))) && (X == TrueVal || X == FalseVal)) { @@ -4252,14 +4252,36 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, const Query &Q, unsigned MaxRecurse) { Intrinsic::ID IID = F->getIntrinsicID(); unsigned NumOperands = std::distance(ArgBegin, ArgEnd); - Type *ReturnType = F->getReturnType(); + + // Unary Ops + if (NumOperands == 1) { + // Perform idempotent optimizations + if (IsIdempotent(IID)) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) { + if (II->getIntrinsicID() == IID) + return II; + } + } + + switch (IID) { + case Intrinsic::fabs: { + if (SignBitMustBeZero(*ArgBegin, Q.TLI)) + return *ArgBegin; + } + default: + return nullptr; + } + } // Binary Ops if (NumOperands == 2) { Value *LHS = *ArgBegin; Value *RHS = *(ArgBegin + 1); - if (IID == Intrinsic::usub_with_overflow || - IID == Intrinsic::ssub_with_overflow) { + Type *ReturnType = F->getReturnType(); + + switch (IID) { + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: { // X - X -> { 0, false } if (LHS == RHS) return Constant::getNullValue(ReturnType); @@ -4268,17 +4290,19 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, // undef - X -> undef if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) return UndefValue::get(ReturnType); - } - if (IID == Intrinsic::uadd_with_overflow || - IID == Intrinsic::sadd_with_overflow) { + return nullptr; + } + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: { // X + undef -> undef if (isa<UndefValue>(RHS)) return UndefValue::get(ReturnType); - } - if (IID == Intrinsic::umul_with_overflow || - IID == Intrinsic::smul_with_overflow) { + return nullptr; + } + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: { // X * 0 -> { 0, false } if (match(RHS, m_Zero())) return Constant::getNullValue(ReturnType); @@ -4286,34 +4310,34 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, // X * undef -> { 0, false } if (match(RHS, m_Undef())) return Constant::getNullValue(ReturnType); - } - if (IID == Intrinsic::load_relative && isa<Constant>(LHS) && - isa<Constant>(RHS)) - return SimplifyRelativeLoad(cast<Constant>(LHS), cast<Constant>(RHS), - Q.DL); + return nullptr; + } + case Intrinsic::load_relative: { + Constant *C0 = dyn_cast<Constant>(LHS); + Constant *C1 = dyn_cast<Constant>(RHS); + if (C0 && C1) + return SimplifyRelativeLoad(C0, C1, Q.DL); + return nullptr; + } + default: + return nullptr; + } } // Simplify calls to llvm.masked.load.* - if (IID == Intrinsic::masked_load) { + switch (IID) { + case Intrinsic::masked_load: { Value *MaskArg = ArgBegin[2]; Value *PassthruArg = ArgBegin[3]; // If the mask is all zeros or undef, the "passthru" argument is the result. if (maskIsAllZeroOrUndef(MaskArg)) return PassthruArg; + return nullptr; } - - // Perform idempotent optimizations - if (!IsIdempotent(IID)) + default: return nullptr; - - // Unary Ops - if (NumOperands == 1) - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) - if (II->getIntrinsicID() == IID) - return II; - - return nullptr; + } } template <typename IterTy> diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 4f63552368737..d442310476cfb 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -925,7 +925,7 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( if (!BBI) return; - for (auto &AssumeVH : AC->assumptions()) { + for (auto &AssumeVH : AC->assumptionsFor(Val)) { if (!AssumeVH) continue; auto *I = cast<CallInst>(AssumeVH); diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 2f3dca3d23fa6..bf80072130974 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -12,22 +12,22 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -44,10 +44,10 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" @@ -2120,35 +2120,9 @@ INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true) AnalysisKey LoopAccessAnalysis::Key; -LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM) { - const FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager(); - Function &F = *L.getHeader()->getParent(); - auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(F); - auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(F); - auto *AA = FAM.getCachedResult<AAManager>(F); - auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); - auto *LI = FAM.getCachedResult<LoopAnalysis>(F); - if (!SE) - report_fatal_error( - "ScalarEvolution must have been cached at a higher level"); - if (!AA) - report_fatal_error("AliasAnalysis must have been cached at a higher level"); - if (!DT) - report_fatal_error("DominatorTree must have been cached at a higher level"); - if (!LI) - report_fatal_error("LoopInfo must have been cached at a higher level"); - return LoopAccessInfo(&L, SE, TLI, AA, DT, LI); -} - -PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L, - LoopAnalysisManager &AM) { - Function &F = *L.getHeader()->getParent(); - auto &LAI = AM.getResult<LoopAccessAnalysis>(L); - OS << "Loop access info in function '" << F.getName() << "':\n"; - OS.indent(2) << L.getHeader()->getName() << ":\n"; - LAI.print(OS, 4); - return PreservedAnalyses::all(); +LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI); } namespace llvm { diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp new file mode 100644 index 0000000000000..5be3ee341c9c2 --- /dev/null +++ b/lib/Analysis/LoopAnalysisManager.cpp @@ -0,0 +1,160 @@ +//===- LoopAnalysisManager.cpp - Loop analysis management -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/Dominators.h" + +using namespace llvm; + +// Explicit template instantiations and specialization defininitions for core +// template typedefs. +namespace llvm { +template class AllAnalysesOn<Loop>; +template class AnalysisManager<Loop, LoopStandardAnalysisResults &>; +template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>; +template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop, + LoopStandardAnalysisResults &>; + +bool LoopAnalysisManagerFunctionProxy::Result::invalidate( + Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // First compute the sequence of IR units covered by this proxy. We will want + // to visit this in postorder, but because this is a tree structure we can do + // this by building a preorder sequence and walking it in reverse. + SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; + // Note that we want to walk the roots in reverse order because we will end + // up reversing the preorder sequence. However, it happens that the loop nest + // roots are in reverse order within the LoopInfo object. So we just walk + // forward here. + // FIXME: If we change the order of LoopInfo we will want to add a reverse + // here. + for (Loop *RootL : *LI) { + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + Loop *L = PreOrderWorklist.pop_back_val(); + PreOrderWorklist.append(L->begin(), L->end()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + } + + // If this proxy or the loop info is going to be invalidated, we also need + // to clear all the keys coming from that analysis. We also completely blow + // away the loop analyses if any of the standard analyses provided by the + // loop pass manager go away so that loop analyses can freely use these + // without worrying about declaring dependencies on them etc. + // FIXME: It isn't clear if this is the right tradeoff. We could instead make + // loop analyses declare any dependencies on these and use the more general + // invalidation logic below to act on that. + auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>(); + if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || + Inv.invalidate<AAManager>(F, PA) || + Inv.invalidate<AssumptionAnalysis>(F, PA) || + Inv.invalidate<DominatorTreeAnalysis>(F, PA) || + Inv.invalidate<LoopAnalysis>(F, PA) || + Inv.invalidate<ScalarEvolutionAnalysis>(F, PA)) { + // Note that the LoopInfo may be stale at this point, however the loop + // objects themselves remain the only viable keys that could be in the + // analysis manager's cache. So we just walk the keys and forcibly clear + // those results. Note that the order doesn't matter here as this will just + // directly destroy the results without calling methods on them. + for (Loop *L : PreOrderLoops) + InnerAM->clear(*L); + + // We also need to null out the inner AM so that when the object gets + // destroyed as invalid we don't try to clear the inner AM again. At that + // point we won't be able to reliably walk the loops for this function and + // only clear results associated with those loops the way we do here. + // FIXME: Making InnerAM null at this point isn't very nice. Most analyses + // try to remain valid during invalidation. Maybe we should add an + // `IsClean` flag? + InnerAM = nullptr; + + // Now return true to indicate this *is* invalid and a fresh proxy result + // needs to be built. This is especially important given the null InnerAM. + return true; + } + + // Directly check if the relevant set is preserved so we can short circuit + // invalidating loops. + bool AreLoopAnalysesPreserved = + PA.allAnalysesInSetPreserved<AllAnalysesOn<Loop>>(); + + // Since we have a valid LoopInfo we can actually leave the cached results in + // the analysis manager associated with the Loop keys, but we need to + // propagate any necessary invalidation logic into them. We'd like to + // invalidate things in roughly the same order as they were put into the + // cache and so we walk the preorder list in reverse to form a valid + // postorder. + for (Loop *L : reverse(PreOrderLoops)) { + Optional<PreservedAnalyses> InnerPA; + + // Check to see whether the preserved set needs to be adjusted based on + // function-level analysis invalidation triggering deferred invalidation + // for this loop. + if (auto *OuterProxy = + InnerAM->getCachedResult<FunctionAnalysisManagerLoopProxy>(*L)) + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first; + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + if (Inv.invalidate(OuterAnalysisID, F, PA)) { + if (!InnerPA) + InnerPA = PA; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + InnerPA->abandon(InnerAnalysisID); + } + } + + // Check if we needed a custom PA set. If so we'll need to run the inner + // invalidation. + if (InnerPA) { + InnerAM->invalidate(*L, *InnerPA); + continue; + } + + // Otherwise we only need to do invalidation if the original PA set didn't + // preserve all Loop analyses. + if (!AreLoopAnalysesPreserved) + InnerAM->invalidate(*L, PA); + } + + // Return false to indicate that this result is still a valid proxy. + return false; +} + +template <> +LoopAnalysisManagerFunctionProxy::Result +LoopAnalysisManagerFunctionProxy::run(Function &F, + FunctionAnalysisManager &AM) { + return Result(*InnerAM, AM.getResult<LoopAnalysis>(F)); +} +} + +PreservedAnalyses llvm::getLoopPassPreservedAnalyses() { + PreservedAnalyses PA; + PA.preserve<AssumptionAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LoopAnalysis>(); + PA.preserve<LoopAnalysisManagerFunctionProxy>(); + PA.preserve<ScalarEvolutionAnalysis>(); + // TODO: What we really want to do here is preserve an AA category, but that + // concept doesn't exist yet. + PA.preserve<AAManager>(); + PA.preserve<BasicAA>(); + PA.preserve<GlobalsAA>(); + PA.preserve<SCEVAA>(); + return PA; +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 3d85ef6988a9a..f449ce94d57ca 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -689,18 +689,13 @@ PreservedAnalyses LoopPrinterPass::run(Function &F, return PreservedAnalyses::all(); } -PrintLoopPass::PrintLoopPass() : OS(dbgs()) {} -PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner) - : OS(OS), Banner(Banner) {} - -PreservedAnalyses PrintLoopPass::run(Loop &L, AnalysisManager<Loop> &) { +void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) { OS << Banner; for (auto *Block : L.blocks()) if (Block) Block->print(OS); else OS << "Printing <null> block"; - return PreservedAnalyses::all(); } //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index b5b8040984d72..3f4a07942154c 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopPassManager.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" @@ -32,13 +32,14 @@ namespace { /// PrintLoopPass - Print a Function corresponding to a Loop. /// class PrintLoopPassWrapper : public LoopPass { - PrintLoopPass P; + raw_ostream &OS; + std::string Banner; public: static char ID; - PrintLoopPassWrapper() : LoopPass(ID) {} + PrintLoopPassWrapper() : LoopPass(ID), OS(dbgs()) {} PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner) - : LoopPass(ID), P(OS, Banner) {} + : LoopPass(ID), OS(OS), Banner(Banner) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -49,8 +50,7 @@ public: [](BasicBlock *BB) { return BB; }); if (BBI != L->blocks().end() && isFunctionInPrintList((*BBI)->getParent()->getName())) { - LoopAnalysisManager DummyLAM; - P.run(*L, DummyLAM); + printLoop(*L, OS, Banner); } return false; } diff --git a/lib/Analysis/LoopPassManager.cpp b/lib/Analysis/LoopPassManager.cpp deleted file mode 100644 index 044e5d55dafd0..0000000000000 --- a/lib/Analysis/LoopPassManager.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//===- LoopPassManager.cpp - Loop pass management -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LoopPassManager.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/IR/Dominators.h" - -using namespace llvm; - -// Explicit template instantiations and specialization defininitions for core -// template typedefs. -namespace llvm { -template class PassManager<Loop>; -template class AnalysisManager<Loop>; -template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>; -template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop>; - -template <> -bool LoopAnalysisManagerFunctionProxy::Result::invalidate( - Function &F, const PreservedAnalyses &PA, - FunctionAnalysisManager::Invalidator &Inv) { - // If this proxy isn't marked as preserved, the set of Function objects in - // the module may have changed. We therefore can't call - // InnerAM->invalidate(), because any pointers to Functions it has may be - // stale. - auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>(); - if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Loop>>()) - InnerAM->clear(); - - // FIXME: Proper suppor for invalidation isn't yet implemented for the LPM. - - // Return false to indicate that this result is still a valid proxy. - return false; -} -} - -PreservedAnalyses llvm::getLoopPassPreservedAnalyses() { - PreservedAnalyses PA; - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LoopAnalysis>(); - PA.preserve<ScalarEvolutionAnalysis>(); - // TODO: What we really want to do here is preserve an AA category, but that - // concept doesn't exist yet. - PA.preserve<AAManager>(); - PA.preserve<BasicAA>(); - PA.preserve<GlobalsAA>(); - PA.preserve<SCEVAA>(); - return PA; -} diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index e7415e6231963..66a0d145dcd85 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -323,17 +323,28 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { + MemDepResult InvariantGroupDependency = MemDepResult::getUnknown(); if (QueryInst != nullptr) { if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { - MemDepResult invariantGroupDependency = - getInvariantGroupPointerDependency(LI, BB); + InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB); - if (invariantGroupDependency.isDef()) - return invariantGroupDependency; + if (InvariantGroupDependency.isDef()) + return InvariantGroupDependency; } } - return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, - Limit); + MemDepResult SimpleDep = getSimplePointerDependencyFrom( + MemLoc, isLoad, ScanIt, BB, QueryInst, Limit); + if (SimpleDep.isDef()) + return SimpleDep; + // Non-local invariant group dependency indicates there is non local Def + // (it only returns nonLocal if it finds nonLocal def), which is better than + // local clobber and everything else. + if (InvariantGroupDependency.isNonLocal()) + return InvariantGroupDependency; + + assert(InvariantGroupDependency.isUnknown() && + "InvariantGroupDependency should be only unknown at this point"); + return SimpleDep; } MemDepResult @@ -358,6 +369,20 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // Queue to process all pointers that are equivalent to load operand. SmallVector<const Value *, 8> LoadOperandsQueue; LoadOperandsQueue.push_back(LoadOperand); + + Instruction *ClosestDependency = nullptr; + // Order of instructions in uses list is unpredictible. In order to always + // get the same result, we will look for the closest dominance. + auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) { + assert(Other && "Must call it with not null instruction"); + if (Best == nullptr || DT.dominates(Best, Other)) + return Other; + return Best; + }; + + + // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case + // we will see all the instructions. This should be fixed in MSSA. while (!LoadOperandsQueue.empty()) { const Value *Ptr = LoadOperandsQueue.pop_back_val(); assert(Ptr && !isa<GlobalValue>(Ptr) && @@ -388,12 +413,24 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // If we hit load/store with the same invariant.group metadata (and the // same pointer operand) we can assume that value pointed by pointer // operand didn't change. - if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB && + if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) - return MemDepResult::getDef(U); + ClosestDependency = GetClosestDependency(ClosestDependency, U); } } - return MemDepResult::getUnknown(); + + if (!ClosestDependency) + return MemDepResult::getUnknown(); + if (ClosestDependency->getParent() == BB) + return MemDepResult::getDef(ClosestDependency); + // Def(U) can't be returned here because it is non-local. If local + // dependency won't be found then return nonLocal counting that the + // user will call getNonLocalPointerDependency, which will return cached + // result. + NonLocalDefsCache.try_emplace( + LI, NonLocalDepResult(ClosestDependency->getParent(), + MemDepResult::getDef(ClosestDependency), nullptr)); + return MemDepResult::getNonLocal(); } MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( @@ -877,7 +914,17 @@ void MemoryDependenceResults::getNonLocalPointerDependency( assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); - + { + // Check if there is cached Def with invariant.group. FIXME: cache might be + // invalid if cached instruction would be removed between call to + // getPointerDependencyFrom and this function. + auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst); + if (NonLocalDefIt != NonLocalDefsCache.end()) { + Result.push_back(std::move(NonLocalDefIt->second)); + NonLocalDefsCache.erase(NonLocalDefIt); + return; + } + } // This routine does not expect to deal with volatile instructions. // Doing so would require piping through the QueryInst all the way through. // TODO: volatiles can't be elided, but they can be reordered with other diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 44f1a6dde0d21..b3905cc01e84b 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -7032,20 +7032,21 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic // modulo (N / D). // - // (N / D) may need BW+1 bits in its representation. Hence, we'll use this - // bit width during computations. + // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent + // (N / D) in general. The inverse itself always fits into BW bits, though, + // so we immediately truncate it. APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D APInt Mod(BW + 1, 0); Mod.setBit(BW - Mult2); // Mod = N / D - APInt I = AD.multiplicativeInverse(Mod); + APInt I = AD.multiplicativeInverse(Mod).trunc(BW); // 4. Compute the minimum unsigned root of the equation: // I * (B / D) mod (N / D) - APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod); + // To simplify the computation, we factor out the divide by D: + // (I * B mod N) / D + APInt Result = (I * B).lshr(Mult2); - // The result is guaranteed to be less than 2^BW so we may truncate it to BW - // bits. - return SE.getConstant(Result.trunc(BW)); + return SE.getConstant(Result); } /// Find the roots of the quadratic equation for the given quadratic chrec @@ -7206,17 +7207,25 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { - ConstantRange CR = getUnsignedRange(Start); - const SCEV *MaxBECount; - if (!CountDown && CR.getUnsignedMin().isMinValue()) - // When counting up, the worst starting value is 1, not 0. - MaxBECount = CR.getUnsignedMax().isMinValue() - ? getConstant(APInt::getMinValue(CR.getBitWidth())) - : getConstant(APInt::getMaxValue(CR.getBitWidth())); - else - MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() - : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount, false, Predicates); + APInt MaxBECount = getUnsignedRange(Distance).getUnsignedMax(); + + // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, + // we end up with a loop whose backedge-taken count is n - 1. Detect this + // case, and see if we can improve the bound. + // + // Explicitly handling this here is necessary because getUnsignedRange + // isn't context-sensitive; it doesn't know that we only care about the + // range inside the loop. + const SCEV *Zero = getZero(Distance->getType()); + const SCEV *One = getOne(Distance->getType()); + const SCEV *DistancePlusOne = getAddExpr(Distance, One); + if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) { + // If Distance + 1 doesn't overflow, we can compute the maximum distance + // as "unsigned_max(Distance + 1) - 1". + ConstantRange CR = getUnsignedRange(DistancePlusOne); + MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1); + } + return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates); } // As a special case, handle the instance where Step is a positive power of diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index cd8c24630df1d..5c0d1aac1b98a 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -277,9 +277,10 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo) const { + OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) const { int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + Opd1PropInfo, Opd2PropInfo, Args); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index d31472c0d33c1..b79370baad102 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -526,7 +526,10 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, unsigned BitWidth = KnownZero.getBitWidth(); - for (auto &AssumeVH : Q.AC->assumptions()) { + // Note that the patterns below need to be kept in sync with the code + // in AssumptionCache::updateAffectedValues. + + for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { if (!AssumeVH) continue; CallInst *I = cast<CallInst>(AssumeVH); @@ -2580,51 +2583,70 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, return false; } -bool llvm::CannotBeOrderedLessThanZero(const Value *V, - const TargetLibraryInfo *TLI, - unsigned Depth) { - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) - return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero(); +/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a +/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign +/// bit despite comparing equal. +static bool cannotBeOrderedLessThanZeroImpl(const Value *V, + const TargetLibraryInfo *TLI, + bool SignBitOnly, + unsigned Depth) { + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { + return !CFP->getValueAPF().isNegative() || + (!SignBitOnly && CFP->getValueAPF().isZero()); + } if (Depth == MaxDepth) - return false; // Limit search depth. + return false; // Limit search depth. const Operator *I = dyn_cast<Operator>(V); - if (!I) return false; + if (!I) + return false; switch (I->getOpcode()) { - default: break; + default: + break; // Unsigned integers are always nonnegative. case Instruction::UIToFP: return true; case Instruction::FMul: // x*x is always non-negative or a NaN. - if (I->getOperand(0) == I->getOperand(1)) + if (I->getOperand(0) == I->getOperand(1) && + (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) return true; + LLVM_FALLTHROUGH; case Instruction::FAdd: case Instruction::FDiv: case Instruction::FRem: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Instruction::Select: - return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly, + Depth + 1); case Instruction::FPExt: case Instruction::FPTrunc: // Widening/narrowing never change sign. - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1); case Instruction::Call: Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI); switch (IID) { default: break; case Intrinsic::maxnum: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) || - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) || + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Intrinsic::minnum: - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) && - CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly, + Depth + 1); case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::fabs: @@ -2636,18 +2658,30 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0) return true; } - return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1); + return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, + Depth + 1); case Intrinsic::fma: case Intrinsic::fmuladd: // x*x+y is non-negative if y is non-negative. return I->getOperand(0) == I->getOperand(1) && - CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1); + (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) && + cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly, + Depth + 1); } break; } return false; } +bool llvm::CannotBeOrderedLessThanZero(const Value *V, + const TargetLibraryInfo *TLI) { + return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0); +} + +bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) { + return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0); +} + /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, |