Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis')
79 files changed, 5706 insertions, 1907 deletions
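The AliasAnalysis.cpp hunks below give ExternalAAWrapperPass out-of-line constructors and thread it through the legacy-PM query paths (addUsedIfAvailable plus the WrapperPass->CB(P, F, AAR) call in createLegacyPMAAResults). As a rough sketch of how a client would use that hook — the pass-manager setup and the empty callback body here are illustrative assumptions, not part of the change:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"

// Hypothetical driver code: register an external AA callback so that
// AAResultsWrapperPass / createLegacyPMAAResults can consult it.
void addExternalAA(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createExternalAAWrapperPass(
      [](llvm::Pass &P, llvm::Function &F, llvm::AAResults &AAR) {
        // A real client would build its own AAResult here and call
        // AAR.addAAResult(...); left empty in this sketch.
        (void)P; (void)F; (void)AAR;
      }));
}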
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp index 32241e355eb8..1c7678a602d8 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -734,6 +735,15 @@ namespace { } // end anonymous namespace +ExternalAAWrapperPass::ExternalAAWrapperPass() : ImmutablePass(ID) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +ExternalAAWrapperPass::ExternalAAWrapperPass(CallbackT CB) + : ImmutablePass(ID), CB(std::move(CB)) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + char ExternalAAWrapperPass::ID = 0; INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis", @@ -784,7 +794,7 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) { // previous object first, in this case replacing it with an empty one, before // registering new results. AAR.reset( - new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); + new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F))); // BasicAA is always available for function analyses. Also, we add it first // so that it can trump TBAA results when it proves MustAlias. @@ -836,11 +846,12 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addUsedIfAvailable<SCEVAAWrapperPass>(); AU.addUsedIfAvailable<CFLAndersAAWrapperPass>(); AU.addUsedIfAvailable<CFLSteensAAWrapperPass>(); + AU.addUsedIfAvailable<ExternalAAWrapperPass>(); } AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR) { - AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()); + AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)); // Add in our explicitly constructed BasicAA results. 
if (!DisableBasicAA) @@ -861,6 +872,9 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, AAR.addAAResult(WrapperPass->getResult()); if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLSteensAAWrapperPass>()) AAR.addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = P.getAnalysisIfAvailable<ExternalAAWrapperPass>()) + if (WrapperPass->CB) + WrapperPass->CB(P, F, AAR); return AAR; } @@ -904,4 +918,5 @@ void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) { AU.addUsedIfAvailable<GlobalsAAWrapperPass>(); AU.addUsedIfAvailable<CFLAndersAAWrapperPass>(); AU.addUsedIfAvailable<CFLSteensAAWrapperPass>(); + AU.addUsedIfAvailable<ExternalAAWrapperPass>(); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index e83703867e09..2e44bbd3a8ca 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp index a6e5b9fab558..5cc5ab597ef9 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -119,6 +120,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) { TotalMayAliasSetSize -= AS->size(); AliasSets.erase(AS); + // If we've removed the saturated alias set, set saturated marker back to + // nullptr and ensure this tracker is empty. 
+ if (AS == AliasAnyAS) { + AliasAnyAS = nullptr; + assert(AliasSets.empty() && "Tracker not empty"); + } } void AliasSet::removeFromTracker(AliasSetTracker &AST) { @@ -690,8 +697,10 @@ void AliasSet::print(raw_ostream &OS) const { } void AliasSetTracker::print(raw_ostream &OS) const { - OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " - << PointerMap.size() << " pointer values.\n"; + OS << "Alias Set Tracker: " << AliasSets.size(); + if (AliasAnyAS) + OS << " (Saturated)"; + OS << " alias sets for " << PointerMap.size() << " pointer values.\n"; for (const AliasSet &AS : *this) AS.print(OS); OS << "\n"; diff --git a/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp index d46a8d8e306c..af718526684b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp @@ -65,6 +65,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeModuleDebugInfoPrinterPass(Registry); initializeModuleSummaryIndexWrapperPassPass(Registry); initializeMustExecutePrinterPass(Registry); + initializeMustBeExecutedContextPrinterPass(Registry); initializeObjCARCAAWrapperPassPass(Registry); initializeOptimizationRemarkEmitterWrapperPassPass(Registry); initializePhiValuesWrapperPassPass(Registry); diff --git a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp index cf2f845dee0a..f4d4a5ac8f88 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -130,7 +131,10 @@ void AssumptionCache::unregisterAssumption(CallInst *CI) { if (AVI != AffectedValues.end()) AffectedValues.erase(AVI); } - remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }); + + AssumeHandles.erase( + remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }), + AssumeHandles.end()); } void AssumptionCache::AffectedValueCallbackVH::deleted() { @@ -140,7 +144,7 @@ void AssumptionCache::AffectedValueCallbackVH::deleted() { // 'this' now dangles! } -void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { +void AssumptionCache::transferAffectedValuesInCache(Value *OV, Value *NV) { auto &NAVV = getOrInsertAffectedValues(NV); auto AVI = AffectedValues.find(OV); if (AVI == AffectedValues.end()) @@ -149,6 +153,7 @@ void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { for (auto &A : AVI->second) if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end()) NAVV.push_back(A); + AffectedValues.erase(OV); } void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { @@ -157,7 +162,7 @@ void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { // Any assumptions that affected this value now affect the new value. - AC->copyAffectedValuesInCache(getValPtr(), NV); + AC->transferAffectedValuesInCache(getValPtr(), NV); // 'this' now might dangle! If the AffectedValues map was resized to add an // entry for NV then this object might have been destroyed in favor of some // copy in the grown map. 
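The unregisterAssumption() change above is the classic erase-remove fix: llvm::remove_if, like std::remove_if, only shifts the kept elements to the front and returns the new logical end, so without the surrounding erase() the container keeps stale handles at its old size. A standalone illustration of the idiom (plain std::vector, not the actual AssumeHandles container):

#include <algorithm>
#include <vector>

// Drop all even values. remove_if alone would leave the vector's size
// unchanged, with unspecified values in the tail; erase() trims that tail.
void dropEven(std::vector<int> &V) {
  V.erase(std::remove_if(V.begin(), V.end(),
                         [](int X) { return X % 2 == 0; }),
          V.end());
}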
@@ -252,7 +257,7 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) { // Ok, build a new cache by scanning the function, insert it and the value // handle into our map, and return the newly populated cache. auto IP = AssumptionCaches.insert(std::make_pair( - FunctionCallbackVH(&F, this), llvm::make_unique<AssumptionCache>(F))); + FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F))); assert(IP.second && "Scanning function already in the map?"); return *IP.first->second; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3721c99883b8..e852d663c6b4 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -25,9 +25,9 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/PhiValues.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" @@ -49,6 +49,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -233,6 +234,26 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size; } +/// Return the minimal extent from \p V to the end of the underlying object, +/// assuming the result is used in an aliasing query. E.g., we do use the query +/// location size and the fact that null pointers cannot alias here. +static uint64_t getMinimalExtentFrom(const Value &V, + const LocationSize &LocSize, + const DataLayout &DL, + bool NullIsValidLoc) { + // If we have dereferenceability information we know a lower bound for the + // extent as accesses for a lower offset would be valid. We need to exclude + // the "or null" part if null is a valid pointer. + bool CanBeNull; + uint64_t DerefBytes = V.getPointerDereferenceableBytes(DL, CanBeNull); + DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes; + // If queried with a precise location size, we assume that location size to be + // accessed, thus valid. + if (LocSize.isPrecise()) + DerefBytes = std::max(DerefBytes, LocSize.getValue()); + return DerefBytes; +} + /// Returns true if we can prove that the object specified by V has size Size. static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, const TargetLibraryInfo &TLI, bool NullIsValidLoc) { @@ -481,7 +502,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, // because it should be in sync with CaptureTracking. Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. 
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) { + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { V = RP; continue; } @@ -1461,7 +1482,8 @@ AliasResult BasicAAResult::aliasGEP( // give up if we can't determine conditions that hold for every cycle: const Value *V = DecompGEP1.VarIndices[i].V; - KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT); + KnownBits Known = + computeKnownBits(V, DL, 0, &AC, dyn_cast<Instruction>(GEP1), DT); bool SignKnownZero = Known.isNonNegative(); bool SignKnownOne = Known.isNegative(); @@ -1792,10 +1814,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. bool NullIsValidLocation = NullPointerIsDefined(&F); - if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI, - NullIsValidLocation)) || - (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI, - NullIsValidLocation))) + if ((isObjectSmallerThan( + O2, getMinimalExtentFrom(*V1, V1Size, DL, NullIsValidLocation), DL, + TLI, NullIsValidLocation)) || + (isObjectSmallerThan( + O1, getMinimalExtentFrom(*V2, V2Size, DL, NullIsValidLocation), DL, + TLI, NullIsValidLocation))) return NoAlias; // Check the cache before climbing up use-def chains. This also terminates @@ -2027,7 +2051,7 @@ BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { } BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { - initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); } char BasicAAWrapperPass::ID = 0; @@ -2053,8 +2077,9 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *PVWP = getAnalysisIfAvailable<PhiValuesWrapperPass>(); - Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(), - ACT.getAssumptionCache(F), &DTWP.getDomTree(), + Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, + TLIWP.getTLI(F), ACT.getAssumptionCache(F), + &DTWP.getDomTree(), LIWP ? &LIWP->getLoopInfo() : nullptr, PVWP ? 
&PVWP->getResult() : nullptr)); @@ -2071,8 +2096,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { return BasicAAResult( - F.getParent()->getDataLayout(), - F, - P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + F.getParent()->getDataLayout(), F, + P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F), P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp index de183bbde173..544bd7757ae4 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 5eb95003f5d8..ffba65b5ed5e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -31,9 +32,11 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -61,6 +64,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) +BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass() + : FunctionPass(ID) { + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + char BranchProbabilityInfoWrapperPass::ID = 0; // Weights are for internal use only. They are used by heuristics to help to @@ -118,6 +127,13 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; +/// This is the probability for an ordered floating point comparison. +static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; +/// This is the probability for an unordered floating point comparison, it means +/// one or two of the operands are NaN. Usually it is used to test for an +/// exceptional case, so the result is unlikely. +static const uint32_t FPH_UNO_WEIGHT = 1; + /// Invoke-terminating normal branch taken weight /// /// This is the weight for branching to the normal destination of an invoke @@ -131,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -/// Add \p BB to PostDominatedByUnreachable set if applicable. 
-void -BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) { - if (isa<UnreachableInst>(TI) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable since - // the @llvm.experimental.deoptimize call is expected to practically - // never execute. - BB->getTerminatingDeoptimizeCall()) - PostDominatedByUnreachable.insert(BB); - return; - } +static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, + SmallVectorImpl<const BasicBlock *> &WorkList, + SmallPtrSetImpl<const BasicBlock *> &TargetSet) { + SmallVector<BasicBlock *, 8> Descendants; + SmallPtrSet<const BasicBlock *, 16> NewItems; + + PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants); + for (auto *BB : Descendants) + if (TargetSet.insert(BB).second) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (!TargetSet.count(*PI)) + NewItems.insert(*PI); + WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); +} - // If the terminator is an InvokeInst, check only the normal destination block - // as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast<InvokeInst>(TI)) { - if (PostDominatedByUnreachable.count(II->getNormalDest())) - PostDominatedByUnreachable.insert(BB); - return; +/// Compute a set of basic blocks that are post-dominated by unreachables. +void BranchProbabilityInfo::computePostDominatedByUnreachable( + const Function &F, PostDominatorTree *PDT) { + SmallVector<const BasicBlock *, 8> WorkList; + for (auto &BB : F) { + const Instruction *TI = BB.getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa<UnreachableInst>(TI) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable + // since the @llvm.experimental.deoptimize call is expected to + // practically never execute. + BB.getTerminatingDeoptimizeCall()) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); + } } - for (auto *I : successors(BB)) - // If any of successor is not post dominated then BB is also not. - if (!PostDominatedByUnreachable.count(I)) - return; - - PostDominatedByUnreachable.insert(BB); + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (PostDominatedByUnreachable.count(BB)) + continue; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (PostDominatedByUnreachable.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } + // If all the successors are unreachable, BB is unreachable as well. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByUnreachable.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } } -/// Add \p BB to PostDominatedByColdCall set if applicable. -void -BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { - assert(!PostDominatedByColdCall.count(BB)); - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) - return; +/// compute a set of basic blocks that are post-dominated by ColdCalls. 
+void BranchProbabilityInfo::computePostDominatedByColdCall( + const Function &F, PostDominatorTree *PDT) { + SmallVector<const BasicBlock *, 8> WorkList; + for (auto &BB : F) + for (auto &I : BB) + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->hasFnAttr(Attribute::Cold)) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); - // If all of successor are post dominated then BB is also done. - if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { - return PostDominatedByColdCall.count(SuccBB); - })) { - PostDominatedByColdCall.insert(BB); - return; - } + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast<InvokeInst>(TI)) - if (PostDominatedByColdCall.count(II->getNormalDest())) { - PostDominatedByColdCall.insert(BB); - return; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (PostDominatedByColdCall.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); } - - // Otherwise, if the block itself contains a cold function, add it to the - // set of blocks post-dominated by a cold call. - for (auto &I : *BB) - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - if (CI->hasFnAttr(Attribute::Cold)) { - PostDominatedByColdCall.insert(BB); - return; - } + // If all of successor are post dominated then BB is also done. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByColdCall.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); + } } /// Calculate edge weights for successors lead to unreachable. 
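Both compute* routines above follow the same seed-and-expand pattern: seed the set from blocks that have the property directly (unreachable terminators, deoptimize calls, cold calls), expand each seed to everything it post-dominates via PostDominatorTree::getDescendants, then revisit predecessors of newly marked blocks on a worklist. Condensed to its core — the helper name below is illustrative, not the actual LLVM function:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
using namespace llvm;

// Mark every block post-dominated by Seed and queue predecessors of newly
// marked blocks so the caller can re-examine them, mirroring UpdatePDTWorklist.
static void expandSeed(const BasicBlock *Seed, PostDominatorTree &PDT,
                       SmallVectorImpl<const BasicBlock *> &WorkList,
                       SmallPtrSetImpl<const BasicBlock *> &Marked) {
  SmallVector<BasicBlock *, 8> Descendants;
  PDT.getDescendants(const_cast<BasicBlock *>(Seed), Descendants);
  for (BasicBlock *BB : Descendants)
    if (Marked.insert(BB).second)
      for (const BasicBlock *Pred : predecessors(BB))
        if (!Marked.count(Pred))
          WorkList.push_back(Pred);
}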
@@ -778,6 +808,8 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!FCmp) return false; + uint32_t TakenWeight = FPH_TAKEN_WEIGHT; + uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT; bool isProb; if (FCmp->isEquality()) { // f1 == f2 -> Unlikely @@ -786,9 +818,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { // !isnan -> Likely isProb = true; + TakenWeight = FPH_ORD_WEIGHT; + NontakenWeight = FPH_UNO_WEIGHT; } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { // isnan -> Unlikely isProb = false; + TakenWeight = FPH_ORD_WEIGHT; + NontakenWeight = FPH_UNO_WEIGHT; } else { return false; } @@ -798,8 +834,7 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - BranchProbability TakenProb(FPH_TAKEN_WEIGHT, - FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight); setEdgeProbability(BB, TakenIdx, TakenProb); setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; @@ -963,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, LLVM_DEBUG(dbgs() << "\n"); } + std::unique_ptr<PostDominatorTree> PDT = + std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); + computePostDominatedByUnreachable(F, PDT.get()); + computePostDominatedByColdCall(F, PDT.get()); + // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. for (auto BB : post_order(&F.getEntryBlock())) { LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); - updatePostDominatedByUnreachable(BB); - updatePostDominatedByColdCall(BB); // If there is no at least two successors, no sense to set probability. if (BB->getTerminator()->getNumSuccessors() < 2) continue; @@ -1014,7 +1052,8 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage( bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + const TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); BPI.calculate(F, LI, &TLI); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp index 18b83d6838cc..8215b4ecbb03 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp @@ -87,11 +87,18 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB, /// with multiple predecessors. 
bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges) { - assert(TI->isTerminator() && "Must be a terminator to have successors!"); assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + return isCriticalEdge(TI, TI->getSuccessor(SuccNum), AllowIdenticalEdges); +} + +bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest, + bool AllowIdenticalEdges) { + assert(TI->isTerminator() && "Must be a terminator to have successors!"); if (TI->getNumSuccessors() == 1) return false; - const BasicBlock *Dest = TI->getSuccessor(SuccNum); + assert(find(predecessors(Dest), TI->getParent()) != pred_end(Dest) && + "No edge between TI's block and Dest."); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); // If there is more than one predecessor, this is a critical edge... diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp index 619b675b58d8..88e7d3bdede1 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp @@ -18,7 +18,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFGPrinter.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" using namespace llvm; @@ -99,7 +101,7 @@ static void writeCFGToDotFile(Function &F, bool CFGOnly = false) { errs() << "Writing '" << Filename << "'..."; std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); if (!EC) WriteGraph(File, (const Function*)&F, CFGOnly); diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index 690e514d4f5c..eb5c96e6eeca 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -69,6 +69,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" @@ -88,9 +89,11 @@ using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-anders-aa" -CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {} +CFLAndersAAResult::CFLAndersAAResult( + std::function<const TargetLibraryInfo &(Function &F)> GetTLI) + : GetTLI(std::move(GetTLI)) {} CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) - : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {} + : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {} CFLAndersAAResult::~CFLAndersAAResult() = default; namespace { @@ -779,7 +782,7 @@ static AliasAttrMap buildAttrMap(const CFLGraph &Graph, CFLAndersAAResult::FunctionInfo CFLAndersAAResult::buildInfoFrom(const Function &Fn) { CFLGraphBuilder<CFLAndersAAResult> GraphBuilder( - *this, TLI, + *this, GetTLI(const_cast<Function &>(Fn)), // Cast away the constness here due to GraphBuilder's API requirement const_cast<Function &>(Fn)); auto &Graph = GraphBuilder.getCFLGraph(); @@ -898,7 +901,10 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, AnalysisKey CFLAndersAA::Key; CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { - return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F)); + auto GetTLI = 
[&AM](Function &F) -> TargetLibraryInfo & { + return AM.getResult<TargetLibraryAnalysis>(F); + }; + return CFLAndersAAResult(GetTLI); } char CFLAndersAAWrapperPass::ID = 0; @@ -914,8 +920,10 @@ CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) { } void CFLAndersAAWrapperPass::initializePass() { - auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); - Result.reset(new CFLAndersAAResult(TLIWP.getTLI())); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + Result.reset(new CFLAndersAAResult(GetTLI)); } void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index 44b1834f70bf..85a8c3d2a00b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -46,6 +46,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -60,10 +61,11 @@ using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-steens-aa" -CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI) - : AAResultBase(), TLI(TLI) {} +CFLSteensAAResult::CFLSteensAAResult( + std::function<const TargetLibraryInfo &(Function &F)> GetTLI) + : AAResultBase(), GetTLI(std::move(GetTLI)) {} CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) - : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {} + : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} CFLSteensAAResult::~CFLSteensAAResult() = default; /// Information we have about a function and would like to keep around. @@ -181,7 +183,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo( // Builds the graph + StratifiedSets for a function. 
CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { - CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn); + CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn); StratifiedSetsBuilder<InstantiatedValue> SetBuilder; // Add all CFLGraph nodes and all Dereference edges to StratifiedSets @@ -331,7 +333,10 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, AnalysisKey CFLSteensAA::Key; CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { - return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F)); + auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & { + return AM.getResult<TargetLibraryAnalysis>(F); + }; + return CFLSteensAAResult(GetTLI); } char CFLSteensAAWrapperPass::ID = 0; @@ -347,8 +352,10 @@ CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { } void CFLSteensAAWrapperPass::initializePass() { - auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); - Result.reset(new CFLSteensAAResult(TLIWP.getTLI())); + auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + Result.reset(new CFLSteensAAResult(GetTLI)); } void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp index ec5e94d499be..8e8a50178518 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp @@ -10,10 +10,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -29,7 +30,7 @@ using namespace llvm; CallGraph::CallGraph(Module &M) : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)), - CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) { + CallsExternalNode(std::make_unique<CallGraphNode>(nullptr)) { // Add every function to the call graph. 
for (Function &F : M) addToCallGraph(&F); @@ -150,7 +151,7 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { return CGN.get(); assert((!F || F->getParent() == &M) && "Function not in current module!"); - CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F)); + CGN = std::make_unique<CallGraphNode>(const_cast<Function *>(F)); return CGN.get(); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp index d24cbd104bf6..7246b73bfd4b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/CallPrinter.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp index adaa83a6c443..20e2f06540a3 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp @@ -33,6 +33,22 @@ CaptureTracker::~CaptureTracker() {} bool CaptureTracker::shouldExplore(const Use *U) { return true; } +bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) { + // An inbounds GEP can either be a valid pointer (pointing into + // or to the end of an allocation), or be null in the default + // address space. So for an inbounds GEP there is no way to let + // the pointer escape using clever GEP hacking because doing so + // would make the pointer point outside of the allocated object + // and thus make the GEP result a poison value. Similarly, other + // dereferenceable pointers cannot be manipulated without producing + // poison. + if (auto *GEP = dyn_cast<GetElementPtrInst>(O)) + if (GEP->isInBounds()) + return true; + bool CanBeNull; + return O->getPointerDereferenceableBytes(DL, CanBeNull); +} + namespace { struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) @@ -251,7 +267,8 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // marked with nocapture do not capture. This means that places like // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression // in BasicAA also need to know about this property. - if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) { + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, + true)) { AddUses(Call); break; } @@ -330,7 +347,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, AddUses(I); break; case Instruction::ICmp: { - if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(1))) { + unsigned Idx = (I->getOperand(0) == V) ? 0 : 1; + unsigned OtherIdx = 1 - Idx; + if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) { // Don't count comparisons of a no-alias return value against null as // captures. This allows us to ignore comparisons of malloc results // with null, for example. @@ -338,29 +357,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, if (isNoAliasCall(V->stripPointerCasts())) break; if (!I->getFunction()->nullPointerIsDefined()) { - auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation(); - // An inbounds GEP can either be a valid pointer (pointing into - // or to the end of an allocation), or be null in the default - // address space. 
So for an inbounds GEPs there is no way to let - // the pointer escape using clever GEP hacking because doing so - // would make the pointer point outside of the allocated object - // and thus make the GEP result a poison value. - if (auto *GEP = dyn_cast<GetElementPtrInst>(O)) - if (GEP->isInBounds()) - break; - // Comparing a dereferenceable_or_null argument against null - // cannot lead to pointer escapes, because if it is not null it - // must be a valid (in-bounds) pointer. - bool CanBeNull; - if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull)) + auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation(); + // Comparing a dereferenceable_or_null pointer against null cannot + // lead to pointer escapes, because if it is not null it must be a + // valid (in-bounds) pointer. + if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout())) break; } } // Comparison against value stored in global variable. Given the pointer // does not escape, its value cannot be guessed and stored separately in a // global variable. - unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0; - auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIndex)); + auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx)); if (LI && isa<GlobalVariable>(LI->getPointerOperand())) break; // Otherwise, be conservative. There are crazy ways to capture pointers diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp index 20231ca78b45..b32924e6497a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp @@ -37,6 +37,8 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -93,6 +95,9 @@ static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy, /// This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { + assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) && + "Invalid constantexpr bitcast!"); + // Catch the obvious splat cases. if (C->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); @@ -521,8 +526,23 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, return nullptr; C = FoldBitCast(C, MapTy->getPointerTo(AS), DL); - if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) - return FoldBitCast(Res, LoadTy, DL); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) { + if (Res->isNullValue() && !LoadTy->isX86_MMXTy()) + // Materializing a zero can be done trivially without a bitcast + return Constant::getNullValue(LoadTy); + Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? 
DL.getIntPtrType(LoadTy) : LoadTy; + Res = FoldBitCast(Res, CastTy, DL); + if (LoadTy->isPtrOrPtrVectorTy()) { + // For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr + if (Res->isNullValue() && !LoadTy->isX86_MMXTy()) + return Constant::getNullValue(LoadTy); + if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) + // Be careful not to replace a load of an addrspace value with an inttoptr here + return nullptr; + Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy); + } + return Res; + } return nullptr; } @@ -544,7 +564,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, int64_t InitializerSize = DL.getTypeAllocSize(GV->getInitializer()->getType()); // If we're not accessing anything in this constant, the result is undefined. - if (Offset + BytesLoaded <= 0) + if (Offset <= -1 * static_cast<int64_t>(BytesLoaded)) return UndefValue::get(IntType); // If we're not accessing anything in this constant, the result is undefined. @@ -746,8 +766,8 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, Type *ResultTy, Optional<unsigned> InRangeIndex, const DataLayout &DL, const TargetLibraryInfo *TLI) { - Type *IntPtrTy = DL.getIntPtrType(ResultTy); - Type *IntPtrScalarTy = IntPtrTy->getScalarType(); + Type *IntIdxTy = DL.getIndexType(ResultTy); + Type *IntIdxScalarTy = IntIdxTy->getScalarType(); bool Any = false; SmallVector<Constant*, 32> NewIdxs; @@ -755,11 +775,11 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType( SrcElemTy, Ops.slice(1, i - 1)))) && - Ops[i]->getType()->getScalarType() != IntPtrScalarTy) { + Ops[i]->getType()->getScalarType() != IntIdxScalarTy) { Any = true; Type *NewType = Ops[i]->getType()->isVectorTy() - ? IntPtrTy - : IntPtrTy->getScalarType(); + ? IntIdxTy + : IntIdxScalarTy; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], true, NewType, @@ -781,10 +801,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, } /// Strip the pointer casts, but preserve the address space information. 
-Constant* StripPtrCastKeepAS(Constant* Ptr, Type *&ElemTy) { +Constant *StripPtrCastKeepAS(Constant *Ptr, Type *&ElemTy) { assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); auto *OldPtrTy = cast<PointerType>(Ptr->getType()); - Ptr = Ptr->stripPointerCasts(); + Ptr = cast<Constant>(Ptr->stripPointerCasts()); auto *NewPtrTy = cast<PointerType>(Ptr->getType()); ElemTy = NewPtrTy->getPointerElementType(); @@ -819,7 +839,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, if (!Ptr->getType()->isPointerTy()) return nullptr; - Type *IntPtrTy = DL.getIntPtrType(Ptr->getType()); + Type *IntIdxTy = DL.getIndexType(Ptr->getType()); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' @@ -830,7 +850,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // "inttoptr (sub (ptrtoint Ptr), V)" if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { auto *CE = dyn_cast<ConstantExpr>(Ops[1]); - assert((!CE || CE->getType() == IntPtrTy) && + assert((!CE || CE->getType() == IntIdxTy) && "CastGEPIndices didn't canonicalize index types!"); if (CE && CE->getOpcode() == Instruction::Sub && CE->getOperand(0)->isNullValue()) { @@ -845,7 +865,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, return nullptr; } - unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy); + unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy); APInt Offset = APInt(BitWidth, DL.getIndexedOffsetInType( @@ -925,7 +945,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // The element size is 0. This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can // accommodate the offset. - NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0)); + NewIdxs.push_back(ConstantInt::get(IntIdxTy, 0)); } else { // The element size is non-zero divide the offset by the element // size (rounding down), to compute the index at this level. @@ -934,7 +954,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, if (Overflow) break; Offset -= NewIdx * ElemSize; - NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx)); + NewIdxs.push_back(ConstantInt::get(IntIdxTy, NewIdx)); } } else { auto *STy = cast<StructType>(Ty); @@ -1038,7 +1058,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, return ConstantExpr::getExtractElement(Ops[0], Ops[1]); case Instruction::ExtractValue: return ConstantExpr::getExtractValue( - Ops[0], dyn_cast<ExtractValueInst>(InstOrCE)->getIndices()); + Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices()); case Instruction::InsertElement: return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: @@ -1464,40 +1484,50 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { if (!F->hasName()) return false; - StringRef Name = F->getName(); // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. 
+ StringRef Name = F->getName(); switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || Name == "atan" || - Name == "atan2" || Name == "acosf" || Name == "asinf" || - Name == "atanf" || Name == "atan2f"; + return Name == "acos" || Name == "acosf" || + Name == "asin" || Name == "asinf" || + Name == "atan" || Name == "atanf" || + Name == "atan2" || Name == "atan2f"; case 'c': - return Name == "ceil" || Name == "cos" || Name == "cosh" || - Name == "ceilf" || Name == "cosf" || Name == "coshf"; + return Name == "ceil" || Name == "ceilf" || + Name == "cos" || Name == "cosf" || + Name == "cosh" || Name == "coshf"; case 'e': - return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f"; + return Name == "exp" || Name == "expf" || + Name == "exp2" || Name == "exp2f"; case 'f': - return Name == "fabs" || Name == "floor" || Name == "fmod" || - Name == "fabsf" || Name == "floorf" || Name == "fmodf"; + return Name == "fabs" || Name == "fabsf" || + Name == "floor" || Name == "floorf" || + Name == "fmod" || Name == "fmodf"; case 'l': - return Name == "log" || Name == "log10" || Name == "logf" || - Name == "log10f"; + return Name == "log" || Name == "logf" || + Name == "log2" || Name == "log2f" || + Name == "log10" || Name == "log10f"; + case 'n': + return Name == "nearbyint" || Name == "nearbyintf"; case 'p': return Name == "pow" || Name == "powf"; case 'r': - return Name == "round" || Name == "roundf"; + return Name == "rint" || Name == "rintf" || + Name == "round" || Name == "roundf"; case 's': - return Name == "sin" || Name == "sinh" || Name == "sqrt" || - Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; + return Name == "sin" || Name == "sinf" || + Name == "sinh" || Name == "sinhf" || + Name == "sqrt" || Name == "sqrtf"; case 't': - return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; + return Name == "tan" || Name == "tanf" || + Name == "tanh" || Name == "tanhf" || + Name == "trunc" || Name == "truncf"; case '_': - // Check for various function names that get used for the math functions // when the header files are preprocessed with the macro // __FINITE_MATH_ONLY__ enabled. @@ -1713,40 +1743,37 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; - if (IntrinsicID == Intrinsic::round) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToAway); - return ConstantFP::get(Ty->getContext(), V); + // Use internal versions of these intrinsics. 
+ APFloat U = Op->getValueAPF(); + + if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::floor) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardNegative); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::round) { + U.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::ceil) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardPositive); - return ConstantFP::get(Ty->getContext(), V); + U.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::trunc) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardZero); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::floor) { + U.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::rint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::trunc) { + U.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::nearbyint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::fabs) { + U.clearSign(); + return ConstantFP::get(Ty->getContext(), U); } /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1763,18 +1790,19 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, switch (IntrinsicID) { default: break; - case Intrinsic::fabs: - return ConstantFoldFP(fabs, V, Ty); - case Intrinsic::log2: - return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log: return ConstantFoldFP(log, V, Ty); + case Intrinsic::log2: + // TODO: What about hosts that lack a C99 library? + return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log10: + // TODO: What about hosts that lack a C99 library? return ConstantFoldFP(log10, V, Ty); case Intrinsic::exp: return ConstantFoldFP(exp, V, Ty); case Intrinsic::exp2: - return ConstantFoldFP(exp2, V, Ty); + // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty); case Intrinsic::sin: return ConstantFoldFP(sin, V, Ty); case Intrinsic::cos: @@ -1786,104 +1814,150 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (!TLI) return nullptr; - char NameKeyChar = Name[0]; - if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_') - NameKeyChar = Name[2]; - - switch (NameKeyChar) { - case 'a': - if ((Name == "acos" && TLI->has(LibFunc_acos)) || - (Name == "acosf" && TLI->has(LibFunc_acosf)) || - (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) || - (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite))) + LibFunc Func = NotLibFunc; + TLI->getLibFunc(Name, Func); + switch (Func) { + default: + break; + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acos_finite: + case LibFunc_acosf_finite: + if (TLI->has(Func)) return ConstantFoldFP(acos, V, Ty); - else if ((Name == "asin" && TLI->has(LibFunc_asin)) || - (Name == "asinf" && TLI->has(LibFunc_asinf)) || - (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) || - (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite))) + break; + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asin_finite: + case LibFunc_asinf_finite: + if (TLI->has(Func)) return ConstantFoldFP(asin, V, Ty); - else if ((Name == "atan" && TLI->has(LibFunc_atan)) || - (Name == "atanf" && TLI->has(LibFunc_atanf))) + break; + case LibFunc_atan: + case LibFunc_atanf: + if (TLI->has(Func)) return ConstantFoldFP(atan, V, Ty); break; - case 'c': - if ((Name == "ceil" && TLI->has(LibFunc_ceil)) || - (Name == "ceilf" && TLI->has(LibFunc_ceilf))) - return ConstantFoldFP(ceil, V, Ty); - else if ((Name == "cos" && TLI->has(LibFunc_cos)) || - (Name == "cosf" && TLI->has(LibFunc_cosf))) + case LibFunc_ceil: + case LibFunc_ceilf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_cos: + case LibFunc_cosf: + if (TLI->has(Func)) return ConstantFoldFP(cos, V, Ty); - else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || - (Name == "coshf" && TLI->has(LibFunc_coshf)) || - (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) || - (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite))) + break; + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_cosh_finite: + case LibFunc_coshf_finite: + if (TLI->has(Func)) return ConstantFoldFP(cosh, V, Ty); break; - case 'e': - if ((Name == "exp" && TLI->has(LibFunc_exp)) || - (Name == "expf" && TLI->has(LibFunc_expf)) || - (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) || - (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite))) + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_exp_finite: + case LibFunc_expf_finite: + if (TLI->has(Func)) return ConstantFoldFP(exp, V, Ty); - if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || - (Name == "exp2f" && TLI->has(LibFunc_exp2f)) || - (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) || - (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite))) - // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a - // C99 library. + break; + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2_finite: + case LibFunc_exp2f_finite: + if (TLI->has(Func)) + // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); break; - case 'f': - if ((Name == "fabs" && TLI->has(LibFunc_fabs)) || - (Name == "fabsf" && TLI->has(LibFunc_fabsf))) - return ConstantFoldFP(fabs, V, Ty); - else if ((Name == "floor" && TLI->has(LibFunc_floor)) || - (Name == "floorf" && TLI->has(LibFunc_floorf))) - return ConstantFoldFP(floor, V, Ty); + case LibFunc_fabs: + case LibFunc_fabsf: + if (TLI->has(Func)) { + U.clearSign(); + return ConstantFP::get(Ty->getContext(), U); + } break; - case 'l': - if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || - (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) || - (Name == "__log_finite" && V > 0 && - TLI->has(LibFunc_log_finite)) || - (Name == "__logf_finite" && V > 0 && - TLI->has(LibFunc_logf_finite))) + case LibFunc_floor: + case LibFunc_floorf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_log: + case LibFunc_logf: + case LibFunc_log_finite: + case LibFunc_logf_finite: + if (V > 0.0 && TLI->has(Func)) return ConstantFoldFP(log, V, Ty); - else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || - (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) || - (Name == "__log10_finite" && V > 0 && - TLI->has(LibFunc_log10_finite)) || - (Name == "__log10f_finite" && V > 0 && - TLI->has(LibFunc_log10f_finite))) + break; + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2_finite: + case LibFunc_log2f_finite: + if (V > 0.0 && TLI->has(Func)) + // TODO: What about hosts that lack a C99 library? + return ConstantFoldFP(Log2, V, Ty); + break; + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10_finite: + case LibFunc_log10f_finite: + if (V > 0.0 && TLI->has(Func)) + // TODO: What about hosts that lack a C99 library? 
return ConstantFoldFP(log10, V, Ty); break; - case 'r': - if ((Name == "round" && TLI->has(LibFunc_round)) || - (Name == "roundf" && TLI->has(LibFunc_roundf))) - return ConstantFoldFP(round, V, Ty); + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_rint: + case LibFunc_rintf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); + } break; - case 's': - if ((Name == "sin" && TLI->has(LibFunc_sin)) || - (Name == "sinf" && TLI->has(LibFunc_sinf))) + case LibFunc_round: + case LibFunc_roundf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_sin: + case LibFunc_sinf: + if (TLI->has(Func)) return ConstantFoldFP(sin, V, Ty); - else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || - (Name == "sinhf" && TLI->has(LibFunc_sinhf)) || - (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) || - (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite))) + break; + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinh_finite: + case LibFunc_sinhf_finite: + if (TLI->has(Func)) return ConstantFoldFP(sinh, V, Ty); - else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || - (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) + break; + case LibFunc_sqrt: + case LibFunc_sqrtf: + if (V >= 0.0 && TLI->has(Func)) return ConstantFoldFP(sqrt, V, Ty); break; - case 't': - if ((Name == "tan" && TLI->has(LibFunc_tan)) || - (Name == "tanf" && TLI->has(LibFunc_tanf))) + case LibFunc_tan: + case LibFunc_tanf: + if (TLI->has(Func)) return ConstantFoldFP(tan, V, Ty); - else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) || - (Name == "tanhf" && TLI->has(LibFunc_tanhf))) + break; + case LibFunc_tanh: + case LibFunc_tanhf: + if (TLI->has(Func)) return ConstantFoldFP(tanh, V, Ty); break; - default: + case LibFunc_trunc: + case LibFunc_truncf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), U); + } break; } return nullptr; @@ -2002,19 +2076,35 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, if (!TLI) return nullptr; - if ((Name == "pow" && TLI->has(LibFunc_pow)) || - (Name == "powf" && TLI->has(LibFunc_powf)) || - (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) || - (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite))) - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || - (Name == "fmodf" && TLI->has(LibFunc_fmodf))) - return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || - (Name == "atan2f" && TLI->has(LibFunc_atan2f)) || - (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) || - (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite))) - return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + + LibFunc Func = NotLibFunc; + TLI->getLibFunc(Name, Func); + switch (Func) { + default: + break; + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_pow_finite: + case LibFunc_powf_finite: + if (TLI->has(Func)) + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + break; + case LibFunc_fmod: + case LibFunc_fmodf: + if (TLI->has(Func)) { + APFloat V = Op1->getValueAPF(); + if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF())) + return ConstantFP::get(Ty->getContext(), V); + } + break; + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2_finite: + case LibFunc_atan2f_finite: + if (TLI->has(Func)) + 
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + break; + } } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) return ConstantFP::get(Ty->getContext(), @@ -2041,20 +2131,27 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, switch (IntrinsicID) { default: break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + // X - undef -> { undef, false } + // undef - X -> { undef, false } + // X + undef -> { undef, false } + // undef + x -> { undef, false } + if (!C0 || !C1) { + return ConstantStruct::get( + cast<StructType>(Ty), + {UndefValue::get(Ty->getStructElementType(0)), + Constant::getNullValue(Ty->getStructElementType(1))}); + } + LLVM_FALLTHROUGH; case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: - // Even if both operands are undef, we cannot fold muls to undef - // in the general case. For example, on i2 there are no inputs - // that would produce { i2 -1, i1 true } as the result. + case Intrinsic::umul_with_overflow: { + // undef * X -> { 0, false } + // X * undef -> { 0, false } if (!C0 || !C1) return Constant::getNullValue(Ty); - LLVM_FALLTHROUGH; - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::usub_with_overflow: { - if (!C0 || !C1) - return UndefValue::get(Ty); APInt Res; bool Overflow; @@ -2194,13 +2291,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, case Intrinsic::fma: case Intrinsic::fmuladd: { APFloat V = Op1->getValueAPF(); - APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(), - Op3->getValueAPF(), - APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) - return ConstantFP::get(Ty->getContext(), V); - - return nullptr; + V.fusedMultiplyAdd(Op2->getValueAPF(), Op3->getValueAPF(), + APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); } } } diff --git a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp index bf0cdbfd0c8b..953da964c435 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp new file mode 100644 index 000000000000..90ce13e6f650 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp @@ -0,0 +1,283 @@ +//===- DDG.cpp - Data Dependence Graph -------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The implementation for the data dependence graph. 
+//===----------------------------------------------------------------------===// +#include "llvm/Analysis/DDG.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<bool> + CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Create pi-block nodes.")); + +#define DEBUG_TYPE "ddg" + +template class llvm::DGEdge<DDGNode, DDGEdge>; +template class llvm::DGNode<DDGNode, DDGEdge>; +template class llvm::DirectedGraph<DDGNode, DDGEdge>; + +//===--------------------------------------------------------------------===// +// DDGNode implementation +//===--------------------------------------------------------------------===// +DDGNode::~DDGNode() {} + +bool DDGNode::collectInstructions( + llvm::function_ref<bool(Instruction *)> const &Pred, + InstructionListType &IList) const { + assert(IList.empty() && "Expected the IList to be empty on entry."); + if (isa<SimpleDDGNode>(this)) { + for (Instruction *I : cast<const SimpleDDGNode>(this)->getInstructions()) + if (Pred(I)) + IList.push_back(I); + } else if (isa<PiBlockDDGNode>(this)) { + for (const DDGNode *PN : cast<const PiBlockDDGNode>(this)->getNodes()) { + assert(!isa<PiBlockDDGNode>(PN) && "Nested PiBlocks are not supported."); + SmallVector<Instruction *, 8> TmpIList; + PN->collectInstructions(Pred, TmpIList); + IList.insert(IList.end(), TmpIList.begin(), TmpIList.end()); + } + } else + llvm_unreachable("unimplemented type of node"); + return !IList.empty(); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) { + const char *Out; + switch (K) { + case DDGNode::NodeKind::SingleInstruction: + Out = "single-instruction"; + break; + case DDGNode::NodeKind::MultiInstruction: + Out = "multi-instruction"; + break; + case DDGNode::NodeKind::PiBlock: + Out = "pi-block"; + break; + case DDGNode::NodeKind::Root: + Out = "root"; + break; + case DDGNode::NodeKind::Unknown: + Out = "?? (error)"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { + OS << "Node Address:" << &N << ":" << N.getKind() << "\n"; + if (isa<SimpleDDGNode>(N)) { + OS << " Instructions:\n"; + for (const Instruction *I : cast<const SimpleDDGNode>(N).getInstructions()) + OS.indent(2) << *I << "\n"; + } else if (isa<PiBlockDDGNode>(&N)) { + OS << "--- start of nodes in pi-block ---\n"; + auto &Nodes = cast<const PiBlockDDGNode>(&N)->getNodes(); + unsigned Count = 0; + for (const DDGNode *N : Nodes) + OS << *N << (++Count == Nodes.size() ? "" : "\n"); + OS << "--- end of nodes in pi-block ---\n"; + } else if (!isa<RootDDGNode>(N)) + llvm_unreachable("unimplemented type of node"); + + OS << (N.getEdges().empty() ? 
" Edges:none!\n" : " Edges:\n"); + for (auto &E : N.getEdges()) + OS.indent(2) << *E; + return OS; +} + +//===--------------------------------------------------------------------===// +// SimpleDDGNode implementation +//===--------------------------------------------------------------------===// + +SimpleDDGNode::SimpleDDGNode(Instruction &I) + : DDGNode(NodeKind::SingleInstruction), InstList() { + assert(InstList.empty() && "Expected empty list."); + InstList.push_back(&I); +} + +SimpleDDGNode::SimpleDDGNode(const SimpleDDGNode &N) + : DDGNode(N), InstList(N.InstList) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N) + : DDGNode(std::move(N)), InstList(std::move(N.InstList)) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); } + +//===--------------------------------------------------------------------===// +// PiBlockDDGNode implementation +//===--------------------------------------------------------------------===// + +PiBlockDDGNode::PiBlockDDGNode(const PiNodeList &List) + : DDGNode(NodeKind::PiBlock), NodeList(List) { + assert(!NodeList.empty() && "pi-block node constructed with an empty list."); +} + +PiBlockDDGNode::PiBlockDDGNode(const PiBlockDDGNode &N) + : DDGNode(N), NodeList(N.NodeList) { + assert(getKind() == NodeKind::PiBlock && !NodeList.empty() && + "constructing from invalid pi-block node."); +} + +PiBlockDDGNode::PiBlockDDGNode(PiBlockDDGNode &&N) + : DDGNode(std::move(N)), NodeList(std::move(N.NodeList)) { + assert(getKind() == NodeKind::PiBlock && !NodeList.empty() && + "constructing from invalid pi-block node."); +} + +PiBlockDDGNode::~PiBlockDDGNode() { NodeList.clear(); } + +//===--------------------------------------------------------------------===// +// DDGEdge implementation +//===--------------------------------------------------------------------===// + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) { + const char *Out; + switch (K) { + case DDGEdge::EdgeKind::RegisterDefUse: + Out = "def-use"; + break; + case DDGEdge::EdgeKind::MemoryDependence: + Out = "memory"; + break; + case DDGEdge::EdgeKind::Rooted: + Out = "rooted"; + break; + case DDGEdge::EdgeKind::Unknown: + Out = "?? (error)"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge &E) { + OS << "[" << E.getKind() << "] to " << &E.getTargetNode() << "\n"; + return OS; +} + +//===--------------------------------------------------------------------===// +// DataDependenceGraph implementation +//===--------------------------------------------------------------------===// +using BasicBlockListType = SmallVector<BasicBlock *, 8>; + +DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) + : DependenceGraphInfo(F.getName().str(), D) { + // Put the basic blocks in program order for correct dependence + // directions. 
+ BasicBlockListType BBList; + for (auto &SCC : make_range(scc_begin(&F), scc_end(&F))) + for (BasicBlock * BB : SCC) + BBList.push_back(BB); + std::reverse(BBList.begin(), BBList.end()); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI, + DependenceInfo &D) + : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." + + L.getHeader()->getName()) + .str(), + D) { + // Put the basic blocks in program order for correct dependence + // directions. + LoopBlocksDFS DFS(&L); + DFS.perform(&LI); + BasicBlockListType BBList; + for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) + BBList.push_back(BB); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::~DataDependenceGraph() { + for (auto *N : Nodes) { + for (auto *E : *N) + delete E; + delete N; + } +} + +bool DataDependenceGraph::addNode(DDGNode &N) { + if (!DDGBase::addNode(N)) + return false; + + // In general, if the root node is already created and linked, it is not safe + // to add new nodes since they may be unreachable by the root. However, + // pi-block nodes need to be added after the root node is linked, and they are + // always reachable by the root, because they represent components that are + // already reachable by root. + auto *Pi = dyn_cast<PiBlockDDGNode>(&N); + assert((!Root || Pi) && + "Root node is already added. No more nodes can be added."); + + if (isa<RootDDGNode>(N)) + Root = &N; + + if (Pi) + for (DDGNode *NI : Pi->getNodes()) + PiBlockMap.insert(std::make_pair(NI, Pi)); + + return true; +} + +const PiBlockDDGNode *DataDependenceGraph::getPiBlock(const NodeType &N) const { + if (PiBlockMap.find(&N) == PiBlockMap.end()) + return nullptr; + auto *Pi = PiBlockMap.find(&N)->second; + assert(PiBlockMap.find(Pi) == PiBlockMap.end() && + "Nested pi-blocks detected."); + return Pi; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) { + for (DDGNode *Node : G) + // Avoid printing nodes that are part of a pi-block twice. They will get + // printed when the pi-block is printed. + if (!G.getPiBlock(*Node)) + OS << *Node << "\n"; + OS << "\n"; + return OS; +} + +bool DDGBuilder::shouldCreatePiBlocks() const { + return CreatePiBlocks; +} + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// DDG as a loop pass. 
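A minimal sketch of how a loop pass under the new pass manager could consume this result; DDGStatsPass is hypothetical, but the analysis result is obtained exactly as DDGAnalysisPrinterPass does below:

#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
using namespace llvm;

namespace {
// Hypothetical loop pass: fetch the DDG for a loop and count its pi-block
// nodes.
struct DDGStatsPass : PassInfoMixin<DDGStatsPass> {
  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                        LoopStandardAnalysisResults &AR, LPMUpdater &) {
    const DataDependenceGraph &G = *AM.getResult<DDGAnalysis>(L, AR);
    unsigned NumPiBlocks = 0;
    for (DDGNode *N : G)
      if (isa<PiBlockDDGNode>(N))
        ++NumPiBlocks;
    errs() << "pi-blocks in '" << L.getHeader()->getName()
           << "': " << NumPiBlocks << "\n";
    return PreservedAnalyses::all();
  }
};
} // end anonymous namespace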
+DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + return std::make_unique<DataDependenceGraph>(L, AR.LI, DI); +} +AnalysisKey DDGAnalysis::Key; + +PreservedAnalyses DDGAnalysisPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + OS << "'DDG' for loop '" << L.getHeader()->getName() << "':\n"; + OS << *AM.getResult<DDGAnalysis>(L, AR); + return PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp index c1043e446beb..60cd1b5317d6 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp index 01b8ff10d355..aaee8c21f289 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp index 75f269e84f9d..9b38053c196b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -61,6 +61,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -141,6 +142,11 @@ INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", char DependenceAnalysisWrapperPass::ID = 0; +DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() + : FunctionPass(ID) { + initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); +} + FunctionPass *llvm::createDependenceAnalysisWrapperPass() { return new DependenceAnalysisWrapperPass(); } @@ -164,25 +170,25 @@ void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredTransitive<LoopInfoWrapperPass>(); } - // Used to test the dependence analyzer. -// Looks through the function, noting loads and stores. +// Looks through the function, noting instructions that may access memory. // Calls depends() on every possible pair and prints out the result. // Ignores all other instructions. 
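// (mayReadOrWriteMemory() also matches calls, fences, and atomic or volatile
// accesses, so dependence queries are printed for those pairs too; depends()
// itself still falls back to a conservative Dependence for anything that is
// not a simple load or store.)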
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { auto *F = DA->getFunction(); for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; ++SrcI) { - if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { + if (SrcI->mayReadOrWriteMemory()) { for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE; ++DstI) { - if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { - OS << "da analyze - "; + if (DstI->mayReadOrWriteMemory()) { + OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; + OS << " da analyze - "; if (auto D = DA->depends(&*SrcI, &*DstI, true)) { D->dump(OS); for (unsigned Level = 1; Level <= D->getLevels(); Level++) { if (D->isSplitable(Level)) { - OS << "da analyze - split level = " << Level; + OS << " da analyze - split level = " << Level; OS << ", iteration = " << *DA->getSplitIteration(*D, Level); OS << "!\n"; } @@ -254,7 +260,7 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination, LoopIndependent(PossiblyLoopIndependent) { Consistent = true; if (CommonLevels) - DV = make_unique<DVEntry[]>(CommonLevels); + DV = std::make_unique<DVEntry[]>(CommonLevels); } // The rest are simple getters that hide the implementation. @@ -876,14 +882,13 @@ void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { } } - // Examine the scev and return true iff it's linear. // Collect any loops mentioned in the set of "Loops". -bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, - SmallBitVector &Loops) { - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); +bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, + SmallBitVector &Loops, bool IsSrc) { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); if (!AddRec) - return isLoopInvariant(Src, LoopNest); + return isLoopInvariant(Expr, LoopNest); const SCEV *Start = AddRec->getStart(); const SCEV *Step = AddRec->getStepRecurrence(*SE); const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); @@ -896,33 +901,25 @@ bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, } if (!isLoopInvariant(Step, LoopNest)) return false; - Loops.set(mapSrcLoop(AddRec->getLoop())); - return checkSrcSubscript(Start, LoopNest, Loops); + if (IsSrc) + Loops.set(mapSrcLoop(AddRec->getLoop())); + else + Loops.set(mapDstLoop(AddRec->getLoop())); + return checkSubscript(Start, LoopNest, Loops, IsSrc); } - +// Examine the scev and return true iff it's linear. +// Collect any loops mentioned in the set of "Loops". +bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, + SmallBitVector &Loops) { + return checkSubscript(Src, LoopNest, Loops, true); +} // Examine the scev and return true iff it's linear. // Collect any loops mentioned in the set of "Loops". 
bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, SmallBitVector &Loops) { - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); - if (!AddRec) - return isLoopInvariant(Dst, LoopNest); - const SCEV *Start = AddRec->getStart(); - const SCEV *Step = AddRec->getStepRecurrence(*SE); - const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); - if (!isa<SCEVCouldNotCompute>(UB)) { - if (SE->getTypeSizeInBits(Start->getType()) < - SE->getTypeSizeInBits(UB->getType())) { - if (!AddRec->getNoWrapFlags()) - return false; - } - } - if (!isLoopInvariant(Step, LoopNest)) - return false; - Loops.set(mapDstLoop(AddRec->getLoop())); - return checkDstSubscript(Start, LoopNest, Loops); + return checkSubscript(Dst, LoopNest, Loops, false); } @@ -3407,15 +3404,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (Src == Dst) PossiblyLoopIndependent = false; - if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || - (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) + if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) // if both instructions don't reference memory, there's no dependence return nullptr; if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); - return make_unique<Dependence>(Src, Dst); + return std::make_unique<Dependence>(Src, Dst); } assert(isLoadOrStore(Src) && "instruction is not load or store"); @@ -3430,7 +3426,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, case PartialAlias: // cannot analyse objects if we don't understand their aliasing. LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); - return make_unique<Dependence>(Src, Dst); + return std::make_unique<Dependence>(Src, Dst); case NoAlias: // If the objects noalias, they are distinct, accesses are independent. LLVM_DEBUG(dbgs() << "no alias\n"); @@ -3777,11 +3773,9 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, return nullptr; } - return make_unique<FullDependence>(std::move(Result)); + return std::make_unique<FullDependence>(std::move(Result)); } - - //===----------------------------------------------------------------------===// // getSplitIteration - // Rather than spend rarely-used space recording the splitting iteration diff --git a/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp new file mode 100644 index 000000000000..e8a1a2fff919 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp @@ -0,0 +1,407 @@ +//===- DependenceGraphBuilder.cpp ------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements common steps of the build algorithm for construction +// of dependence graphs such as DDG and PDG. 
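+// The builder runs a sequence of phases, implemented below: assign
+// per-instruction ordinals, create fine-grained nodes, add def-use and memory
+// dependence edges, create and connect a root node, optionally fold each
+// non-trivial SCC into a pi-block, and finally sort the nodes topologically.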
+//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/ADT/EnumeratedArray.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DDG.h" + +using namespace llvm; + +#define DEBUG_TYPE "dgb" + +STATISTIC(TotalGraphs, "Number of dependence graphs created."); +STATISTIC(TotalDefUseEdges, "Number of def-use edges created."); +STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created."); +STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created."); +STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created."); +STATISTIC(TotalConfusedEdges, + "Number of confused memory dependencies between two nodes."); +STATISTIC(TotalEdgeReversals, + "Number of times the source and sink of dependence was reversed to " + "expose cycles in the graph."); + +using InstructionListType = SmallVector<Instruction *, 2>; + +//===--------------------------------------------------------------------===// +// AbstractDependenceGraphBuilder implementation +//===--------------------------------------------------------------------===// + +template <class G> +void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() { + // The BBList is expected to be in program order. + size_t NextOrdinal = 1; + for (auto *BB : BBList) + for (auto &I : *BB) + InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++)); +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() { + ++TotalGraphs; + assert(IMap.empty() && "Expected empty instruction map at start"); + for (BasicBlock *BB : BBList) + for (Instruction &I : *BB) { + auto &NewNode = createFineGrainedNode(I); + IMap.insert(std::make_pair(&I, &NewNode)); + NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I))); + ++TotalFineGrainedNodes; + } +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() { + // Create a root node that connects to every connected component of the graph. + // This is done to allow graph iterators to visit all the disjoint components + // of the graph, in a single walk. + // + // This algorithm works by going through each node of the graph and for each + // node N, do a DFS starting from N. A rooted edge is established between the + // root node and N (if N is not yet visited). All the nodes reachable from N + // are marked as visited and are skipped in the DFS of subsequent nodes. + // + // Note: This algorithm tries to limit the number of edges out of the root + // node to some extent, but there may be redundant edges created depending on + // the iteration order. For example for a graph {A -> B}, an edge from the + // root node is added to both nodes if B is visited before A. While it does + // not result in minimal number of edges, this approach saves compile-time + // while keeping the number of edges in check. + auto &RootNode = createRootNode(); + df_iterator_default_set<const NodeType *, 4> Visited; + for (auto *N : Graph) { + if (*N == RootNode) + continue; + for (auto I : depth_first_ext(N, Visited)) + if (I == N) + createRootedEdge(RootNode, *N); + } +} + +template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() { + if (!shouldCreatePiBlocks()) + return; + + LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n"); + + // The overall algorithm is as follows: + // 1. Identify SCCs and for each SCC create a pi-block node containing all + // the nodes in that SCC. + // 2. 
Identify incoming edges incident to the nodes inside of the SCC and + // reconnect them to the pi-block node. + // 3. Identify outgoing edges from the nodes inside of the SCC to nodes + // outside of it and reconnect them so that the edges are coming out of the + // SCC node instead. + + // Adding nodes as we iterate through the SCCs cause the SCC + // iterators to get invalidated. To prevent this invalidation, we first + // collect a list of nodes that are part of an SCC, and then iterate over + // those lists to create the pi-block nodes. Each element of the list is a + // list of nodes in an SCC. Note: trivial SCCs containing a single node are + // ignored. + SmallVector<NodeListType, 4> ListOfSCCs; + for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) { + if (SCC.size() > 1) + ListOfSCCs.emplace_back(SCC.begin(), SCC.end()); + } + + for (NodeListType &NL : ListOfSCCs) { + LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size() + << " nodes in it.\n"); + + // SCC iterator may put the nodes in an order that's different from the + // program order. To preserve original program order, we sort the list of + // nodes based on ordinal numbers computed earlier. + llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) { + return getOrdinal(*LHS) < getOrdinal(*RHS); + }); + + NodeType &PiNode = createPiBlock(NL); + ++TotalPiBlockNodes; + + // Build a set to speed up the lookup for edges whose targets + // are inside the SCC. + SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end()); + + // We have the set of nodes in the SCC. We go through the set of nodes + // that are outside of the SCC and look for edges that cross the two sets. + for (NodeType *N : Graph) { + + // Skip the SCC node and all the nodes inside of it. + if (*N == PiNode || NodesInSCC.count(N)) + continue; + + for (NodeType *SCCNode : NL) { + + enum Direction { + Incoming, // Incoming edges to the SCC + Outgoing, // Edges going ot of the SCC + DirectionCount // To make the enum usable as an array index. + }; + + // Use these flags to help us avoid creating redundant edges. If there + // are more than one edges from an outside node to inside nodes, we only + // keep one edge from that node to the pi-block node. Similarly, if + // there are more than one edges from inside nodes to an outside node, + // we only keep one edge from the pi-block node to the outside node. + // There is a flag defined for each direction (incoming vs outgoing) and + // for each type of edge supported, using a two-dimensional boolean + // array. + using EdgeKind = typename EdgeType::EdgeKind; + EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{ + false, false}; + + auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst, + const EdgeKind K) { + switch (K) { + case EdgeKind::RegisterDefUse: + createDefUseEdge(Src, Dst); + break; + case EdgeKind::MemoryDependence: + createMemoryEdge(Src, Dst); + break; + case EdgeKind::Rooted: + createRootedEdge(Src, Dst); + break; + default: + llvm_unreachable("Unsupported type of edge."); + } + }; + + auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New, + const Direction Dir) { + if (!Src->hasEdgeTo(*Dst)) + return; + LLVM_DEBUG(dbgs() + << "reconnecting(" + << (Dir == Direction::Incoming ? 
"incoming)" : "outgoing)") + << ":\nSrc:" << *Src << "\nDst:" << *Dst + << "\nNew:" << *New << "\n"); + assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) && + "Invalid direction."); + + SmallVector<EdgeType *, 10> EL; + Src->findEdgesTo(*Dst, EL); + for (EdgeType *OldEdge : EL) { + EdgeKind Kind = OldEdge->getKind(); + if (!EdgeAlreadyCreated[Dir][Kind]) { + if (Dir == Direction::Incoming) { + createEdgeOfKind(*Src, *New, Kind); + LLVM_DEBUG(dbgs() << "created edge from Src to New.\n"); + } else if (Dir == Direction::Outgoing) { + createEdgeOfKind(*New, *Dst, Kind); + LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n"); + } + EdgeAlreadyCreated[Dir][Kind] = true; + } + Src->removeEdge(*OldEdge); + destroyEdge(*OldEdge); + LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n"); + } + }; + + // Process incoming edges incident to the pi-block node. + reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming); + + // Process edges that are coming out of the pi-block node. + reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing); + } + } + } + + // Ordinal maps are no longer needed. + InstOrdinalMap.clear(); + NodeOrdinalMap.clear(); + + LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n"); +} + +template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() { + for (NodeType *N : Graph) { + InstructionListType SrcIList; + N->collectInstructions([](const Instruction *I) { return true; }, SrcIList); + + // Use a set to mark the targets that we link to N, so we don't add + // duplicate def-use edges when more than one instruction in a target node + // use results of instructions that are contained in N. + SmallPtrSet<NodeType *, 4> VisitedTargets; + + for (Instruction *II : SrcIList) { + for (User *U : II->users()) { + Instruction *UI = dyn_cast<Instruction>(U); + if (!UI) + continue; + NodeType *DstNode = nullptr; + if (IMap.find(UI) != IMap.end()) + DstNode = IMap.find(UI)->second; + + // In the case of loops, the scope of the subgraph is all the + // basic blocks (and instructions within them) belonging to the loop. We + // simply ignore all the edges coming from (or going into) instructions + // or basic blocks outside of this range. + if (!DstNode) { + LLVM_DEBUG( + dbgs() + << "skipped def-use edge since the sink" << *UI + << " is outside the range of instructions being considered.\n"); + continue; + } + + // Self dependencies are ignored because they are redundant and + // uninteresting. 
+ if (DstNode == N) { + LLVM_DEBUG(dbgs() + << "skipped def-use edge since the sink and the source (" + << N << ") are the same.\n"); + continue; + } + + if (VisitedTargets.insert(DstNode).second) { + createDefUseEdge(*N, *DstNode); + ++TotalDefUseEdges; + } + } + } + } +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() { + using DGIterator = typename G::iterator; + auto isMemoryAccess = [](const Instruction *I) { + return I->mayReadOrWriteMemory(); + }; + for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) { + InstructionListType SrcIList; + (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList); + if (SrcIList.empty()) + continue; + + for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) { + if (**SrcIt == **DstIt) + continue; + InstructionListType DstIList; + (*DstIt)->collectInstructions(isMemoryAccess, DstIList); + if (DstIList.empty()) + continue; + bool ForwardEdgeCreated = false; + bool BackwardEdgeCreated = false; + for (Instruction *ISrc : SrcIList) { + for (Instruction *IDst : DstIList) { + auto D = DI.depends(ISrc, IDst, true); + if (!D) + continue; + + // If we have a dependence with its left-most non-'=' direction + // being '>' we need to reverse the direction of the edge, because + // the source of the dependence cannot occur after the sink. For + // confused dependencies, we will create edges in both directions to + // represent the possibility of a cycle. + + auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = BackwardEdgeCreated = true; + ++TotalConfusedEdges; + }; + + auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = true; + }; + + auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + BackwardEdgeCreated = true; + }; + + if (D->isConfused()) + createConfusedEdges(**SrcIt, **DstIt); + else if (D->isOrdered() && !D->isLoopIndependent()) { + bool ReversedEdge = false; + for (unsigned Level = 1; Level <= D->getLevels(); ++Level) { + if (D->getDirection(Level) == Dependence::DVEntry::EQ) + continue; + else if (D->getDirection(Level) == Dependence::DVEntry::GT) { + createBackwardEdge(**SrcIt, **DstIt); + ReversedEdge = true; + ++TotalEdgeReversals; + break; + } else if (D->getDirection(Level) == Dependence::DVEntry::LT) + break; + else { + createConfusedEdges(**SrcIt, **DstIt); + break; + } + } + if (!ReversedEdge) + createForwardEdge(**SrcIt, **DstIt); + } else + createForwardEdge(**SrcIt, **DstIt); + + // Avoid creating duplicate edges. + if (ForwardEdgeCreated && BackwardEdgeCreated) + break; + } + + // If we've created edges in both directions, there is no more + // unique edge that we can create between these two nodes, so we + // can exit early. + if (ForwardEdgeCreated && BackwardEdgeCreated) + break; + } + } + } +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() { + + // If we don't create pi-blocks, then we may not have a DAG. 
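+ // With pi-blocks enabled, every non-trivial SCC is hidden behind a single
+ // pi-block node, so the traversal below sees an acyclic graph and the
+ // reverse post-order built from it is a valid topological order.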
+ if (!shouldCreatePiBlocks()) + return; + + SmallVector<NodeType *, 64> NodesInPO; + using NodeKind = typename NodeType::NodeKind; + for (NodeType *N : post_order(&Graph)) { + if (N->getKind() == NodeKind::PiBlock) { + // Put members of the pi-block right after the pi-block itself, for + // convenience. + const NodeListType &PiBlockMembers = getNodesInPiBlock(*N); + NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(), + PiBlockMembers.end()); + } + NodesInPO.push_back(N); + } + + size_t OldSize = Graph.Nodes.size(); + Graph.Nodes.clear(); + for (NodeType *N : reverse(NodesInPO)) + Graph.Nodes.push_back(N); + if (Graph.Nodes.size() != OldSize) + assert(false && + "Expected the number of nodes to stay the same after the sort"); +} + +template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>; +template class llvm::DependenceGraphInfo<DDGNode>; diff --git a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp index 0ccd59ef2bfd..3d1be1e1cce0 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const { return DivergentValues.find(&V) != DivergentValues.end(); } +bool DivergenceAnalysis::isDivergentUse(const Use &U) const { + Value &V = *U.get(); + Instruction &I = *cast<Instruction>(U.getUser()); + return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); +} + void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.empty()) return; @@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const { return DA.isDivergent(val); } +bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const { + return DA.isDivergentUse(use); +} + void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const { OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n"; DA.print(OS, mod); diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp index d9f43dd746ef..024a0fb49950 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp index 49215889cfd6..b374334ea371 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp @@ -233,7 +233,7 @@ void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates) { return; if (Strategy == UpdateStrategy::Lazy) { - for (const auto U : Updates) + for (const auto &U : Updates) if (!isSelfDominance(U)) PendUpdates.push_back(U); @@ -253,7 +253,7 @@ void DomTreeUpdater::applyUpdatesPermissive( SmallSet<std::pair<BasicBlock *, BasicBlock *>, 8> Seen; SmallVector<DominatorTree::UpdateType, 8> DeduplicatedUpdates; - for (const auto U : Updates) { + for (const auto &U : Updates) { auto Edge = std::make_pair(U.getFrom(), U.getTo()); // Because it is illegal to submit updates that have already been applied // and updates to an edge need to be strictly ordered, diff --git 
a/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp index f9a554acb7ea..14e6965f1259 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp index 0d6c0ffb18a8..4361e0dc9bbd 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -286,7 +287,7 @@ GlobalsAAResult::getFunctionInfo(const Function *F) { void GlobalsAAResult::AnalyzeGlobals(Module &M) { SmallPtrSet<Function *, 32> TrackedFunctions; for (Function &F : M) - if (F.hasLocalLinkage()) + if (F.hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(&F)) { // Remember that we are tracking this global. NonAddressTakenGlobals.insert(&F); @@ -294,7 +295,9 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) { Handles.emplace_front(*this, &F); Handles.front().I = Handles.begin(); ++NumNonAddrTakenFunctions; - } + } else + UnknownFunctionsWithLocalLinkage = true; + } SmallPtrSet<Function *, 16> Readers, Writers; for (GlobalVariable &GV : M.globals()) @@ -370,7 +373,8 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, // passing into the function. if (Call->isDataOperand(&U)) { // Detect calls to free. - if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) { + if (Call->isArgOperand(&U) && + isFreeCall(I, &GetTLI(*Call->getFunction()))) { if (Writers) Writers->insert(Call->getParent()->getParent()); } else { @@ -432,7 +436,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { Value *Ptr = GetUnderlyingObject(SI->getOperand(0), GV->getParent()->getDataLayout()); - if (!isAllocLikeFn(Ptr, &TLI)) + if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction()))) return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a @@ -525,9 +529,12 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { FI.setMayReadAnyGlobal(); } else { FI.addModRefInfo(ModRefInfo::ModRef); - // Can't say anything useful unless it's an intrinsic - they don't - // read or write global variables of the kind considered here. - KnowNothing = !F->isIntrinsic(); + if (!F->onlyAccessesArgMemory()) + FI.setMayReadAnyGlobal(); + if (!F->isIntrinsic()) { + KnowNothing = true; + break; + } } continue; } @@ -576,6 +583,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We handle calls specially because the graph-relevant aspects are // handled above. if (auto *Call = dyn_cast<CallBase>(&I)) { + auto &TLI = GetTLI(*Node->getFunction()); if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) { // FIXME: It is completely unclear why this is necessary and not // handled by the above graph code. @@ -925,7 +933,9 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, // global we are tracking, return information if we have it. 
if (const GlobalValue *GV = dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL))) - if (GV->hasLocalLinkage()) + // If GV is internal to this IR and there is no function with local linkage + // that has had their address taken, keep looking for a tighter ModRefInfo. + if (GV->hasLocalLinkage() && !UnknownFunctionsWithLocalLinkage) if (const Function *F = Call->getCalledFunction()) if (NonAddressTakenGlobals.count(GV)) if (const FunctionInfo *FI = getFunctionInfo(F)) @@ -937,12 +947,13 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI)); } -GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, - const TargetLibraryInfo &TLI) - : AAResultBase(), DL(DL), TLI(TLI) {} +GlobalsAAResult::GlobalsAAResult( + const DataLayout &DL, + std::function<const TargetLibraryInfo &(Function &F)> GetTLI) + : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {} GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), + : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)), NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)), IndirectGlobals(std::move(Arg.IndirectGlobals)), AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)), @@ -957,10 +968,10 @@ GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) GlobalsAAResult::~GlobalsAAResult() {} -/*static*/ GlobalsAAResult -GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, - CallGraph &CG) { - GlobalsAAResult Result(M.getDataLayout(), TLI); +/*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule( + Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI, + CallGraph &CG) { + GlobalsAAResult Result(M.getDataLayout(), GetTLI); // Discover which functions aren't recursive, to feed into AnalyzeGlobals. 
Result.CollectSCCMembership(CG); @@ -977,8 +988,12 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, AnalysisKey GlobalsAA::Key; GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) { - return GlobalsAAResult::analyzeModule(M, - AM.getResult<TargetLibraryAnalysis>(M), + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + return GlobalsAAResult::analyzeModule(M, GetTLI, AM.getResult<CallGraphAnalysis>(M)); } @@ -999,9 +1014,11 @@ GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) { } bool GlobalsAAWrapperPass::runOnModule(Module &M) { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule( - M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), - getAnalysis<CallGraphWrapperPass>().getCallGraph()))); + M, GetTLI, getAnalysis<CallGraphWrapperPass>().getCallGraph()))); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp index cad92f6e56bb..d48283279858 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp @@ -13,19 +13,25 @@ #include "llvm/IR/PatternMatch.h" using namespace llvm; +using namespace llvm::PatternMatch; bool llvm::isGuard(const User *U) { - using namespace llvm::PatternMatch; return match(U, m_Intrinsic<Intrinsic::experimental_guard>()); } +bool llvm::isWidenableBranch(const User *U) { + Value *Condition, *WidenableCondition; + BasicBlock *GuardedBB, *DeoptBB; + return parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB, + DeoptBB); +} + bool llvm::isGuardAsWidenableBranch(const User *U) { Value *Condition, *WidenableCondition; BasicBlock *GuardedBB, *DeoptBB; if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB, DeoptBB)) return false; - using namespace llvm::PatternMatch; for (auto &Insn : *DeoptBB) { if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>())) return true; @@ -38,12 +44,63 @@ bool llvm::isGuardAsWidenableBranch(const User *U) { bool llvm::parseWidenableBranch(const User *U, Value *&Condition, Value *&WidenableCondition, BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) { - using namespace llvm::PatternMatch; - if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)), - IfTrueBB, IfFalseBB))) + + Use *C, *WC; + if (parseWidenableBranch(const_cast<User*>(U), C, WC, IfTrueBB, IfFalseBB)) { + if (C) + Condition = C->get(); + else + Condition = ConstantInt::getTrue(IfTrueBB->getContext()); + WidenableCondition = WC->get(); + return true; + } + return false; +} + +bool llvm::parseWidenableBranch(User *U, Use *&C,Use *&WC, + BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) { + + auto *BI = dyn_cast<BranchInst>(U); + if (!BI || !BI->isConditional()) + return false; + auto *Cond = BI->getCondition(); + if (!Cond->hasOneUse()) + return false; + + IfTrueBB = BI->getSuccessor(0); + IfFalseBB = BI->getSuccessor(1); + + if (match(Cond, m_Intrinsic<Intrinsic::experimental_widenable_condition>())) { + WC = &BI->getOperandUse(0); + C = nullptr; + return true; + } + + // Check for two cases: + // 1) br (i1 (and A, WC())), label %IfTrue, label %IfFalse + // 2) br (i1 (and WC(), B)), label %IfTrue, label %IfFalse + 
// We do not check for more generalized and trees as we should canonicalize + // to the form above in instcombine. (TODO) + Value *A, *B; + if (!match(Cond, m_And(m_Value(A), m_Value(B)))) return false; - // TODO: At the moment, we only recognize the branch if the WC call in this - // specific position. We should generalize! - return match(WidenableCondition, - m_Intrinsic<Intrinsic::experimental_widenable_condition>()); + auto *And = dyn_cast<Instruction>(Cond); + if (!And) + // Could be a constexpr + return false; + + if (match(A, m_Intrinsic<Intrinsic::experimental_widenable_condition>()) && + A->hasOneUse()) { + WC = &And->getOperandUse(0); + C = &And->getOperandUse(1); + return true; + } + + if (match(B, m_Intrinsic<Intrinsic::experimental_widenable_condition>()) && + B->hasOneUse()) { + WC = &And->getOperandUse(1); + C = &And->getOperandUse(0); + return true; + } + return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp index ce285f82f720..ac81cba836f8 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -300,7 +299,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); if (!ReduxDesc.isRecurrence()) return false; - if (isa<FPMathOperator>(ReduxDesc.getPatternInst())) + // FIXME: FMF is allowed on phi, but propagation is not handled correctly. + if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) FMF &= ReduxDesc.getPatternInst()->getFastMathFlags(); } @@ -698,25 +698,48 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence( // Ensure every user of the phi node is dominated by the previous value. // The dominance requirement ensures the loop vectorizer will not need to // vectorize the initial value prior to the first iteration of the loop. - // TODO: Consider extending this sinking to handle other kinds of instructions - // and expressions, beyond sinking a single cast past Previous. + // TODO: Consider extending this sinking to handle memory instructions and + // phis with multiple users. + + // Returns true, if all users of I are dominated by DominatedBy. + auto allUsesDominatedBy = [DT](Instruction *I, Instruction *DominatedBy) { + return all_of(I->uses(), [DT, DominatedBy](Use &U) { + return DT->dominates(DominatedBy, U); + }); + }; + if (Phi->hasOneUse()) { - auto *I = Phi->user_back(); - if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && - DT->dominates(Previous, I->user_back())) { - if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. - SinkAfter[I] = Previous; + Instruction *I = Phi->user_back(); + + // If the user of the PHI is also the incoming value, we potentially have a + // reduction and which cannot be handled by sinking. + if (Previous == I) + return false; + + // We cannot sink terminator instructions. + if (I->getParent()->getTerminator() == I) + return false; + + // Do not try to sink an instruction multiple times (if multiple operands + // are first order recurrences). 
+ // TODO: We can support this case, by sinking the instruction after the + // 'deepest' previous instruction. + if (SinkAfter.find(I) != SinkAfter.end()) + return false; + + if (DT->dominates(Previous, I)) // We already are good w/o sinking. return true; - } - } - for (User *U : Phi->users()) - if (auto *I = dyn_cast<Instruction>(U)) { - if (!DT->dominates(Previous, I)) - return false; + // We can sink any instruction without side effects, as long as all users + // are dominated by the instruction we are sinking after. + if (I->getParent() == Phi->getParent() && !I->mayHaveSideEffects() && + allUsesDominatedBy(I, Previous)) { + SinkAfter[I] = Previous; + return true; } + } - return true; + return allUsesDominatedBy(Phi, Previous); } /// This function returns the identity element (or neutral element) for diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp index 681a0cf7e981..9432696b5a26 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git a/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 6ff840efcb64..dc4cbc371ef4 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include <string> #include <utility> @@ -53,7 +54,7 @@ static cl::opt<unsigned> "call callsite")); ICallPromotionAnalysis::ICallPromotionAnalysis() { - ValueDataArray = llvm::make_unique<InstrProfValueData[]>(MaxNumPromotions); + ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumPromotions); } bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count, diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp index 0dec146e0465..de83a48aad16 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp @@ -18,9 +18,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -36,6 +36,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -50,7 +51,7 @@ static cl::opt<int> InlineThreshold( cl::desc("Control the amount of inlining to perform (default = 225)")); static cl::opt<int> HintThreshold( - "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, + "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with inline hint")); static 
cl::opt<int> @@ -62,7 +63,7 @@ static cl::opt<int> // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. static cl::opt<int> ColdThreshold( - "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, + "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt<int> @@ -92,11 +93,13 @@ static cl::opt<bool> OptComputeFullInlineCost( "exceeds the threshold.")); namespace { - +class InlineCostCallAnalyzer; class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { typedef InstVisitor<CallAnalyzer, bool> Base; friend class InstVisitor<CallAnalyzer, bool>; +protected: + virtual ~CallAnalyzer() {} /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; @@ -123,20 +126,86 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// easily cacheable. Instead, use the cover function paramHasAttr. CallBase &CandidateCall; - /// Tunable parameters that control the analysis. - const InlineParams &Params; + /// Extension points for handling callsite features. + /// Called after a basic block was analyzed. + virtual void onBlockAnalyzed(const BasicBlock *BB) {} - /// Upper bound for the inlining cost. Bonuses are being applied to account - /// for speculative "expected profit" of the inlining decision. - int Threshold; + /// Called at the end of the analysis of the callsite. Return the outcome of + /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or + /// the reason it can't. + virtual InlineResult finalizeAnalysis() { return true; } - /// Inlining cost measured in abstract units, accounts for all the - /// instructions expected to be executed for a given function invocation. - /// Instructions that are statically proven to be dead based on call-site - /// arguments are not counted here. - int Cost = 0; + /// Called when we're about to start processing a basic block, and every time + /// we are done processing an instruction. Return true if there is no point in + /// continuing the analysis (e.g. we've determined already the call site is + /// too expensive to inline) + virtual bool shouldStop() { return false; } + + /// Called before the analysis of the callee body starts (with callsite + /// contexts propagated). It checks callsite-specific information. Return a + /// reason analysis can't continue if that's the case, or 'true' if it may + /// continue. + virtual InlineResult onAnalysisStart() { return true; } + + /// Called if the analysis engine decides SROA cannot be done for the given + /// alloca. + virtual void onDisableSROA(AllocaInst *Arg) {} + + /// Called the analysis engine determines load elimination won't happen. + virtual void onDisableLoadElimination() {} + + /// Called to account for a call. + virtual void onCallPenalty() {} + + /// Called to account for the expectation the inlining would result in a load + /// elimination. + virtual void onLoadEliminationOpportunity() {} - bool ComputeFullInlineCost; + /// Called to account for the cost of argument setup for the Call in the + /// callee's body (not the callsite currently under analysis). + virtual void onCallArgumentSetup(const CallBase &Call) {} + + /// Called to account for a load relative intrinsic. + virtual void onLoadRelativeIntrinsic() {} + + /// Called to account for a lowered call. + virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) { + } + + /// Account for a jump table of given size. 
Return false to stop further + /// processing the switch instruction + virtual bool onJumpTable(unsigned JumpTableSize) { return true; } + + /// Account for a case cluster of given size. Return false to stop further + /// processing of the instruction. + virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; } + + /// Called at the end of processing a switch instruction, with the given + /// number of case clusters. + virtual void onFinalizeSwitch(unsigned JumpTableSize, + unsigned NumCaseCluster) {} + + /// Called to account for any other instruction not specifically accounted + /// for. + virtual void onCommonInstructionSimplification() {} + + /// Start accounting potential benefits due to SROA for the given alloca. + virtual void onInitializeSROAArg(AllocaInst *Arg) {} + + /// Account SROA savings for the AllocaInst value. + virtual void onAggregateSROAUse(AllocaInst *V) {} + + bool handleSROA(Value *V, bool DoNotDisable) { + // Check for SROA candidates in comparisons. + if (auto *SROAArg = getSROAArgForValueOrNull(V)) { + if (DoNotDisable) { + onAggregateSROAUse(SROAArg); + return true; + } + disableSROAForArg(SROAArg); + } + return false; + } bool IsCallerRecursive = false; bool IsRecursiveCall = false; @@ -153,12 +222,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { unsigned NumInstructions = 0; unsigned NumVectorInstructions = 0; - /// Bonus to be applied when percentage of vector instructions in callee is - /// high (see more details in updateThreshold). - int VectorBonus = 0; - /// Bonus to be applied when the callee has only one reachable basic block. - int SingleBBBonus = 0; - /// While we walk the potentially-inlined instructions, we build up and /// maintain a mapping of simplified values specific to this callsite. The /// idea is to propagate any special information we have about arguments to @@ -170,12 +233,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// Keep track of the values which map back (through function arguments) to /// allocas on the caller stack which could be simplified through SROA. - DenseMap<Value *, Value *> SROAArgValues; + DenseMap<Value *, AllocaInst *> SROAArgValues; - /// The mapping of caller Alloca values to their accumulated cost savings. If - /// we have to disable SROA for one of the allocas, this tells us how much - /// cost must be added. - DenseMap<Value *, int> SROAArgCosts; + /// Keep track of Allocas for which we believe we may get SROA optimization. + /// We don't delete entries in SROAArgValue because we still want + /// isAllocaDerivedArg to function correctly. + DenseSet<AllocaInst *> EnabledSROAArgValues; /// Keep track of values which map to a pointer base and constant offset. DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; @@ -192,17 +255,20 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// loads. bool EnableLoadElimination; SmallPtrSet<Value *, 16> LoadAddrSet; - int LoadEliminationCost = 0; + + AllocaInst *getSROAArgForValueOrNull(Value *V) const { + auto It = SROAArgValues.find(V); + if (It == SROAArgValues.end() || + EnabledSROAArgValues.count(It->second) == 0) + return nullptr; + return It->second; + } // Custom simplification helper routines. 
bool isAllocaDerivedArg(Value *V); - bool lookupSROAArgAndCost(Value *V, Value *&Arg, - DenseMap<Value *, int>::iterator &CostIt); - void disableSROA(DenseMap<Value *, int>::iterator CostIt); + void disableSROAForArg(AllocaInst *SROAArg); void disableSROA(Value *V); void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); - void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, - int InstructionCost); void disableLoadElimination(); bool isGEPFree(GetElementPtrInst &GEP); bool canFoldInboundsGEP(GetElementPtrInst &I); @@ -223,32 +289,13 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); - /// Update Threshold based on callsite properties such as callee - /// attributes and callee hotness for PGO builds. The Callee is explicitly - /// passed to support analyzing indirect calls whose target is inferred by - /// analysis. - void updateThreshold(CallBase &Call, Function &Callee); - /// Return true if size growth is allowed when inlining the callee at \p Call. bool allowSizeGrowth(CallBase &Call); - /// Return true if \p Call is a cold callsite. - bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); - - /// Return a higher threshold if \p Call is a hot callsite. - Optional<int> getHotCallSiteThreshold(CallBase &Call, - BlockFrequencyInfo *CallerBFI); - // Custom analysis routines. InlineResult analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues); - /// Handle a capped 'int' increment for Cost. - void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) { - assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound"); - Cost = (int)std::min(UpperBound, Cost + Inc); - } - // Disable several entry points to the visitor so we don't accidentally use // them by declaring but not defining them here. void visit(Module *); @@ -294,18 +341,12 @@ public: std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, - Function &Callee, CallBase &Call, const InlineParams &Params) + Function &Callee, CallBase &Call) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), - CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold), - ComputeFullInlineCost(OptComputeFullInlineCost || - Params.ComputeFullInlineCost || ORE), - EnableLoadElimination(true) {} - - InlineResult analyzeCall(CallBase &Call); + CandidateCall(Call), EnableLoadElimination(true) {} - int getThreshold() { return Threshold; } - int getCost() { return Cost; } + InlineResult analyze(); // Keep a bunch of stats about the cost savings found so we can print them // out when debugging. @@ -315,12 +356,291 @@ public: unsigned NumConstantPtrCmps = 0; unsigned NumConstantPtrDiffs = 0; unsigned NumInstructionsSimplified = 0; + + void dump(); +}; + +/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note +/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer +class InlineCostCallAnalyzer final : public CallAnalyzer { + const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; + const bool ComputeFullInlineCost; + int LoadEliminationCost = 0; + /// Bonus to be applied when percentage of vector instructions in callee is + /// high (see more details in updateThreshold). 
+ int VectorBonus = 0; + /// Bonus to be applied when the callee has only one reachable basic block. + int SingleBBBonus = 0; + + /// Tunable parameters that control the analysis. + const InlineParams &Params; + + /// Upper bound for the inlining cost. Bonuses are being applied to account + /// for speculative "expected profit" of the inlining decision. + int Threshold = 0; + + /// Attempt to evaluate indirect calls to boost its inline cost. + const bool BoostIndirectCalls; + + /// Inlining cost measured in abstract units, accounts for all the + /// instructions expected to be executed for a given function invocation. + /// Instructions that are statically proven to be dead based on call-site + /// arguments are not counted here. + int Cost = 0; + + bool SingleBB = true; + unsigned SROACostSavings = 0; unsigned SROACostSavingsLost = 0; + /// The mapping of caller Alloca values to their accumulated cost savings. If + /// we have to disable SROA for one of the allocas, this tells us how much + /// cost must be added. + DenseMap<AllocaInst *, int> SROAArgCosts; + + /// Return true if \p Call is a cold callsite. + bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); + + /// Update Threshold based on callsite properties such as callee + /// attributes and callee hotness for PGO builds. The Callee is explicitly + /// passed to support analyzing indirect calls whose target is inferred by + /// analysis. + void updateThreshold(CallBase &Call, Function &Callee); + /// Return a higher threshold if \p Call is a hot callsite. + Optional<int> getHotCallSiteThreshold(CallBase &Call, + BlockFrequencyInfo *CallerBFI); + + /// Handle a capped 'int' increment for Cost. + void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) { + assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound"); + Cost = (int)std::min(UpperBound, Cost + Inc); + } + + void onDisableSROA(AllocaInst *Arg) override { + auto CostIt = SROAArgCosts.find(Arg); + if (CostIt == SROAArgCosts.end()) + return; + addCost(CostIt->second); + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); + } + + void onDisableLoadElimination() override { + addCost(LoadEliminationCost); + LoadEliminationCost = 0; + } + void onCallPenalty() override { addCost(InlineConstants::CallPenalty); } + void onCallArgumentSetup(const CallBase &Call) override { + // Pay the price of the argument setup. We account for the average 1 + // instruction per call argument setup here. + addCost(Call.arg_size() * InlineConstants::InstrCost); + } + void onLoadRelativeIntrinsic() override { + // This is normally lowered to 4 LLVM instructions. + addCost(3 * InlineConstants::InstrCost); + } + void onLoweredCall(Function *F, CallBase &Call, + bool IsIndirectCall) override { + // We account for the average 1 instruction per call argument setup here. + addCost(Call.arg_size() * InlineConstants::InstrCost); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan out. + // Pretend to inline the function, with a custom threshold. 
+ if (IsIndirectCall && BoostIndirectCalls) { + auto IndirectCallParams = Params; + IndirectCallParams.DefaultThreshold = + InlineConstants::IndirectCallThreshold; + /// FIXME: if InlineCostCallAnalyzer is derived from, this may need + /// to instantiate the derived class. + InlineCostCallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, + Call, IndirectCallParams, false); + if (CA.analyze()) { + // We were able to inline the indirect call! Subtract the cost from the + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + } + } else + // Otherwise simply add the cost for merely making the call. + addCost(InlineConstants::CallPenalty); + } + + void onFinalizeSwitch(unsigned JumpTableSize, + unsigned NumCaseCluster) override { + // If suitable for a jump table, consider the cost for the table size and + // branch to destination. + // Maximum valid cost increased in this function. + if (JumpTableSize) { + int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + + 4 * InlineConstants::InstrCost; + + addCost(JTCost, (int64_t)CostUpperBound); + return; + } + // Considering forming a binary search, we should find the number of nodes + // which is same as the number of comparisons when lowered. For a given + // number of clusters, n, we can define a recursive function, f(n), to find + // the number of nodes in the tree. The recursion is : + // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, + // and f(n) = n, when n <= 3. + // This will lead a binary tree where the leaf should be either f(2) or f(3) + // when n > 3. So, the number of comparisons from leaves should be n, while + // the number of non-leaf should be : + // 2^(log2(n) - 1) - 1 + // = 2^log2(n) * 2^-1 - 1 + // = n / 2 - 1. + // Considering comparisons from leaf and non-leaf nodes, we can estimate the + // number of comparisons in a simple closed form : + // n + n / 2 - 1 = n * 3 / 2 - 1 + if (NumCaseCluster <= 3) { + // Suppose a comparison includes one compare and one conditional branch. + addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); + return; + } + + int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; + int64_t SwitchCost = + ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; + + addCost(SwitchCost, (int64_t)CostUpperBound); + } + void onCommonInstructionSimplification() override { + addCost(InlineConstants::InstrCost); + } + + void onInitializeSROAArg(AllocaInst *Arg) override { + assert(Arg != nullptr && + "Should not initialize SROA costs for null value."); + SROAArgCosts[Arg] = 0; + EnabledSROAArgValues.insert(Arg); + } + + void onAggregateSROAUse(AllocaInst *SROAArg) override { + auto CostIt = SROAArgCosts.find(SROAArg); + assert(CostIt != SROAArgCosts.end() && + "expected this argument to have a cost"); + CostIt->second += InlineConstants::InstrCost; + SROACostSavings += InlineConstants::InstrCost; + } + + void onBlockAnalyzed(const BasicBlock *BB) override { + auto *TI = BB->getTerminator(); + // If we had any successors at this point, than post-inlining is likely to + // have them as well. Note that we assume any basic blocks which existed + // due to branches or switches which folded above will also fold after + // inlining. + if (SingleBB && TI->getNumSuccessors() > 1) { + // Take off the bonus we applied to the threshold. 
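The onFinalizeSwitch body above models a lowered switch either as a jump table (table size plus a few setup instructions) or, beyond three clusters, as a balanced binary search whose comparison count it approximates with the closed form 3n/2 - 1 derived in the comment. A quick standalone check of that closed form against the recurrence it comes from (illustrative only, not LLVM code):

    #include <cstdio>

    // f(n) = number of compares in the lowered binary search, per the
    // recurrence in the comment: f(n) = n for n <= 3, otherwise
    // f(n) = 1 + f(n/2) + f(n - n/2).
    static long f(long n) {
      if (n <= 3)
        return n;
      return 1 + f(n / 2) + f(n - n / 2);
    }

    int main() {
      for (long n = 4; n <= 64; ++n) {
        long Closed = 3 * n / 2 - 1; // the approximation used by the cost model
        std::printf("n=%2ld recurrence=%3ld closed-form=%3ld\n", n, f(n), Closed);
      }
      return 0;
    }

The closed form tracks the recurrence closely and matches it exactly for powers of two, which is all the heuristic needs before multiplying by two instructions (one compare, one conditional branch) per comparison.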
+ Threshold -= SingleBBBonus; + SingleBB = false; + } + } + InlineResult finalizeAnalysis() override { + // Loops generally act a lot like calls in that they act like barriers to + // movement, require a certain amount of setup, etc. So when optimising for + // size, we penalise any call sites that perform loops. We do this after all + // other costs here, so will likely only be dealing with relatively small + // functions (and hence DT and LI will hopefully be cheap). + auto *Caller = CandidateCall.getFunction(); + if (Caller->hasMinSize()) { + DominatorTree DT(F); + LoopInfo LI(DT); + int NumLoops = 0; + for (Loop *L : LI) { + // Ignore loops that will not be executed + if (DeadBlocks.count(L->getHeader())) + continue; + NumLoops++; + } + addCost(NumLoops * InlineConstants::CallPenalty); + } + + // We applied the maximum possible vector bonus at the beginning. Now, + // subtract the excess bonus, if any, from the Threshold before + // comparing against Cost. + if (NumVectorInstructions <= NumInstructions / 10) + Threshold -= VectorBonus; + else if (NumVectorInstructions <= NumInstructions / 2) + Threshold -= VectorBonus / 2; + + return Cost < std::max(1, Threshold); + } + bool shouldStop() override { + // Bail out the moment we cross the threshold. This means we'll under-count + // the cost, but only when undercounting doesn't matter. + return Cost >= Threshold && !ComputeFullInlineCost; + } + + void onLoadEliminationOpportunity() override { + LoadEliminationCost += InlineConstants::InstrCost; + } + + InlineResult onAnalysisStart() override { + // Perform some tweaks to the cost and threshold based on the direct + // callsite information. + + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. Note that these bonuses are some what arbitrary and evolved over + // time by accident as much as because they are principled bonuses. + // + // FIXME: It would be nice to remove all such bonuses. At least it would be + // nice to base the bonus values on something more scientific. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + + // Update the threshold based on callsite properties + updateThreshold(CandidateCall, F); + + // While Threshold depends on commandline options that can take negative + // values, we want to enforce the invariant that the computed threshold and + // bonuses are non-negative. + assert(Threshold >= 0); + assert(SingleBBBonus >= 0); + assert(VectorBonus >= 0); + + // Speculatively apply all possible bonuses to Threshold. If cost exceeds + // this Threshold any time, and cost cannot decrease, we can stop processing + // the rest of the function body. + Threshold += (SingleBBBonus + VectorBonus); + + // Give out bonuses for the callsite, as the instructions setting them up + // will be gone after inlining. + addCost(-getCallsiteCost(this->CandidateCall, DL)); + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (F.getCallingConv() == CallingConv::Cold) + Cost += InlineConstants::ColdccPenalty; + + // Check if we're done. This can happen due to bonuses and penalties. 
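The onAnalysisStart, onBlockAnalyzed and finalizeAnalysis hooks above implement an "apply bonuses up front, refund later" scheme: every bonus is speculatively added to Threshold before the walk so the Cost >= Threshold early exit can never fire too soon, and any bonus that turns out not to apply is subtracted again before the final Cost < max(1, Threshold) decision. A toy illustration of that bookkeeping order, with made-up numbers (the real values come from updateThreshold and the InlineParams):

    #include <algorithm>
    #include <cstdio>

    int main() {
      int Threshold = 225;                     // e.g. a default threshold
      const int SingleBBBonus = Threshold / 2; // hypothetical bonus values
      const int VectorBonus = Threshold;
      int Cost = 0;

      // onAnalysisStart: assume every bonus applies.
      Threshold += SingleBBBonus + VectorBonus;

      // onBlockAnalyzed: a block with two successors was seen, so the callee
      // is not a single basic block; take that bonus back.
      Threshold -= SingleBBBonus;

      // finalizeAnalysis: almost no vector code, refund that bonus too, then
      // make the final decision.
      Threshold -= VectorBonus;
      Cost += 200; // whatever the walk accumulated
      bool ShouldInline = Cost < std::max(1, Threshold);
      std::printf("cost=%d threshold=%d inline=%d\n", Cost, Threshold, ShouldInline);
      return 0;
    }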
+ if (Cost >= Threshold && !ComputeFullInlineCost) + return "high cost"; + + return true; + } + +public: + InlineCostCallAnalyzer( + const TargetTransformInfo &TTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, + CallBase &Call, const InlineParams &Params, bool BoostIndirect = true) + : CallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, Callee, Call), + ComputeFullInlineCost(OptComputeFullInlineCost || + Params.ComputeFullInlineCost || ORE), + Params(Params), Threshold(Params.DefaultThreshold), + BoostIndirectCalls(BoostIndirect) {} void dump(); -}; + virtual ~InlineCostCallAnalyzer() {} + int getThreshold() { return Threshold; } + int getCost() { return Cost; } +}; } // namespace /// Test whether the given value is an Alloca-derived function argument. @@ -328,55 +648,21 @@ bool CallAnalyzer::isAllocaDerivedArg(Value *V) { return SROAArgValues.count(V); } -/// Lookup the SROA-candidate argument and cost iterator which V maps to. -/// Returns false if V does not map to a SROA-candidate. -bool CallAnalyzer::lookupSROAArgAndCost( - Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) { - if (SROAArgValues.empty() || SROAArgCosts.empty()) - return false; - - DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V); - if (ArgIt == SROAArgValues.end()) - return false; - - Arg = ArgIt->second; - CostIt = SROAArgCosts.find(Arg); - return CostIt != SROAArgCosts.end(); -} - -/// Disable SROA for the candidate marked by this cost iterator. -/// -/// This marks the candidate as no longer viable for SROA, and adds the cost -/// savings associated with it back into the inline cost measurement. -void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { - // If we're no longer able to perform SROA we need to undo its cost savings - // and prevent subsequent analysis. - addCost(CostIt->second); - SROACostSavings -= CostIt->second; - SROACostSavingsLost += CostIt->second; - SROAArgCosts.erase(CostIt); +void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { + onDisableSROA(SROAArg); + EnabledSROAArgValues.erase(SROAArg); disableLoadElimination(); } - /// If 'V' maps to a SROA candidate, disable SROA for it. void CallAnalyzer::disableSROA(Value *V) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(V, SROAArg, CostIt)) - disableSROA(CostIt); -} - -/// Accumulate the given cost for a particular SROA candidate. 
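The hunks around here replace the Value-keyed lookupSROAArgAndCost/accumulateSROACost machinery with AllocaInst-keyed maps plus the onInitializeSROAArg, onAggregateSROAUse and onDisableSROA hooks: savings are credited per SROA-able use, and the moment an alloca is known to escape SROA the accumulated credit is added back into Cost. A small standalone sketch of that ledger, using hypothetical types rather than the LLVM classes:

    #include <cstdio>
    #include <map>

    struct Alloca {};

    // Toy version of the SROA savings bookkeeping: credit each eliminable use,
    // refund everything for an alloca once SROA is known to be impossible.
    class SroaLedger {
      std::map<const Alloca *, int> Savings; // per-alloca accumulated credit
      int Cost = 0;

    public:
      void initialize(const Alloca *A) { Savings[A] = 0; }

      void accountUse(const Alloca *A, int InstrCost) {
        auto It = Savings.find(A);
        if (It != Savings.end())
          It->second += InstrCost; // this use would disappear if SROA fires
        else
          Cost += InstrCost;       // not an SROA candidate: pay for it now
      }

      void disable(const Alloca *A) {
        auto It = Savings.find(A);
        if (It == Savings.end())
          return;
        Cost += It->second;        // the speculative savings never materialize
        Savings.erase(It);
      }

      int cost() const { return Cost; }
    };

    int main() {
      Alloca A;
      SroaLedger L;
      L.initialize(&A);
      L.accountUse(&A, 5); // e.g. a simple load
      L.accountUse(&A, 5); // e.g. a simple store
      L.disable(&A);       // e.g. the address escapes into an opaque call
      std::printf("cost after disabling SROA: %d\n", L.cost());
      return 0;
    }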
-void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, - int InstructionCost) { - CostIt->second += InstructionCost; - SROACostSavings += InstructionCost; + if (auto *SROAArg = getSROAArgForValueOrNull(V)) { + disableSROAForArg(SROAArg); + } } void CallAnalyzer::disableLoadElimination() { if (EnableLoadElimination) { - addCost(LoadEliminationCost); - LoadEliminationCost = 0; + onDisableLoadElimination(); EnableLoadElimination = false; } } @@ -422,9 +708,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { Operands.push_back(GEP.getOperand(0)); for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) if (Constant *SimpleOp = SimplifiedValues.lookup(*I)) - Operands.push_back(SimpleOp); - else - Operands.push_back(*I); + Operands.push_back(SimpleOp); + else + Operands.push_back(*I); return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands); } @@ -436,7 +722,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) { Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( - AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize); + AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(), + AllocatedSize); return Base::visitAlloca(I); } } @@ -444,7 +731,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Accumulate the allocated size. if (I.isStaticAlloca()) { Type *Ty = I.getAllocatedType(); - AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize); + AllocatedSize = + SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(), AllocatedSize); } // We will happily inline static alloca instructions. @@ -546,9 +834,7 @@ bool CallAnalyzer::visitPHI(PHINode &I) { if (FirstBaseAndOffset.first) { ConstantOffsetPtrs[&I] = FirstBaseAndOffset; - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(FirstV)) SROAArgValues[&I] = SROAArg; } @@ -578,10 +864,7 @@ bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) { } bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - bool SROACandidate = - lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt); + auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand()); // Lambda to check whether a GEP's indices are all constant. auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { @@ -592,7 +875,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { }; if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { - if (SROACandidate) + if (SROAArg) SROAArgValues[&I] = SROAArg; // Constant GEPs are modeled as free. @@ -600,8 +883,8 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { } // Variable GEPs will require math and will disable SROA. - if (SROACandidate) - disableSROA(CostIt); + if (SROAArg) + disableSROAForArg(SROAArg); return isGEPFree(I); } @@ -641,9 +924,7 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) { ConstantOffsetPtrs[&I] = BaseAndOffset; // Also look for SROA candidates here. - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; // Bitcasts are always zero cost. @@ -675,9 +956,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // and so we can just add the integer in here. 
The only places where SROA is // preserved either cannot fire on an integer, or won't in-and-of themselves // disable SROA (ext) w/o some later use that we would see and disable. - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); @@ -701,9 +980,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { } // "Propagate" SROA here in the same manner as we do for ptrtoint above. - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(Op)) SROAArgValues[&I] = SROAArg; return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); @@ -730,7 +1007,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { case Instruction::FPToUI: case Instruction::FPToSI: if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) - addCost(InlineConstants::CallPenalty); + onCallPenalty(); break; default: break; @@ -803,8 +1080,8 @@ bool CallAnalyzer::allowSizeGrowth(CallBase &Call) { return true; } -bool CallAnalyzer::isColdCallSite(CallBase &Call, - BlockFrequencyInfo *CallerBFI) { +bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call, + BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's coldness is // determined based on that. if (PSI && PSI->hasProfileSummary()) @@ -827,8 +1104,8 @@ bool CallAnalyzer::isColdCallSite(CallBase &Call, } Optional<int> -CallAnalyzer::getHotCallSiteThreshold(CallBase &Call, - BlockFrequencyInfo *CallerBFI) { +InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call, + BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's hotness is // determined based on that. @@ -855,7 +1132,7 @@ CallAnalyzer::getHotCallSiteThreshold(CallBase &Call, return None; } -void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { +void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // If no size growth is allowed for this inlining, set Threshold to 0. if (!allowSizeGrowth(Call)) { Threshold = 0; @@ -1017,19 +1294,7 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) { : ConstantInt::getFalse(I.getType()); return true; } - // Finally check for SROA candidates in comparisons. - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (isa<ConstantPointerNull>(I.getOperand(1))) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; + return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1))); } bool CallAnalyzer::visitSub(BinaryOperator &I) { @@ -1070,8 +1335,8 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *SimpleV = nullptr; if (auto FI = dyn_cast<FPMathOperator>(&I)) - SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS, - CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); + SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, + FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? 
CRHS : RHS, DL); @@ -1093,7 +1358,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { if (I.getType()->isFloatingPointTy() && TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive && !match(&I, m_FNeg(m_Value()))) - addCost(InlineConstants::CallPenalty); + onCallPenalty(); return false; } @@ -1104,9 +1369,8 @@ bool CallAnalyzer::visitFNeg(UnaryOperator &I) { if (!COp) COp = SimplifiedValues.lookup(Op); - Value *SimpleV = SimplifyFNegInst(COp ? COp : Op, - cast<FPMathOperator>(I).getFastMathFlags(), - DL); + Value *SimpleV = SimplifyFNegInst( + COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL); if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) SimplifiedValues[&I] = C; @@ -1121,23 +1385,15 @@ bool CallAnalyzer::visitFNeg(UnaryOperator &I) { } bool CallAnalyzer::visitLoad(LoadInst &I) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } + if (handleSROA(I.getPointerOperand(), I.isSimple())) + return true; // If the data is already loaded from this address and hasn't been clobbered // by any stores or calls, this load is likely to be redundant and can be // eliminated. if (EnableLoadElimination && !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) { - LoadEliminationCost += InlineConstants::InstrCost; + onLoadEliminationOpportunity(); return true; } @@ -1145,16 +1401,8 @@ bool CallAnalyzer::visitLoad(LoadInst &I) { } bool CallAnalyzer::visitStore(StoreInst &I) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } + if (handleSROA(I.getPointerOperand(), I.isSimple())) + return true; // The store can potentially clobber loads and prevent repeated loads from // being eliminated. @@ -1236,97 +1484,69 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate()) ContainsNoDuplicateCall = true; - if (Function *F = Call.getCalledFunction()) { - // When we have a concrete function, first try to simplify it directly. - if (simplifyCallSite(F, Call)) - return true; - - // Next check if it is an intrinsic we know about. - // FIXME: Lift this into part of the InstVisitor. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) { - switch (II->getIntrinsicID()) { - default: - if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) - disableLoadElimination(); - return Base::visitCallBase(Call); - - case Intrinsic::load_relative: - // This is normally lowered to 4 LLVM instructions. - addCost(3 * InlineConstants::InstrCost); - return false; - - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: + Value *Callee = Call.getCalledOperand(); + Function *F = dyn_cast_or_null<Function>(Callee); + bool IsIndirectCall = !F; + if (IsIndirectCall) { + // Check if this happens to be an indirect function call to a known function + // in this inline context. If not, we've done all we can. + F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); + if (!F) { + onCallArgumentSetup(Call); + + if (!Call.onlyReadsMemory()) disableLoadElimination(); - // SROA can usually chew through these intrinsics, but they aren't free. 
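The visitLoad/visitStore changes above route the load-elimination heuristic through onLoadEliminationOpportunity and onDisableLoadElimination: a repeated, unordered load from an address already in LoadAddrSet is assumed redundant and credited, while any potential clobber refunds the accumulated credit and turns the optimization off. A toy model of that bookkeeping (hypothetical class, not the LLVM code):

    #include <cstdio>
    #include <set>

    // First load of an address is paid for, repeated clobber-free loads are
    // credited, and a clobber refunds all accumulated credit.
    class LoadEliminationModel {
      std::set<const void *> SeenAddrs;
      bool Enabled = true;
      int Credit = 0;
      int Cost = 0;

    public:
      void visitLoad(const void *Addr, int InstrCost) {
        if (Enabled && !SeenAddrs.insert(Addr).second) {
          Credit += InstrCost; // likely redundant: assume it gets eliminated
          return;
        }
        Cost += InstrCost;
      }

      void visitClobber() { // a store or a call that may write memory
        if (!Enabled)
          return;
        Cost += Credit;     // the assumed savings are no longer safe
        Credit = 0;
        Enabled = false;
      }

      int cost() const { return Cost; }
    };

    int main() {
      int X = 0;
      LoadEliminationModel M;
      M.visitLoad(&X, 5); // first load: paid
      M.visitLoad(&X, 5); // repeated load: credited
      M.visitClobber();   // e.g. an opaque call: credit refunded
      M.visitLoad(&X, 5); // elimination disabled: paid
      std::printf("modelled cost: %d\n", M.cost());
      return 0;
    }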
- return false; - case Intrinsic::icall_branch_funnel: - case Intrinsic::localescape: - HasUninlineableIntrinsic = true; - return false; - case Intrinsic::vastart: - InitsVargArgs = true; - return false; - } + return Base::visitCallBase(Call); } + } - if (F == Call.getFunction()) { - // This flag will fully abort the analysis, so don't bother with anything - // else. - IsRecursiveCall = true; - return false; - } + assert(F && "Expected a call to a known function"); + + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, Call)) + return true; - if (TTI.isLoweredToCall(F)) { - // We account for the average 1 instruction per call argument setup - // here. - addCost(Call.arg_size() * InlineConstants::InstrCost); + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) { + switch (II->getIntrinsicID()) { + default: + if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) + disableLoadElimination(); + return Base::visitCallBase(Call); - // Everything other than inline ASM will also have a significant cost - // merely from making the call. - if (!isa<InlineAsm>(Call.getCalledValue())) - addCost(InlineConstants::CallPenalty); - } + case Intrinsic::load_relative: + onLoadRelativeIntrinsic(); + return false; - if (!Call.onlyReadsMemory()) + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: disableLoadElimination(); - return Base::visitCallBase(Call); + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + case Intrinsic::icall_branch_funnel: + case Intrinsic::localescape: + HasUninlineableIntrinsic = true; + return false; + case Intrinsic::vastart: + InitsVargArgs = true; + return false; + } } - // Otherwise we're in a very special case -- an indirect function call. See - // if we can be particularly clever about this. - Value *Callee = Call.getCalledValue(); - - // First, pay the price of the argument setup. We account for the average - // 1 instruction per call argument setup here. - addCost(Call.arg_size() * InlineConstants::InstrCost); - - // Next, check if this happens to be an indirect function call to a known - // function in this inline context. If not, we've done all we can. - Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); - if (!F) { - if (!Call.onlyReadsMemory()) - disableLoadElimination(); - return Base::visitCallBase(Call); + if (F == Call.getFunction()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursiveCall = true; + return false; } - // If we have a constant that we are calling as a function, we can peer - // through it and see the function target. This happens not infrequently - // during devirtualization and so we want to give it a hefty bonus for - // inlining, but cap that bonus in the event that inlining wouldn't pan - // out. Pretend to inline the function, with a custom threshold. - auto IndirectCallParams = Params; - IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call, - IndirectCallParams); - if (CA.analyzeCall(Call)) { - // We were able to inline the indirect call! Subtract the cost from the - // threshold to get the bonus we want to apply, but don't go below zero. 
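The indirect-call handling being moved into onLoweredCall works by pretend-inlining the discovered target under a dedicated threshold and converting any headroom into a bonus for the call site under analysis, never letting the bonus go negative. A toy illustration of that arithmetic, with illustrative numbers only:

    #include <algorithm>
    #include <cstdio>

    int main() {
      int Cost = 120;                        // cost accumulated so far in the caller
      const int IndirectCallThreshold = 100; // hypothetical nested threshold

      // Result of the nested analysis of the discovered callee.
      int NestedThreshold = IndirectCallThreshold;
      int NestedCost = 35;
      bool NestedWouldInline = NestedCost < std::max(1, NestedThreshold);

      if (NestedWouldInline)
        Cost -= std::max(0, NestedThreshold - NestedCost); // bonus of 65
      else
        Cost += 25;                          // otherwise just pay a call penalty

      std::printf("caller cost after indirect-call handling: %d\n", Cost);
      return 0;
    }

The FIXME in the new code notes the remaining wart: the nested analyzer is hard-coded to InlineCostCallAnalyzer, so a future subclass would need to instantiate itself here instead.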
- Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + if (TTI.isLoweredToCall(F)) { + onLoweredCall(F, Call, IsIndirectCall); } - if (!F->onlyReadsMemory()) + if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory()))) disableLoadElimination(); return Base::visitCallBase(Call); } @@ -1379,9 +1599,7 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) { if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) { ConstantOffsetPtrs[&SI] = TrueBaseAndOffset; - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal)) SROAArgValues[&SI] = SROAArg; return true; } @@ -1420,9 +1638,7 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) { if (BaseAndOffset.first) { ConstantOffsetPtrs[&SI] = BaseAndOffset; - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV)) SROAArgValues[&SI] = SROAArg; } @@ -1450,62 +1666,12 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // inlining those. It will prevent inlining in cases where the optimization // does not (yet) fire. - // Maximum valid cost increased in this function. - int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; - - // Exit early for a large switch, assuming one case needs at least one - // instruction. - // FIXME: This is not true for a bit test, but ignore such case for now to - // save compile-time. - int64_t CostLowerBound = - std::min((int64_t)CostUpperBound, - (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - - if (CostLowerBound > Threshold && !ComputeFullInlineCost) { - addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost); - return false; - } - unsigned JumpTableSize = 0; + BlockFrequencyInfo *BFI = GetBFI ? &((*GetBFI)(F)) : nullptr; unsigned NumCaseCluster = - TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); + TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI); - // If suitable for a jump table, consider the cost for the table size and - // branch to destination. - if (JumpTableSize) { - int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + - 4 * InlineConstants::InstrCost; - - addCost(JTCost, (int64_t)CostUpperBound); - return false; - } - - // Considering forming a binary search, we should find the number of nodes - // which is same as the number of comparisons when lowered. For a given - // number of clusters, n, we can define a recursive function, f(n), to find - // the number of nodes in the tree. The recursion is : - // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, - // and f(n) = n, when n <= 3. - // This will lead a binary tree where the leaf should be either f(2) or f(3) - // when n > 3. So, the number of comparisons from leaves should be n, while - // the number of non-leaf should be : - // 2^(log2(n) - 1) - 1 - // = 2^log2(n) * 2^-1 - 1 - // = n / 2 - 1. - // Considering comparisons from leaf and non-leaf nodes, we can estimate the - // number of comparisons in a simple closed form : - // n + n / 2 - 1 = n * 3 / 2 - 1 - if (NumCaseCluster <= 3) { - // Suppose a comparison includes one compare and one conditional branch. 
- addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); - return false; - } - - int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; - int64_t SwitchCost = - ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; - - addCost(SwitchCost, (int64_t)CostUpperBound); + onFinalizeSwitch(JumpTableSize, NumCaseCluster); return false; } @@ -1598,7 +1764,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, if (Base::visit(&*I)) ++NumInstructionsSimplified; else - addCost(InlineConstants::InstrCost); + onCommonInstructionSimplification(); using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. @@ -1643,9 +1809,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, return IR; } - // Check if we've passed the maximum possible threshold so we don't spin in - // huge basic blocks that will never inline. - if (Cost >= Threshold && !ComputeFullInlineCost) + if (shouldStop()) return false; } @@ -1687,8 +1851,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); - Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS); - return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); + Type *IdxPtrTy = DL.getIndexType(V->getType()); + return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset)); } /// Find dead blocks due to deleted CFG edges during inlining. @@ -1736,54 +1900,17 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { /// factors and heuristics. If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by /// some artifact of the routine. -InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { +InlineResult CallAnalyzer::analyze() { ++NumCallsAnalyzed; - // Perform some tweaks to the cost and threshold based on the direct - // callsite information. - - // We want to more aggressively inline vector-dense kernels, so up the - // threshold, and we'll lower it if the % of vector instructions gets too - // low. Note that these bonuses are some what arbitrary and evolved over time - // by accident as much as because they are principled bonuses. - // - // FIXME: It would be nice to remove all such bonuses. At least it would be - // nice to base the bonus values on something more scientific. - assert(NumInstructions == 0); - assert(NumVectorInstructions == 0); - - // Update the threshold based on callsite properties - updateThreshold(Call, F); - - // While Threshold depends on commandline options that can take negative - // values, we want to enforce the invariant that the computed threshold and - // bonuses are non-negative. - assert(Threshold >= 0); - assert(SingleBBBonus >= 0); - assert(VectorBonus >= 0); - - // Speculatively apply all possible bonuses to Threshold. If cost exceeds - // this Threshold any time, and cost cannot decrease, we can stop processing - // the rest of the function body. - Threshold += (SingleBBBonus + VectorBonus); - - // Give out bonuses for the callsite, as the instructions setting them up - // will be gone after inlining. - addCost(-getCallsiteCost(Call, DL)); - - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; - - // Check if we're done. This can happen due to bonuses and penalties. 
- if (Cost >= Threshold && !ComputeFullInlineCost) - return "high cost"; + auto Result = onAnalysisStart(); + if (!Result) + return Result; if (F.empty()) return true; - Function *Caller = Call.getFunction(); + Function *Caller = CandidateCall.getFunction(); // Check if the caller function is recursive itself. for (User *U : Caller->users()) { CallBase *Call = dyn_cast<CallBase>(U); @@ -1795,10 +1922,10 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. - auto CAI = Call.arg_begin(); + auto CAI = CandidateCall.arg_begin(); for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); FAI != FAE; ++FAI, ++CAI) { - assert(CAI != Call.arg_end()); + assert(CAI != CandidateCall.arg_end()); if (Constant *C = dyn_cast<Constant>(CAI)) SimplifiedValues[&*FAI] = C; @@ -1807,9 +1934,9 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. - if (isa<AllocaInst>(PtrArg)) { - SROAArgValues[&*FAI] = PtrArg; - SROAArgCosts[PtrArg] = 0; + if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) { + SROAArgValues[&*FAI] = SROAArg; + onInitializeSROAArg(SROAArg); } } } @@ -1835,12 +1962,10 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); - bool SingleBB = true; + // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { - // Bail out the moment we cross the threshold. This means we'll under-count - // the cost, but only when undercounting doesn't matter. - if (Cost >= Threshold && !ComputeFullInlineCost) + if (shouldStop()) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1900,57 +2025,23 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { ++TIdx) BBWorklist.insert(TI->getSuccessor(TIdx)); - // If we had any successors at this point, than post-inlining is likely to - // have them as well. Note that we assume any basic blocks which existed - // due to branches or switches which folded above will also fold after - // inlining. - if (SingleBB && TI->getNumSuccessors() > 1) { - // Take off the bonus we applied to the threshold. - Threshold -= SingleBBBonus; - SingleBB = false; - } + onBlockAnalyzed(BB); } - bool OnlyOneCallAndLocalLinkage = - F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction(); + bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + &F == CandidateCall.getCalledFunction(); // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return "noduplicate"; - // Loops generally act a lot like calls in that they act like barriers to - // movement, require a certain amount of setup, etc. So when optimising for - // size, we penalise any call sites that perform loops. We do this after all - // other costs here, so will likely only be dealing with relatively small - // functions (and hence DT and LI will hopefully be cheap). 
- if (Caller->hasMinSize()) { - DominatorTree DT(F); - LoopInfo LI(DT); - int NumLoops = 0; - for (Loop *L : LI) { - // Ignore loops that will not be executed - if (DeadBlocks.count(L->getHeader())) - continue; - NumLoops++; - } - addCost(NumLoops * InlineConstants::CallPenalty); - } - - // We applied the maximum possible vector bonus at the beginning. Now, - // subtract the excess bonus, if any, from the Threshold before - // comparing against Cost. - if (NumVectorInstructions <= NumInstructions / 10) - Threshold -= VectorBonus; - else if (NumVectorInstructions <= NumInstructions / 2) - Threshold -= VectorBonus/2; - - return Cost < std::max(1, Threshold); + return finalizeAnalysis(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Dump stats about this call's analysis. -LLVM_DUMP_METHOD void CallAnalyzer::dump() { +LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); @@ -2084,9 +2175,9 @@ InlineCost llvm::getInlineCost( LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Caller->getName() << ")\n"); - CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, - Call, Params); - InlineResult ShouldInline = CA.analyzeCall(Call); + InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, + *Callee, Call, Params); + InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); @@ -2132,16 +2223,17 @@ InlineResult llvm::isInlineViable(Function &F) { switch (Call->getCalledFunction()->getIntrinsicID()) { default: break; - // Disallow inlining of @llvm.icall.branch.funnel because current - // backend can't separate call targets from call arguments. case llvm::Intrinsic::icall_branch_funnel: + // Disallow inlining of @llvm.icall.branch.funnel because current + // backend can't separate call targets from call arguments. return "disallowed inlining of @llvm.icall.branch.funnel"; - // Disallow inlining functions that call @llvm.localescape. Doing this - // correctly would require major changes to the inliner. case llvm::Intrinsic::localescape: + // Disallow inlining functions that call @llvm.localescape. Doing this + // correctly would require major changes to the inliner. return "disallowed inlining of @llvm.localescape"; - // Disallow inlining of functions that initialize VarArgs with va_start. case llvm::Intrinsic::vastart: + // Disallow inlining of functions that initialize VarArgs with + // va_start. return "contains VarArgs initialized with va_start"; } } @@ -2184,7 +2276,8 @@ InlineParams llvm::getInlineParams(int Threshold) { if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; - // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold. + // Set the ColdCallSiteThreshold knob from the + // -inline-cold-callsite-threshold. 
Params.ColdCallSiteThreshold = ColdCallSiteThreshold; // Set the OptMinSizeThreshold and OptSizeThreshold params only if the diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp index 943a99a5f46d..bb9c7b7eb11f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp index 35190ce3e11a..415797d6a378 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp index 941a68c5e6fd..d7510c899101 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp @@ -56,8 +56,8 @@ static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &, const SimplifyQuery &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); -static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, - const SimplifyQuery &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &, + const SimplifyQuery &, unsigned); static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -137,6 +137,71 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, CRHS == LHS; } +/// Simplify comparison with true or false branch of select: +/// %sel = select i1 %cond, i32 %tv, i32 %fv +/// %cmp = icmp sle i32 %sel, %rhs +/// Compose new comparison by substituting %sel with either %tv or %fv +/// and see if it simplifies. +static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, Value *Cond, + const SimplifyQuery &Q, unsigned MaxRecurse, + Constant *TrueOrFalse) { + Value *SimplifiedCmp = SimplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse); + if (SimplifiedCmp == Cond) { + // %cmp simplified to the select condition (%cond). + return TrueOrFalse; + } else if (!SimplifiedCmp && isSameCompare(Cond, Pred, LHS, RHS)) { + // It didn't simplify. However, if composed comparison is equivalent + // to the select condition (%cond) then we can replace it. 
+ return TrueOrFalse; + } + return SimplifiedCmp; +} + +/// Simplify comparison with true branch of select +static Value *simplifyCmpSelTrueCase(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, Value *Cond, + const SimplifyQuery &Q, + unsigned MaxRecurse) { + return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse, + getTrue(Cond->getType())); +} + +/// Simplify comparison with false branch of select +static Value *simplifyCmpSelFalseCase(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, Value *Cond, + const SimplifyQuery &Q, + unsigned MaxRecurse) { + return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse, + getFalse(Cond->getType())); +} + +/// We know comparison with both branches of select can be simplified, but they +/// are not equal. This routine handles some logical simplifications. +static Value *handleOtherCmpSelSimplifications(Value *TCmp, Value *FCmp, + Value *Cond, + const SimplifyQuery &Q, + unsigned MaxRecurse) { + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". + if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = SimplifyXorInst( + Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse)) + return V; + return nullptr; +} + /// Does the given value dominate the specified phi node? static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast<Instruction>(V); @@ -398,6 +463,12 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, /// In the case of a comparison with a select instruction, try to simplify the /// comparison by seeing whether both branches of the select result in the same /// value. Returns the common value if so, otherwise returns null. +/// For example, if we have: +/// %tmp = select i1 %cmp, i32 1, i32 2 +/// %cmp1 = icmp sle i32 %tmp, 3 +/// We can simplify %cmp1 to true, because both branches of select are +/// less than 3. We compose new comparison by substituting %tmp with both +/// branches of select and see if it can be simplified. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -418,32 +489,14 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? - Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse); - if (TCmp == Cond) { - // It not only simplified, it simplified to the select condition. Replace - // it with 'true'. - TCmp = getTrue(Cond->getType()); - } else if (!TCmp) { - // It didn't simplify. However if "cmp TV, RHS" is equal to the select - // condition then we can replace it with 'true'. Otherwise give up. 
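The helpers factored out above (simplifyCmpSelTrueCase, simplifyCmpSelFalseCase, handleOtherCmpSelSimplifications) thread a comparison over a select: with Result = Cond ? TCmp : FCmp, a compare that simplifies on one arm can often be expressed purely in terms of the select condition. A quick exhaustive check of the boolean identities the last helper relies on (standalone, illustrative only):

    #include <cassert>
    #include <cstdio>

    // With Result = Cond ? TCmp : FCmp:
    //   FCmp == false               =>  Result == (Cond && TCmp)
    //   TCmp == true                =>  Result == (Cond || FCmp)
    //   FCmp == true, TCmp == false =>  Result == !Cond
    int main() {
      for (int Cond = 0; Cond <= 1; ++Cond)
        for (int TCmp = 0; TCmp <= 1; ++TCmp)
          for (int FCmp = 0; FCmp <= 1; ++FCmp) {
            bool Result = Cond ? TCmp : FCmp;
            if (!FCmp)
              assert(Result == (Cond && TCmp));
            if (TCmp)
              assert(Result == (Cond || FCmp));
            if (FCmp && !TCmp)
              assert(Result == !Cond);
          }
      std::printf("all select/compare folding identities hold\n");
      return 0;
    }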
- if (!isSameCompare(Cond, Pred, TV, RHS)) - return nullptr; - TCmp = getTrue(Cond->getType()); - } + Value *TCmp = simplifyCmpSelTrueCase(Pred, TV, RHS, Cond, Q, MaxRecurse); + if (!TCmp) + return nullptr; // Does "cmp FV, RHS" simplify? - Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse); - if (FCmp == Cond) { - // It not only simplified, it simplified to the select condition. Replace - // it with 'false'. - FCmp = getFalse(Cond->getType()); - } else if (!FCmp) { - // It didn't simplify. However if "cmp FV, RHS" is equal to the select - // condition then we can replace it with 'false'. Otherwise give up. - if (!isSameCompare(Cond, Pred, FV, RHS)) - return nullptr; - FCmp = getFalse(Cond->getType()); - } + Value *FCmp = simplifyCmpSelFalseCase(Pred, FV, RHS, Cond, Q, MaxRecurse); + if (!FCmp) + return nullptr; // If both sides simplified to the same value, then use it as the result of // the original comparison. @@ -452,26 +505,8 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // The remaining cases only make sense if the select condition has the same // type as the result of the comparison, so bail out if this is not so. - if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) - return nullptr; - // If the false value simplified to false, then the result of the compare - // is equal to "Cond && TCmp". This also catches the case when the false - // value simplified to false and the true value to true, returning "Cond". - if (match(FCmp, m_Zero())) - if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) - return V; - // If the true value simplified to true, then the result of the compare - // is equal to "Cond || FCmp". - if (match(TCmp, m_One())) - if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) - return V; - // Finally, if the false value simplified to true and the true value to - // false, then the result of the compare is equal to "!Cond". - if (match(FCmp, m_One()) && match(TCmp, m_Zero())) - if (Value *V = - SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), - Q, MaxRecurse)) - return V; + if (Cond->getType()->isVectorTy() == RHS->getType()->isVectorTy()) + return handleOtherCmpSelSimplifications(TCmp, FCmp, Cond, Q, MaxRecurse); return nullptr; } @@ -543,10 +578,16 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // Evaluate the BinOp on the incoming phi values. Value *CommonValue = nullptr; - for (Value *Incoming : PI->incoming_values()) { + for (unsigned u = 0, e = PI->getNumIncomingValues(); u < e; ++u) { + Value *Incoming = PI->getIncomingValue(u); + Instruction *InTI = PI->getIncomingBlock(u)->getTerminator(); // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; - Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); + // Change the context instruction to the "edge" that flows into the phi. + // This is important because that is where incoming is actually "evaluated" + // even though it is used later somewhere else. + Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI), + MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. 
if (!V || (CommonValue && V != CommonValue)) @@ -656,16 +697,16 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, bool AllowNonInbounds = false) { assert(V->getType()->isPtrOrPtrVectorTy()); - Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); - APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); + Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType(); + APInt Offset = APInt::getNullValue(IntIdxTy->getIntegerBitWidth()); V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds); // As that strip may trace through `addrspacecast`, need to sext or trunc // the offset calculated. - IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); - Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth()); + IntIdxTy = DL.getIndexType(V->getType())->getScalarType(); + Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth()); - Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); + Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset); if (V->getType()->isVectorTy()) return ConstantVector::getSplat(V->getType()->getVectorNumElements(), OffsetIntPtr); @@ -1371,7 +1412,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, /// Commuted variants are assumed to be handled by calling this function again /// with the parameters swapped. static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, - ICmpInst *UnsignedICmp, bool IsAnd) { + ICmpInst *UnsignedICmp, bool IsAnd, + const SimplifyQuery &Q) { Value *X, *Y; ICmpInst::Predicate EqPred; @@ -1380,6 +1422,59 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, return nullptr; ICmpInst::Predicate UnsignedPred; + + Value *A, *B; + // Y = (A - B); + if (match(Y, m_Sub(m_Value(A), m_Value(B)))) { + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) && + ICmpInst::isUnsigned(UnsignedPred)) { + if (UnsignedICmp->getOperand(0) != A) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + // A >=/<= B || (A - B) != 0 <--> true + if ((UnsignedPred == ICmpInst::ICMP_UGE || + UnsignedPred == ICmpInst::ICMP_ULE) && + EqPred == ICmpInst::ICMP_NE && !IsAnd) + return ConstantInt::getTrue(UnsignedICmp->getType()); + // A </> B && (A - B) == 0 <--> false + if ((UnsignedPred == ICmpInst::ICMP_ULT || + UnsignedPred == ICmpInst::ICMP_UGT) && + EqPred == ICmpInst::ICMP_EQ && IsAnd) + return ConstantInt::getFalse(UnsignedICmp->getType()); + + // A </> B && (A - B) != 0 <--> A </> B + // A </> B || (A - B) != 0 <--> (A - B) != 0 + if (EqPred == ICmpInst::ICMP_NE && (UnsignedPred == ICmpInst::ICMP_ULT || + UnsignedPred == ICmpInst::ICMP_UGT)) + return IsAnd ? UnsignedICmp : ZeroICmp; + + // A <=/>= B && (A - B) == 0 <--> (A - B) == 0 + // A <=/>= B || (A - B) == 0 <--> A <=/>= B + if (EqPred == ICmpInst::ICMP_EQ && (UnsignedPred == ICmpInst::ICMP_ULE || + UnsignedPred == ICmpInst::ICMP_UGE)) + return IsAnd ? 
ZeroICmp : UnsignedICmp; + } + + // Given Y = (A - B) + // Y >= A && Y != 0 --> Y >= A iff B != 0 + // Y < A || Y == 0 --> Y < A iff B != 0 + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) { + if (UnsignedICmp->getOperand(0) != Y) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd && + EqPred == ICmpInst::ICMP_NE && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return UnsignedICmp; + if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd && + EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return UnsignedICmp; + } + } + if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) && ICmpInst::isUnsigned(UnsignedPred)) ; @@ -1395,19 +1490,33 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE) return IsAnd ? UnsignedICmp : ZeroICmp; - // X >= Y || Y != 0 --> true + // X <= Y && Y != 0 --> X <= Y iff X != 0 + // X <= Y || Y != 0 --> Y != 0 iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? UnsignedICmp : ZeroICmp; + + // X >= Y && Y == 0 --> Y == 0 // X >= Y || Y == 0 --> X >= Y - if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) { - if (EqPred == ICmpInst::ICMP_NE) - return getTrue(UnsignedICmp->getType()); - return UnsignedICmp; - } + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ) + return IsAnd ? ZeroICmp : UnsignedICmp; + + // X > Y && Y == 0 --> Y == 0 iff X != 0 + // X > Y || Y == 0 --> X > Y iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? 
ZeroICmp : UnsignedICmp; // X < Y && Y == 0 --> false if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ && IsAnd) return getFalse(UnsignedICmp->getType()); + // X >= Y || Y != 0 --> true + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_NE && + !IsAnd) + return getTrue(UnsignedICmp->getType()); + return nullptr; } @@ -1587,10 +1696,10 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, } static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, - const InstrInfoQuery &IIQ) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) + const SimplifyQuery &Q) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q)) return X; - if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true)) + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, Q)) return X; if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) @@ -1604,9 +1713,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true)) return X; - if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ)) + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; - if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ)) + if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; return nullptr; @@ -1660,10 +1769,10 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, } static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, - const InstrInfoQuery &IIQ) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) + const SimplifyQuery &Q) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, Q)) return X; - if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false)) + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, Q)) return X; if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) @@ -1677,9 +1786,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false)) return X; - if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ)) + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; - if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ)) + if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; return nullptr; @@ -1738,8 +1847,8 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, auto *ICmp0 = dyn_cast<ICmpInst>(Op0); auto *ICmp1 = dyn_cast<ICmpInst>(Op1); if (ICmp0 && ICmp1) - V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ) - : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ); + V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q) + : simplifyOrOfICmps(ICmp0, ICmp1, Q); auto *FCmp0 = dyn_cast<FCmpInst>(Op0); auto *FCmp1 = dyn_cast<FCmpInst>(Op1); @@ -1759,6 +1868,77 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, return nullptr; } +/// Check that the Op1 is in expected form, i.e.: +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1, + Value *X) { + auto *Extract = dyn_cast<ExtractValueInst>(Op1); + // We should only be extracting the overflow bit. + if (!Extract || !Extract->getIndices().equals(1)) + return false; + Value *Agg = Extract->getAggregateOperand(); + // This should be a multiplication-with-overflow intrinsic. 
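As a rough illustration of the extended unsigned range-check folds above (hand-written sketch, not taken from the patch or its tests; function names are made up), InstSimplify should now reduce patterns like these:

define i1 @uge_or_sub_nonzero(i8 %a, i8 %b) {
  %cmp = icmp uge i8 %a, %b
  %sub = sub i8 %a, %b
  %nz = icmp ne i8 %sub, 0
  %r = or i1 %cmp, %nz
  ret i1 %r                 ; A >= B || (A - B) != 0 --> true
}

define i1 @ugt_and_eq_zero(i8 %x, i8 %y) {
  %xnz = or i8 %x, 1        ; known non-zero
  %cmp = icmp ugt i8 %xnz, %y
  %eq = icmp eq i8 %y, 0
  %r = and i1 %cmp, %eq
  ret i1 %r                 ; X > Y && Y == 0 --> Y == 0, given X is known non-zero
}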
+ if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(), + m_Intrinsic<Intrinsic::smul_with_overflow>()))) + return false; + // One of its multipliers should be the value we checked for zero before. + if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)), + m_Argument<1>(m_Specific(X))))) + return false; + return true; +} + +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. +/// Look for: +/// %Op0 = icmp ne i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %??? = and i1 %Op0, %Op1 +/// We can just return %Op1 +static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_NE) + return nullptr; + // Is Op1 in expected form? + if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X)) + return nullptr; + // Can omit 'and', and just return the overflow bit. + return Op1; +} + +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. +/// Look for: +/// %Op0 = icmp eq i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %NotOp1 = xor i1 %Op1, true +/// %or = or i1 %Op0, %NotOp1 +/// We can just return %NotOp1 +static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0, + Value *NotOp1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_EQ) + return nullptr; + // We expect the other hand of an 'or' to be a 'not'. + Value *Op1; + if (!match(NotOp1, m_Not(m_Value(Op1)))) + return nullptr; + // Is Op1 in expected form? + if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X)) + return nullptr; + // Can omit 'and', and just return the inverted overflow bit. + return NotOp1; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -1813,6 +1993,14 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op0; } + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0)) + return V; + // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { @@ -1987,6 +2175,14 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false)) return V; + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. 
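A sketch of the pattern the new omitCheckForZeroBeforeMulWithOverflow fold targets (illustrative IR, names invented; this mirrors the shape already shown in the comments above): the division-by-zero guard that survives after a division-based overflow check is rewritten to a mul.with.overflow intrinsic becomes redundant.

declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8)

define i1 @mul_ov_and_nonzero_guard(i8 %x, i8 %y) {
  %nz = icmp ne i8 %x, 0
  %agg = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %x, i8 %y)
  %ov = extractvalue { i8, i1 } %agg, 1
  %r = and i1 %nz, %ov
  ret i1 %r                 ; simplifies to: ret i1 %ov
}

If %x is zero the overflow bit is zero as well, so the 'and' always equals the overflow bit and the guard can be dropped.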
+ if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0)) + return V; + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse)) @@ -3529,6 +3725,9 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // %sel = select i1 %cmp, i32 -2147483648, i32 %add // // We can't replace %sel with %add unless we strip away the flags. + // TODO: This is an unusual limitation because better analysis results in + // worse simplification. InstCombine can do this fold more generally + // by dropping the flags. Remove this fold to save compile-time? if (isa<OverflowingBinaryOperator>(B)) if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B)) return nullptr; @@ -3745,18 +3944,21 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, /// Try to simplify a select instruction when its condition operand is a /// floating-point comparison. -static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F) { +static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F, + const SimplifyQuery &Q) { FCmpInst::Predicate Pred; if (!match(Cond, m_FCmp(Pred, m_Specific(T), m_Specific(F))) && !match(Cond, m_FCmp(Pred, m_Specific(F), m_Specific(T)))) return nullptr; - // TODO: The transform may not be valid with -0.0. An incomplete way of - // testing for that possibility is to check if at least one operand is a - // non-zero constant. + // This transform is safe if we do not have (do not care about) -0.0 or if + // at least one operand is known to not be -0.0. Otherwise, the select can + // change the sign of a zero operand. + bool HasNoSignedZeros = Q.CxtI && isa<FPMathOperator>(Q.CxtI) && + Q.CxtI->hasNoSignedZeros(); const APFloat *C; - if ((match(T, m_APFloat(C)) && C->isNonZero()) || - (match(F, m_APFloat(C)) && C->isNonZero())) { + if (HasNoSignedZeros || (match(T, m_APFloat(C)) && C->isNonZero()) || + (match(F, m_APFloat(C)) && C->isNonZero())) { // (T == F) ? T : F --> F // (F == T) ? T : F --> F if (Pred == FCmpInst::FCMP_OEQ) @@ -3794,6 +3996,15 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, return FalseVal; } + // select i1 Cond, i1 true, i1 false --> i1 Cond + assert(Cond->getType()->isIntOrIntVectorTy(1) && + "Select must have bool or bool vector condition"); + assert(TrueVal->getType() == FalseVal->getType() && + "Select must have same types for true/false ops"); + if (Cond->getType() == TrueVal->getType() && + match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt())) + return Cond; + // select ?, X, X -> X if (TrueVal == FalseVal) return TrueVal; @@ -3807,7 +4018,7 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse)) return V; - if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal)) + if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal, Q)) return V; if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal)) @@ -3865,7 +4076,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, // The following transforms are only safe if the ptrtoint cast // doesn't truncate the pointers. 
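The simplifySelectWithFCmp change above replaces the incomplete constant-operand heuristic with a check of the select's own nsz flag. A minimal sketch, assuming fast-math flags are present on the select (names hypothetical):

define float @fcmp_select_nsz(float %x, float %y) {
  %cmp = fcmp oeq float %x, %y
  %sel = select nsz i1 %cmp, float %x, float %y
  ret float %sel            ; (X == Y) ? X : Y --> Y, safe because nsz says we may ignore -0.0
}

Without nsz (or a known non-zero constant operand) the fold is still blocked, because selecting between +0.0 and -0.0 could change the sign of the result.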
if (Ops[1]->getType()->getScalarSizeInBits() == - Q.DL.getIndexSizeInBits(AS)) { + Q.DL.getPointerSizeInBits(AS)) { auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { if (match(P, m_Zero())) return Constant::getNullValue(GEPTy); @@ -4250,6 +4461,30 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts); } + // A splat of an inserted scalar constant becomes a vector constant: + // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...> + // NOTE: We may have commuted above, so analyze the updated Indices, not the + // original mask constant. + Constant *C; + ConstantInt *IndexC; + if (match(Op0, m_InsertElement(m_Value(), m_Constant(C), + m_ConstantInt(IndexC)))) { + // Match a splat shuffle mask of the insert index allowing undef elements. + int InsertIndex = IndexC->getZExtValue(); + if (all_of(Indices, [InsertIndex](int MaskElt) { + return MaskElt == InsertIndex || MaskElt == -1; + })) { + assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat"); + + // Shuffle mask undefs become undefined constant result elements. + SmallVector<Constant *, 16> VecC(MaskNumElts, C); + for (unsigned i = 0; i != MaskNumElts; ++i) + if (Indices[i] == -1) + VecC[i] = UndefValue::get(C->getType()); + return ConstantVector::get(VecC); + } + } + // A shuffle of a splat is always the splat itself. Legal if the shuffle's // value type is same as the input vectors' type. if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0)) @@ -4324,14 +4559,16 @@ static Constant *propagateNaN(Constant *In) { return In; } -static Constant *simplifyFPBinop(Value *Op0, Value *Op1) { - if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) - return ConstantFP::getNaN(Op0->getType()); +/// Perform folds that are common to any floating-point operation. This implies +/// transforms based on undef/NaN because the operation itself makes no +/// difference to the result. 
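To make the new splat-of-inserted-constant shuffle fold above concrete, here is an illustrative example (not from the patch's tests; values are arbitrary). The shuffle broadcasts the inserted constant, so the whole sequence becomes a vector constant, with undef mask elements turning into undef result elements:

define <4 x i32> @splat_inserted_constant(<4 x i32> %v) {
  %ins = insertelement <4 x i32> undef, i32 42, i32 3
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 undef, i32 3>
  ret <4 x i32> %splat      ; --> <i32 42, i32 42, i32 undef, i32 42>
}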
+static Constant *simplifyFPOp(ArrayRef<Value *> Ops) { + if (any_of(Ops, [](Value *V) { return isa<UndefValue>(V); })) + return ConstantFP::getNaN(Ops[0]->getType()); - if (match(Op0, m_NaN())) - return propagateNaN(cast<Constant>(Op0)); - if (match(Op1, m_NaN())) - return propagateNaN(cast<Constant>(Op1)); + for (Value *V : Ops) + if (match(V, m_NaN())) + return propagateNaN(cast<Constant>(V)); return nullptr; } @@ -4343,7 +4580,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fadd X, -0 ==> X @@ -4390,7 +4627,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fsub X, +0 ==> X @@ -4430,23 +4667,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } -/// Given the operands for an FMul, see if we can fold the result -static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) - return C; - - if (Constant *C = simplifyFPBinop(Op0, Op1)) +static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fmul X, 1.0 ==> X if (match(Op1, m_FPOne())) return Op0; + // fmul 1.0, X ==> X + if (match(Op0, m_FPOne())) + return Op1; + // fmul nnan nsz X, 0 ==> 0 if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP())) return ConstantFP::getNullValue(Op0->getType()); + // fmul nnan nsz 0, X ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP())) + return ConstantFP::getNullValue(Op1->getType()); + // sqrt(X) * sqrt(X) --> X, if we can: // 1. Remove the intermediate rounding (reassociate). // 2. Ignore non-zero negative numbers because sqrt would produce NAN. @@ -4459,6 +4700,16 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } +/// Given the operands for an FMul, see if we can fold the result +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) + return C; + + // Now apply simplifications that do not require rounding. 
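The generic simplifyFPOp helper and the commuted FMul cases above cover patterns like the following (hand-written sketch, function names invented):

define float @fadd_undef(float %x) {
  %r = fadd float %x, undef
  ret float %r              ; an undef FP operand folds to NaN, now handled generically for any operand count
}

define float @fmul_one_commuted(float %x) {
  %r = fmul float 1.0, %x
  ret float %r              ; fmul 1.0, X --> X (the commuted form is new)
}

define float @fmul_zero_commuted(float %x) {
  %r = fmul nnan nsz float 0.0, %x
  ret float %r              ; fmul nnan nsz 0.0, X --> 0.0 (commuted form is new)
}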
+ return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse); +} + Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); @@ -4475,12 +4726,17 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); } +Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit); +} + static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // X / 1.0 -> X @@ -4525,7 +4781,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // Unlike fdiv, the result of frem always matches the sign of the dividend. @@ -4564,8 +4820,7 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q, /// Given the operand for a UnaryOperator, see if we can fold the result. /// If not, this returns null. -/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp. +/// Try to use FastMathFlags when folding the result. static Value *simplifyFPUnOp(unsigned Opcode, Value *Op, const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -4581,8 +4836,8 @@ Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) { return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit); } -Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q) { +Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q) { return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit); } @@ -4634,11 +4889,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. -/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. -static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const SimplifyQuery &Q, - unsigned MaxRecurse) { +/// Try to use FastMathFlags when folding the result. 
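The copysign and freeze folds added in the hunks that follow can be sketched like this (illustrative IR only; the freeze case relies on isGuaranteedNotToBeUndefOrPoison, which trivially holds for a plain constant):

declare float @llvm.copysign.f32(float, float)

define float @copysign_self(float %x) {
  %r = call float @llvm.copysign.f32(float %x, float %x)
  ret float %r              ; copysign X, X --> X
}

define float @copysign_fneg(float %x) {
  %n = fneg float %x
  %r = call float @llvm.copysign.f32(float %n, float %x)
  ret float %r              ; copysign -X, X --> X
}

define i32 @freeze_constant() {
  %f = freeze i32 42
  ret i32 %f                ; freeze of a value known not to be undef/poison folds away
}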
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const FastMathFlags &FMF, const SimplifyQuery &Q, + unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); @@ -4658,9 +4912,9 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); } -Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); } /// Given operands for a CmpInst, see if we can fold the result. @@ -4906,6 +5160,16 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return Op0; } break; + case Intrinsic::copysign: + // copysign X, X --> X + if (Op0 == Op1) + return Op0; + // copysign -X, X --> X + // copysign X, -X --> -X + if (match(Op0, m_FNeg(m_Specific(Op1))) || + match(Op1, m_FNeg(m_Specific(Op0)))) + return Op1; + break; case Intrinsic::maxnum: case Intrinsic::minnum: case Intrinsic::maximum: @@ -5009,6 +5273,15 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } return nullptr; } + case Intrinsic::fma: + case Intrinsic::fmuladd: { + Value *Op0 = Call->getArgOperand(0); + Value *Op1 = Call->getArgOperand(1); + Value *Op2 = Call->getArgOperand(2); + if (Value *V = simplifyFPOp({ Op0, Op1, Op2 })) + return V; + return nullptr; + } default: return nullptr; } @@ -5046,6 +5319,19 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI); } +/// Given operands for a Freeze, see if we can fold the result. +static Value *SimplifyFreezeInst(Value *Op0) { + // Use a utility function defined in ValueTracking. + if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0)) + return Op0; + // We have room for improvement. + return nullptr; +} + +Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { + return ::SimplifyFreezeInst(Op0); +} + /// See if we can compute a simplified version of this instruction. /// If not, this returns null. @@ -5188,6 +5474,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, Result = SimplifyCall(cast<CallInst>(I), Q); break; } + case Instruction::Freeze: + Result = SimplifyFreezeInst(I->getOperand(0), Q); + break; #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: #include "llvm/IR/Instruction.def" #undef HANDLE_CAST_INST @@ -5308,7 +5597,7 @@ const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) { auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + auto *TLI = TLIWP ? &TLIWP->getTLI(F) : nullptr; auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>(); auto *AC = ACWP ? 
&ACWP->getAssumptionCache(F) : nullptr; return {F.getParent()->getDataLayout(), TLI, DT, AC}; diff --git a/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp index d12db010db6a..23ff4fd6f85e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/IntervalPartition.h" #include "llvm/Analysis/Interval.h" #include "llvm/Analysis/IntervalIterator.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include <cassert> #include <utility> @@ -22,6 +23,10 @@ using namespace llvm; char IntervalPartition::ID = 0; +IntervalPartition::IntervalPartition() : FunctionPass(ID) { + initializeIntervalPartitionPass(*PassRegistry::getPassRegistry()); +} + INITIALIZE_PASS(IntervalPartition, "intervals", "Interval Partition Construction", true, true) diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp index 439758560284..6107cacb9533 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LazyBranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp index f2592c26b373..83698598e156 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -55,8 +56,9 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); } bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) { LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI); + TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + LBPI = std::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp index 797fcf516429..ef31c1e0ba8c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp @@ -150,7 +150,8 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); } -LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { +LazyCallGraph::LazyCallGraph( + Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { LLVM_DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() << "\n"); for (Function &F : M) { @@ -159,7 +160,7 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { // If this function is a known lib function to LLVM then we want to // synthesize reference edges to it to model the fact that LLVM can turn // arbitrary code into a library function 
call. - if (isKnownLibFunction(F, TLI)) + if (isKnownLibFunction(F, GetTLI(F))) LibFunctions.insert(&F); if (F.hasLocalLinkage()) @@ -631,7 +632,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall( // If the merge range is empty, then adding the edge didn't actually form any // new cycles. We're done. - if (empty(MergeRange)) { + if (MergeRange.empty()) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); return false; // No new cycle. @@ -1751,16 +1752,14 @@ static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) { } static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) { - ptrdiff_t Size = size(C); - OS << " SCC with " << Size << " functions:\n"; + OS << " SCC with " << C.size() << " functions:\n"; for (LazyCallGraph::Node &N : C) OS << " " << N.getFunction().getName() << "\n"; } static void printRefSCC(raw_ostream &OS, LazyCallGraph::RefSCC &C) { - ptrdiff_t Size = size(C); - OS << " RefSCC with " << Size << " call SCCs:\n"; + OS << " RefSCC with " << C.size() << " call SCCs:\n"; for (LazyCallGraph::SCC &InnerC : C) printSCC(OS, InnerC); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp index 542ff709d475..bad2de9e5f5e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp @@ -19,8 +19,8 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueLattice.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/CFG.h" #include "llvm/IR/ConstantRange.h" @@ -33,6 +33,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" @@ -47,6 +48,9 @@ using namespace PatternMatch; static const unsigned MaxProcessedPerValue = 500; char LazyValueInfoWrapperPass::ID = 0; +LazyValueInfoWrapperPass::LazyValueInfoWrapperPass() : FunctionPass(ID) { + initializeLazyValueInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} INITIALIZE_PASS_BEGIN(LazyValueInfoWrapperPass, "lazy-value-info", "Lazy Value Information Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) @@ -188,7 +192,7 @@ namespace { else { auto It = ValueCache.find_as(Val); if (It == ValueCache.end()) { - ValueCache[Val] = make_unique<ValueCacheEntryTy>(Val, this); + ValueCache[Val] = std::make_unique<ValueCacheEntryTy>(Val, this); It = ValueCache.find_as(Val); assert(It != ValueCache.end() && "Val was just added to the map!"); } @@ -432,8 +436,12 @@ namespace { BasicBlock *BB); bool solveBlockValueOverflowIntrinsic( ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB); + bool solveBlockValueSaturatingIntrinsic(ValueLatticeElement &BBLV, + SaturatingInst *SI, BasicBlock *BB); bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB); + bool solveBlockValueExtractValue(ValueLatticeElement &BBLV, + ExtractValueInst *EVI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, ValueLatticeElement &BBLV, Instruction *BBI); @@ -648,9 +656,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, return 
solveBlockValueBinaryOp(Res, BO, BB); if (auto *EVI = dyn_cast<ExtractValueInst>(BBI)) - if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) - if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) - return solveBlockValueOverflowIntrinsic(Res, WO, BB); + return solveBlockValueExtractValue(Res, EVI, BB); if (auto *II = dyn_cast<IntrinsicInst>(BBI)) return solveBlockValueIntrinsic(Res, II, BB); @@ -1090,8 +1096,22 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV, return true; } - return solveBlockValueBinaryOpImpl(BBLV, BO, BB, - [BO](const ConstantRange &CR1, const ConstantRange &CR2) { + if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(BO)) { + unsigned NoWrapKind = 0; + if (OBO->hasNoUnsignedWrap()) + NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap; + if (OBO->hasNoSignedWrap()) + NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap; + + return solveBlockValueBinaryOpImpl( + BBLV, BO, BB, + [BO, NoWrapKind](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.overflowingBinaryOp(BO->getOpcode(), CR2, NoWrapKind); + }); + } + + return solveBlockValueBinaryOpImpl( + BBLV, BO, BB, [BO](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.binaryOp(BO->getOpcode(), CR2); }); } @@ -1104,35 +1124,71 @@ bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic( }); } -bool LazyValueInfoImpl::solveBlockValueIntrinsic( - ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB) { - switch (II->getIntrinsicID()) { +bool LazyValueInfoImpl::solveBlockValueSaturatingIntrinsic( + ValueLatticeElement &BBLV, SaturatingInst *SI, BasicBlock *BB) { + switch (SI->getIntrinsicID()) { case Intrinsic::uadd_sat: - return solveBlockValueBinaryOpImpl(BBLV, II, BB, - [](const ConstantRange &CR1, const ConstantRange &CR2) { + return solveBlockValueBinaryOpImpl( + BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.uadd_sat(CR2); }); case Intrinsic::usub_sat: - return solveBlockValueBinaryOpImpl(BBLV, II, BB, - [](const ConstantRange &CR1, const ConstantRange &CR2) { + return solveBlockValueBinaryOpImpl( + BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.usub_sat(CR2); }); case Intrinsic::sadd_sat: - return solveBlockValueBinaryOpImpl(BBLV, II, BB, - [](const ConstantRange &CR1, const ConstantRange &CR2) { + return solveBlockValueBinaryOpImpl( + BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.sadd_sat(CR2); }); case Intrinsic::ssub_sat: - return solveBlockValueBinaryOpImpl(BBLV, II, BB, - [](const ConstantRange &CR1, const ConstantRange &CR2) { + return solveBlockValueBinaryOpImpl( + BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.ssub_sat(CR2); }); default: - LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined (unknown intrinsic).\n"); - BBLV = ValueLatticeElement::getOverdefined(); + llvm_unreachable("All llvm.sat intrinsic are handled."); + } +} + +bool LazyValueInfoImpl::solveBlockValueIntrinsic(ValueLatticeElement &BBLV, + IntrinsicInst *II, + BasicBlock *BB) { + if (auto *SI = dyn_cast<SaturatingInst>(II)) + return solveBlockValueSaturatingIntrinsic(BBLV, SI, BB); + + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown intrinsic).\n"); + BBLV = ValueLatticeElement::getOverdefined(); + return true; +} + +bool LazyValueInfoImpl::solveBlockValueExtractValue( + ValueLatticeElement &BBLV, ExtractValueInst *EVI, BasicBlock *BB) { + if (auto *WO 
= dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) + if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) + return solveBlockValueOverflowIntrinsic(BBLV, WO, BB); + + // Handle extractvalue of insertvalue to allow further simplification + // based on replaced with.overflow intrinsics. + if (Value *V = SimplifyExtractValueInst( + EVI->getAggregateOperand(), EVI->getIndices(), + EVI->getModule()->getDataLayout())) { + if (!hasBlockValue(V, BB)) { + if (pushBlockValue({ BB, V })) + return false; + BBLV = ValueLatticeElement::getOverdefined(); + return true; + } + BBLV = getBlockValue(V, BB); return true; } + + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown extractvalue).\n"); + BBLV = ValueLatticeElement::getOverdefined(); + return true; } static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, @@ -1575,7 +1631,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); Info.DT = DTWP ? &DTWP->getDomTree() : nullptr; - Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); if (Info.PImpl) getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp index 52212e1c42aa..0f274429f11f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -64,10 +64,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DivergenceAnalysis.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -75,6 +75,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -93,8 +95,9 @@ namespace { class DivergencePropagator { public: DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - PostDominatorTree &PDT, DenseSet<const Value *> &DV) - : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + PostDominatorTree &PDT, DenseSet<const Value *> &DV, + DenseSet<const Use *> &DU) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {} void populateWithSourcesOfDivergence(); void propagate(); @@ -118,11 +121,14 @@ private: PostDominatorTree &PDT; std::vector<Value *> Worklist; // Stack for DFS. DenseSet<const Value *> &DV; // Stores all divergent values. + DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform + // values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { Worklist.clear(); DV.clear(); + DU.clear(); for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { Worklist.push_back(&I); @@ -197,8 +203,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { // dominators of TI until it is outside the influence region. 
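The LazyValueInfo changes above (nowrap-aware binary-operator ranges and the dedicated saturating-intrinsic handler) only tighten the computed ranges; the folds sketched below are what a consumer such as -correlated-propagation should then be able to do with them. Hand-written examples, names invented:

declare i8 @llvm.uadd.sat.i8(i8, i8)

define i1 @nsw_range(i8 %x) {
entry:
  %pos = icmp sge i8 %x, 0
  br i1 %pos, label %then, label %else
then:
  %a = add nsw i8 %x, 1     ; with nsw, LVI can bound %a to [1, 127] instead of a wrapping range
  %c = icmp sgt i8 %a, 0
  ret i1 %c                 ; provably true
else:
  ret i1 false
}

define i1 @sat_range(i8 %x) {
  %s = call i8 @llvm.uadd.sat.i8(i8 %x, i8 10)
  %c = icmp uge i8 %s, 10   ; LVI now knows %s is in [10, 255], so this is provably true
  ret i1 %c
}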
BasicBlock *InfluencedBB = ThisBB; while (InfluenceRegion.count(InfluencedBB)) { - for (auto &I : *InfluencedBB) - findUsersOutsideInfluenceRegion(I, InfluenceRegion); + for (auto &I : *InfluencedBB) { + if (!DV.count(&I)) + findUsersOutsideInfluenceRegion(I, InfluenceRegion); + } DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); if (IDomNode == nullptr) break; @@ -208,9 +216,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { void DivergencePropagator::findUsersOutsideInfluenceRegion( Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) { - for (User *U : I.users()) { - Instruction *UserInst = cast<Instruction>(U); + for (Use &Use : I.uses()) { + Instruction *UserInst = cast<Instruction>(Use.getUser()); if (!InfluenceRegion.count(UserInst->getParent())) { + DU.insert(&Use); if (DV.insert(UserInst).second) Worklist.push_back(UserInst); } @@ -250,9 +259,8 @@ void DivergencePropagator::computeInfluenceRegion( void DivergencePropagator::exploreDataDependency(Value *V) { // Follow def-use chains of V. for (User *U : V->users()) { - Instruction *UserInst = cast<Instruction>(U); - if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second) - Worklist.push_back(UserInst); + if (!TTI.isAlwaysUniform(U) && DV.insert(U).second) + Worklist.push_back(U); } } @@ -275,6 +283,9 @@ void DivergencePropagator::propagate() { // Register this pass. char LegacyDivergenceAnalysis::ID = 0; +LegacyDivergenceAnalysis::LegacyDivergenceAnalysis() : FunctionPass(ID) { + initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); +} INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) @@ -320,6 +331,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { return false; DivergentValues.clear(); + DivergentUses.clear(); gpuDA = nullptr; auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -328,11 +340,11 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { if (shouldUseGPUDivergenceAnalysis(F)) { // run the new GPU divergence analysis auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI); + gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI); } else { // run LLVM's existing DivergenceAnalysis - DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues); + DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); DP.populateWithSourcesOfDivergence(); DP.propagate(); } @@ -351,6 +363,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { return DivergentValues.count(V); } +bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const { + if (gpuDA) { + return gpuDA->isDivergentUse(*U); + } + return DivergentValues.count(U->get()) || DivergentUses.count(U); +} + void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) return; diff --git a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp index d28b8a189d4b..ba945eb4318f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp @@ -66,6 +66,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" @@ -205,7 
+206,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp index 31da4e9ec783..a7d07c0b6183 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp @@ -12,6 +12,9 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -24,34 +27,30 @@ using namespace llvm; -static bool isAligned(const Value *Base, const APInt &Offset, unsigned Align, - const DataLayout &DL) { - APInt BaseAlign(Offset.getBitWidth(), Base->getPointerAlignment(DL)); - - if (!BaseAlign) { - Type *Ty = Base->getType()->getPointerElementType(); - if (!Ty->isSized()) - return false; - BaseAlign = DL.getABITypeAlignment(Ty); - } - - APInt Alignment(Offset.getBitWidth(), Align); - - assert(Alignment.isPowerOf2() && "must be a power of 2!"); - return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); +static MaybeAlign getBaseAlign(const Value *Base, const DataLayout &DL) { + if (const MaybeAlign PA = Base->getPointerAlignment(DL)) + return *PA; + Type *const Ty = Base->getType()->getPointerElementType(); + if (!Ty->isSized()) + return None; + return Align(DL.getABITypeAlignment(Ty)); } -static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { - Type *Ty = Base->getType(); - assert(Ty->isSized() && "must be sized"); - APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); - return isAligned(Base, Offset, Align, DL); +static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment, + const DataLayout &DL) { + if (MaybeAlign BA = getBaseAlign(Base, DL)) { + const APInt APBaseAlign(Offset.getBitWidth(), BA->value()); + const APInt APAlign(Offset.getBitWidth(), Alignment.value()); + assert(APAlign.isPowerOf2() && "must be a power of 2!"); + return APBaseAlign.uge(APAlign) && !(Offset & (APAlign - 1)); + } + return false; } /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. static bool isDereferenceableAndAlignedPointer( - const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL, + const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, SmallPtrSetImpl<const Value *> &Visited) { // Already visited? Bail out, we've likely hit unreachable code. @@ -63,17 +62,22 @@ static bool isDereferenceableAndAlignedPointer( // bitcast instructions are no-ops as far as dereferenceability is concerned. 
if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) - return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); bool CheckForNonNull = false; APInt KnownDerefBytes(Size.getBitWidth(), V->getPointerDereferenceableBytes(DL, CheckForNonNull)); - if (KnownDerefBytes.getBoolValue()) { - if (KnownDerefBytes.uge(Size)) - if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) - return isAligned(V, Align, DL); - } + if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size)) + if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) { + // As we recursed through GEPs to get here, we've incrementally checked + // that each step advanced by a multiple of the alignment. If our base is + // properly aligned, then the original offset accessed must also be. + Type *Ty = V->getType(); + assert(Ty->isSized() && "must be sized"); + APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); + return isAligned(V, Offset, Alignment, DL); + } // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { @@ -81,7 +85,8 @@ static bool isDereferenceableAndAlignedPointer( APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() || - !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue()) + !Offset.urem(APInt(Offset.getBitWidth(), Alignment.value())) + .isMinValue()) return false; // If the base pointer is dereferenceable for Offset+Size bytes, then the @@ -93,67 +98,69 @@ static bool isDereferenceableAndAlignedPointer( // Offset and Size may have different bit widths if we have visited an // addrspacecast, so we can't do arithmetic directly on the APInt values. return isDereferenceableAndAlignedPointer( - Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()), - DL, CtxI, DT, Visited); + Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL, + CtxI, DT, Visited); } // For gc.relocate, look through relocations if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V)) return isDereferenceableAndAlignedPointer( - RelocateInst->getDerivedPtr(), Align, Size, DL, CtxI, DT, Visited); + RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited); if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) - return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); if (const auto *Call = dyn_cast<CallBase>(V)) - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) - return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT, - Visited); + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) + return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI, + DT, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + // Note: At the moment, Size can be zero. 
This ends up being interpreted as + // a query of whether [Base, V] is dereferenceable and V is aligned (since + // that's what the implementation happened to do). It's unclear if this is + // the desired semantic, but at least SelectionDAG does exercise this case. + SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT, + return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT, Visited); } bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - unsigned Align, + MaybeAlign MA, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + if (!Ty->isSized()) + return false; + // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. // Require ABI alignment for loads without alignment specification - if (Align == 0) - Align = DL.getABITypeAlignment(Ty); - - if (!Ty->isSized()) - return false; - - SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceableAndAlignedPointer( - V, Align, - APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)), - DL, CtxI, DT, Visited); + const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty); + APInt AccessSize(DL.getPointerTypeSizeInBits(V->getType()), + DL.getTypeStoreSize(Ty)); + return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI, + DT); } bool llvm::isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT); + return isDereferenceableAndAlignedPointer(V, Ty, Align::None(), DL, CtxI, DT); } /// Test if A and B will obviously have the same value. @@ -187,6 +194,60 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } +bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, + ScalarEvolution &SE, + DominatorTree &DT) { + auto &DL = LI->getModule()->getDataLayout(); + Value *Ptr = LI->getPointerOperand(); + + APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), + DL.getTypeStoreSize(LI->getType())); + const Align Alignment = DL.getValueOrABITypeAlignment( + MaybeAlign(LI->getAlignment()), LI->getType()); + + Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); + + // If given a uniform (i.e. non-varying) address, see if we can prove the + // access is safe within the loop w/o needing predication. + if (L->isLoopInvariant(Ptr)) + return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, + HeaderFirstNonPHI, &DT); + + // Otherwise, check to see if we have a repeating access pattern where we can + // prove that all accesses are well aligned and dereferenceable. + auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr)); + if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine()) + return false; + auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE)); + if (!Step) + return false; + // TODO: generalize to access patterns which have gaps + if (Step->getAPInt() != EltSize) + return false; + + // TODO: If the symbolic trip count has a small bound (max count), we might + // be able to prove safety. 
+ auto TC = SE.getSmallConstantTripCount(L); + if (!TC) + return false; + + const APInt AccessSize = TC * EltSize; + + auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart()); + if (!StartS) + return false; + assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition"); + Value *Base = StartS->getValue(); + + // For the moment, restrict ourselves to the case where the access size is a + // multiple of the requested alignment and the base is aligned. + // TODO: generalize if a case found which warrants + if (EltSize.urem(Alignment.value()) != 0) + return false; + return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, + HeaderFirstNonPHI, &DT); +} + /// Check if executing a load of this pointer value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive @@ -198,64 +259,25 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { /// /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. -bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, +bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { // Zero alignment means that the load has the ABI alignment for the target - if (Align == 0) - Align = DL.getABITypeAlignment(V->getType()->getPointerElementType()); - assert(isPowerOf2_32(Align)); + const Align Alignment = + DL.getValueOrABITypeAlignment(MA, V->getType()->getPointerElementType()); // If DT is not specified we can't make context-sensitive query const Instruction* CtxI = DT ? ScanFrom : nullptr; - if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT)) + if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT)) return true; - int64_t ByteOffset = 0; - Value *Base = V; - Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); - - if (ByteOffset < 0) // out of bounds + if (!ScanFrom) return false; - Type *BaseType = nullptr; - unsigned BaseAlign = 0; - if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { - // An alloca is safe to load from as load as it is suitably aligned. - BaseType = AI->getAllocatedType(); - BaseAlign = AI->getAlignment(); - } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { - // Global variables are not necessarily safe to load from if they are - // interposed arbitrarily. Their size may change or they may be weak and - // require a test to determine if they were in fact provided. - if (!GV->isInterposable()) { - BaseType = GV->getType()->getElementType(); - BaseAlign = GV->getAlignment(); - } - } - - PointerType *AddrTy = cast<PointerType>(V->getType()); - uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType()); - - // If we found a base allocated type from either an alloca or global variable, - // try to see if we are definitively within the allocated region. We need to - // know the size of the base type and the loaded type to do anything in this - // case. - if (BaseType && BaseType->isSized()) { - if (BaseAlign == 0) - BaseAlign = DL.getPrefTypeAlignment(BaseType); - - if (Align <= BaseAlign) { - // Check if the load is within the bounds of the underlying object. 
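The new isDereferenceableAndAlignedInLoop above is aimed at loops of the following shape (a hand-written sketch, not from the patch): a constant trip count, a unit-stride affine access, and a base pointer whose dereferenceable/align attributes cover the entire footprint, so every iteration's load can be speculated without predication.

define i32 @sum100(i32* align 4 dereferenceable(400) %a) {
entry:
  br label %loop
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
  %gep = getelementptr inbounds i32, i32* %a, i64 %i
  %v = load i32, i32* %gep, align 4        ; footprint: 100 iterations * 4 bytes = 400 bytes from %a
  %acc.next = add i32 %acc, %v
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 100
  br i1 %done, label %exit, label %loop
exit:
  ret i32 %acc
}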
- if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) && - ((ByteOffset % Align) == 0)) - return true; - } - } - - if (!ScanFrom) + if (Size.getBitWidth() > 64) return false; + const uint64_t LoadSize = Size.getZExtValue(); // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored @@ -279,7 +301,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; Value *AccessedPtr; - unsigned AccessedAlign; + MaybeAlign MaybeAccessedAlign; if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { // Ignore volatile loads. The execution of a volatile load cannot // be used to prove an address is backed by regular memory; it can, @@ -287,24 +309,26 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, if (LI->isVolatile()) continue; AccessedPtr = LI->getPointerOperand(); - AccessedAlign = LI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(LI->getAlignment()); } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { // Ignore volatile stores (see comment for loads). if (SI->isVolatile()) continue; AccessedPtr = SI->getPointerOperand(); - AccessedAlign = SI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(SI->getAlignment()); } else continue; Type *AccessedTy = AccessedPtr->getType()->getPointerElementType(); - if (AccessedAlign == 0) - AccessedAlign = DL.getABITypeAlignment(AccessedTy); - if (AccessedAlign < Align) + + const Align AccessedAlign = + DL.getValueOrABITypeAlignment(MaybeAccessedAlign, AccessedTy); + if (AccessedAlign < Alignment) continue; // Handle trivial cases. - if (AccessedPtr == V) + if (AccessedPtr == V && + LoadSize <= DL.getTypeStoreSize(AccessedTy)) return true; if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && @@ -314,12 +338,12 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; } -bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, +bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); - return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT); + return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT); } /// DefMaxInstsToScan - the default number of maximum instructions @@ -359,10 +383,6 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, MaxInstsToScan = ~0U; const DataLayout &DL = ScanBB->getModule()->getDataLayout(); - - // Try to get the store size for the type. - auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy)); - Value *StrippedPtr = Ptr->stripPointerCasts(); while (ScanFrom != ScanBB->begin()) { @@ -401,6 +421,9 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, return LI; } + // Try to get the store size for the type. + auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy)); + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { Value *StorePtr = SI->getPointerOperand()->stripPointerCasts(); // If this is a store through Ptr, the value is available! 
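The rewritten isSafeToLoadUnconditionally keeps a block-local fallback: scan backwards from ScanFrom for an earlier non-volatile load or store of the same (or equivalent) pointer with at least the requested alignment and size. A sketch of the kind of pattern this catches (illustrative only):

define i32 @speculate(i32* %p, i1 %c) {
entry:
  %v0 = load i32, i32* %p, align 4
  br i1 %c, label %then, label %merge
then:
  %v1 = load i32, i32* %p, align 4
  br label %merge
merge:
  %r = phi i32 [ %v1, %then ], [ %v0, %entry ]
  ret i32 %r
}

A client that wants to speculate %v1 into %entry (SimplifyCFG, for example) can pass the entry terminator as ScanFrom; the earlier load of %p with the same alignment and size makes the speculative load safe even though %p carries no dereferenceable attribute.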
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 36bd9a8b7ea7..26fa5112c29a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -1189,18 +1190,31 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, unsigned IdxWidth = DL.getIndexSizeInBits(ASA); Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); - APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); + // Retrieve the address space again as pointer stripping now tracks through + // `addrspacecast`. + ASA = cast<PointerType>(PtrA->getType())->getAddressSpace(); + ASB = cast<PointerType>(PtrB->getType())->getAddressSpace(); + // Check that the address spaces match and that the pointers are valid. + if (ASA != ASB) + return false; + + IdxWidth = DL.getIndexSizeInBits(ASA); + OffsetA = OffsetA.sextOrTrunc(IdxWidth); + OffsetB = OffsetB.sextOrTrunc(IdxWidth); + + APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); + // OffsetDelta = OffsetB - OffsetA; const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); - const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV); - const APInt &OffsetDelta = OffsetDeltaC->getAPInt(); + const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt(); + // Check if they are based on the same pointer. That makes the offsets // sufficient. if (PtrA == PtrB) @@ -1641,13 +1655,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, // Check every access pair. while (AI != AE) { Visited.insert(*AI); - EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI); + bool AIIsWrite = AI->getInt(); + // Check loads only against next equivalent class, but stores also against + // other stores in the same equivalence class - to the same address. + EquivalenceClasses<MemAccessInfo>::member_iterator OI = + (AIIsWrite ? AI : std::next(AI)); while (OI != AE) { // Check every accessing instruction pair in program order. for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(), I1E = Accesses[*AI].end(); I1 != I1E; ++I1) - for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(), - I2E = Accesses[*OI].end(); I2 != I2E; ++I2) { + // Scan all accesses of another equivalence class, but only the next + // accesses of the same equivalent class. + for (std::vector<unsigned>::iterator + I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()), + I2E = (OI == AI ? 
I1E : Accesses[*OI].end()); + I2 != I2E; ++I2) { auto A = std::make_pair(&*AI, *I1); auto B = std::make_pair(&*OI, *I2); @@ -2078,7 +2100,7 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, DL = I->getDebugLoc(); } - Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL, + Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL, CodeRegion); return *Report; } @@ -2323,9 +2345,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI) - : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)), - PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)), - DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L), + : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)), + PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)), + DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false) { @@ -2376,11 +2398,15 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { PSE->print(OS, Depth); } +LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) { + initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry()); +} + const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { auto &LAI = LoopAccessInfoMap[L]; if (!LAI) - LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI); + LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI); return *LAI.get(); } @@ -2399,7 +2425,7 @@ void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const { bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) { SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - TLI = TLIP ? &TLIP->getTLI() : nullptr; + TLI = TLIP ? &TLIP->getTLI(F) : nullptr; AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp index a10a87ce113b..02d40fb8d72a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp @@ -46,7 +46,7 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate( // invalidation logic below to act on that. 
auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>(); bool invalidateMemorySSAAnalysis = false; - if (EnableMSSALoopDependency) + if (MSSAUsed) invalidateMemorySSAAnalysis = Inv.invalidate<MemorySSAAnalysis>(F, PA); if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || Inv.invalidate<AAManager>(F, PA) || diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp new file mode 100644 index 000000000000..25325ec1be02 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -0,0 +1,629 @@ +//===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the implementation for the loop cache analysis. +/// The implementation is largely based on the following paper: +/// +/// Compiler Optimizations for Improving Data Locality +/// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng +/// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf +/// +/// The general approach taken to estimate the number of cache lines used by the +/// memory references in an inner loop is: +/// 1. Partition memory references that exhibit temporal or spacial reuse +/// into reference groups. +/// 2. For each loop L in the a loop nest LN: +/// a. Compute the cost of the reference group +/// b. Compute the loop cost by summing up the reference groups costs +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopCacheAnalysis.h" +#include "llvm/ADT/BreadthFirstIterator.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "loop-cache-cost" + +static cl::opt<unsigned> DefaultTripCount( + "default-trip-count", cl::init(100), cl::Hidden, + cl::desc("Use this to specify the default trip count of a loop")); + +// In this analysis two array references are considered to exhibit temporal +// reuse if they access either the same memory location, or a memory location +// with distance smaller than a configurable threshold. +static cl::opt<unsigned> TemporalReuseThreshold( + "temporal-reuse-threshold", cl::init(2), cl::Hidden, + cl::desc("Use this to specify the max. distance between array elements " + "accessed in a loop so that the elements are classified to have " + "temporal reuse")); + +/// Retrieve the innermost loop in the given loop nest \p Loops. It returns a +/// nullptr if any loops in the loop vector supplied has more than one sibling. +/// The loop vector is expected to contain loops collected in breadth-first +/// order. +static Loop *getInnerMostLoop(const LoopVectorTy &Loops) { + assert(!Loops.empty() && "Expecting a non-empy loop vector"); + + Loop *LastLoop = Loops.back(); + Loop *ParentLoop = LastLoop->getParentLoop(); + + if (ParentLoop == nullptr) { + assert(Loops.size() == 1 && "Expecting a single loop"); + return LastLoop; + } + + return (std::is_sorted(Loops.begin(), Loops.end(), + [](const Loop *L1, const Loop *L2) { + return L1->getLoopDepth() < L2->getLoopDepth(); + })) + ? 
LastLoop + : nullptr; +} + +static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, + const Loop &L, ScalarEvolution &SE) { + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn); + if (!AR || !AR->isAffine()) + return false; + + assert(AR->getLoop() && "AR should have a loop"); + + // Check that start and increment are not add recurrences. + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(SE); + if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step)) + return false; + + // Check that start and increment are both invariant in the loop. + if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) + return false; + + return AR->getStepRecurrence(SE) == &ElemSize; +} + +/// Compute the trip count for the given loop \p L. Return the SCEV expression +/// for the trip count or nullptr if it cannot be computed. +static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) { + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || + !isa<SCEVConstant>(BackedgeTakenCount)) + return nullptr; + + return SE.getAddExpr(BackedgeTakenCount, + SE.getOne(BackedgeTakenCount->getType())); +} + +//===----------------------------------------------------------------------===// +// IndexedReference implementation +// +raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) { + if (!R.IsValid) { + OS << R.StoreOrLoadInst; + OS << ", IsValid=false."; + return OS; + } + + OS << *R.BasePointer; + for (const SCEV *Subscript : R.Subscripts) + OS << "[" << *Subscript << "]"; + + OS << ", Sizes: "; + for (const SCEV *Size : R.Sizes) + OS << "[" << *Size << "]"; + + return OS; +} + +IndexedReference::IndexedReference(Instruction &StoreOrLoadInst, + const LoopInfo &LI, ScalarEvolution &SE) + : StoreOrLoadInst(StoreOrLoadInst), SE(SE) { + assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) && + "Expecting a load or store instruction"); + + IsValid = delinearize(LI); + if (IsValid) + LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this + << "\n"); +} + +Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other, + unsigned CLS, + AliasAnalysis &AA) const { + assert(IsValid && "Expecting a valid reference"); + + if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse: different base pointers\n"); + return false; + } + + unsigned NumSubscripts = getNumSubscripts(); + if (NumSubscripts != Other.getNumSubscripts()) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse: different number of subscripts\n"); + return false; + } + + // all subscripts must be equal, except the leftmost one (the last one). + for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) { + if (getSubscript(SubNum) != Other.getSubscript(SubNum)) { + LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: " + << "\n\t" << *getSubscript(SubNum) << "\n\t" + << *Other.getSubscript(SubNum) << "\n"); + return false; + } + } + + // the difference between the last subscripts must be less than the cache line + // size. 
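As an illustration of the rule above (example code, not from the patch): in the nest below, B[i][j] and B[i][j+1] agree on every subscript except the innermost one, and their last subscripts differ by the constant 1, which is smaller than the cache line size, so the two references land in the same reference group.

    // Hypothetical input loop nest: the two B references exhibit spatial reuse.
    enum { N = 128, M = 128 };
    double A[N][M], B[N][M];
    for (int i = 0; i < N; ++i)
      for (int j = 0; j + 1 < M; ++j)
        A[i][j] = B[i][j] + B[i][j + 1];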
+ const SCEV *LastSubscript = getLastSubscript(); + const SCEV *OtherLastSubscript = Other.getLastSubscript(); + const SCEVConstant *Diff = dyn_cast<SCEVConstant>( + SE.getMinusSCEV(LastSubscript, OtherLastSubscript)); + + if (Diff == nullptr) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse, difference between subscript:\n\t" + << *LastSubscript << "\n\t" << OtherLastSubscript + << "\nis not constant.\n"); + return None; + } + + bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS); + + LLVM_DEBUG({ + if (InSameCacheLine) + dbgs().indent(2) << "Found spacial reuse.\n"; + else + dbgs().indent(2) << "No spacial reuse.\n"; + }); + + return InSameCacheLine; +} + +Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other, + unsigned MaxDistance, + const Loop &L, + DependenceInfo &DI, + AliasAnalysis &AA) const { + assert(IsValid && "Expecting a valid reference"); + + if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { + LLVM_DEBUG(dbgs().indent(2) + << "No temporal reuse: different base pointer\n"); + return false; + } + + std::unique_ptr<Dependence> D = + DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true); + + if (D == nullptr) { + LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n"); + return false; + } + + if (D->isLoopIndependent()) { + LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); + return true; + } + + // Check the dependence distance at every loop level. There is temporal reuse + // if the distance at the given loop's depth is small (|d| <= MaxDistance) and + // it is zero at every other loop level. + int LoopDepth = L.getLoopDepth(); + int Levels = D->getLevels(); + for (int Level = 1; Level <= Levels; ++Level) { + const SCEV *Distance = D->getDistance(Level); + const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance); + + if (SCEVConst == nullptr) { + LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n"); + return None; + } + + const ConstantInt &CI = *SCEVConst->getValue(); + if (Level != LoopDepth && !CI.isZero()) { + LLVM_DEBUG(dbgs().indent(2) + << "No temporal reuse: distance is not zero at depth=" << Level + << "\n"); + return false; + } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) { + LLVM_DEBUG( + dbgs().indent(2) + << "No temporal reuse: distance is greater than MaxDistance at depth=" + << Level << "\n"); + return false; + } + } + + LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); + return true; +} + +CacheCostTy IndexedReference::computeRefCost(const Loop &L, + unsigned CLS) const { + assert(IsValid && "Expecting a valid reference"); + LLVM_DEBUG({ + dbgs().indent(2) << "Computing cache cost for:\n"; + dbgs().indent(4) << *this << "\n"; + }); + + // If the indexed reference is loop invariant the cost is one. + if (isLoopInvariant(L)) { + LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n"); + return 1; + } + + const SCEV *TripCount = computeTripCount(L, SE); + if (!TripCount) { + LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() + << " could not be computed, using DefaultTripCount\n"); + const SCEV *ElemSize = Sizes.back(); + TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount); + } + LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); + + // If the indexed reference is 'consecutive' the cost is + // (TripCount*Stride)/CLS, otherwise the cost is TripCount. 
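As a worked example with made-up numbers: for a consecutive reference with unit coefficient over 8-byte elements (stride = 8 bytes), TripCount = 100 and a 64-byte cache line, the formula above gives RefCost = (100 * 8) / 64 = 12 cache lines; the same reference accessed non-consecutively would cost the full trip count, 100.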
+ const SCEV *RefCost = TripCount; + + if (isConsecutive(L, CLS)) { + const SCEV *Coeff = getLastCoefficient(); + const SCEV *ElemSize = Sizes.back(); + const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); + Stride = SE.getNoopOrSignExtend(Stride, WiderType); + TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); + const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); + RefCost = SE.getUDivExpr(Numerator, CacheLineSize); + LLVM_DEBUG(dbgs().indent(4) + << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" + << *RefCost << "\n"); + } else + LLVM_DEBUG(dbgs().indent(4) + << "Access is not consecutive: RefCost=TripCount=" << *RefCost + << "\n"); + + // Attempt to fold RefCost into a constant. + if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost)) + return ConstantCost->getValue()->getSExtValue(); + + LLVM_DEBUG(dbgs().indent(4) + << "RefCost is not a constant! Setting to RefCost=InvalidCost " + "(invalid value).\n"); + + return CacheCost::InvalidCost; +} + +bool IndexedReference::delinearize(const LoopInfo &LI) { + assert(Subscripts.empty() && "Subscripts should be empty"); + assert(Sizes.empty() && "Sizes should be empty"); + assert(!IsValid && "Should be called once from the constructor"); + LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n"); + + const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst); + const BasicBlock *BB = StoreOrLoadInst.getParent(); + + if (Loop *L = LI.getLoopFor(BB)) { + const SCEV *AccessFn = + SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L); + + BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn)); + if (BasePointer == nullptr) { + LLVM_DEBUG( + dbgs().indent(2) + << "ERROR: failed to delinearize, can't identify base pointer\n"); + return false; + } + + AccessFn = SE.getMinusSCEV(AccessFn, BasePointer); + + LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() + << "', AccessFn: " << *AccessFn << "\n"); + + SE.delinearize(AccessFn, Subscripts, Sizes, + SE.getElementSize(&StoreOrLoadInst)); + + if (Subscripts.empty() || Sizes.empty() || + Subscripts.size() != Sizes.size()) { + // Attempt to determine whether we have a single dimensional array access. + // before giving up. + if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { + LLVM_DEBUG(dbgs().indent(2) + << "ERROR: failed to delinearize reference\n"); + Subscripts.clear(); + Sizes.clear(); + return false; + } + + const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); + Subscripts.push_back(Div); + Sizes.push_back(ElemSize); + } + + return all_of(Subscripts, [&](const SCEV *Subscript) { + return isSimpleAddRecurrence(*Subscript, *L); + }); + } + + return false; +} + +bool IndexedReference::isLoopInvariant(const Loop &L) const { + Value *Addr = getPointerOperand(&StoreOrLoadInst); + assert(Addr != nullptr && "Expecting either a load or a store instruction"); + assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable"); + + if (SE.isLoopInvariant(SE.getSCEV(Addr), &L)) + return true; + + // The indexed reference is loop invariant if none of the coefficients use + // the loop induction variable. 
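For example (illustrative code, not from the patch), with respect to the inner j-loop below the reference x[i] is loop invariant, since none of its subscripts step with j, and therefore costs a single cache line for that loop:

    enum { N = 128, M = 128 };
    double A[N][M], x[N];
    for (int i = 0; i < N; ++i)
      for (int j = 0; j < M; ++j)
        A[i][j] += x[i];   // x[i] is invariant in the j-loop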
+ bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) { + return isCoeffForLoopZeroOrInvariant(*Subscript, L); + }); + + return allCoeffForLoopAreZero; +} + +bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { + // The indexed reference is 'consecutive' if the only coefficient that uses + // the loop induction variable is the last one... + const SCEV *LastSubscript = Subscripts.back(); + for (const SCEV *Subscript : Subscripts) { + if (Subscript == LastSubscript) + continue; + if (!isCoeffForLoopZeroOrInvariant(*Subscript, L)) + return false; + } + + // ...and the access stride is less than the cache line size. + const SCEV *Coeff = getLastCoefficient(); + const SCEV *ElemSize = Sizes.back(); + const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + + return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); +} + +const SCEV *IndexedReference::getLastCoefficient() const { + const SCEV *LastSubscript = getLastSubscript(); + assert(isa<SCEVAddRecExpr>(LastSubscript) && + "Expecting a SCEV add recurrence expression"); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript); + return AR->getStepRecurrence(SE); +} + +bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, + const Loop &L) const { + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript); + return (AR != nullptr) ? AR->getLoop() != &L + : SE.isLoopInvariant(&Subscript, &L); +} + +bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript, + const Loop &L) const { + if (!isa<SCEVAddRecExpr>(Subscript)) + return false; + + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript); + assert(AR->getLoop() && "AR should have a loop"); + + if (!AR->isAffine()) + return false; + + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(SE); + + if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) + return false; + + return true; +} + +bool IndexedReference::isAliased(const IndexedReference &Other, + AliasAnalysis &AA) const { + const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst); + const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst); + return AA.isMustAlias(Loc1, Loc2); +} + +//===----------------------------------------------------------------------===// +// CacheCost implementation +// +raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { + for (const auto &LC : CC.LoopCosts) { + const Loop *L = LC.first; + OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n"; + } + return OS; +} + +CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, + ScalarEvolution &SE, TargetTransformInfo &TTI, + AliasAnalysis &AA, DependenceInfo &DI, + Optional<unsigned> TRT) + : Loops(Loops), TripCounts(), LoopCosts(), + TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT), + LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) { + assert(!Loops.empty() && "Expecting a non-empty loop vector."); + + for (const Loop *L : Loops) { + unsigned TripCount = SE.getSmallConstantTripCount(L); + TripCount = (TripCount == 0) ? 
DefaultTripCount : TripCount; + TripCounts.push_back({L, TripCount}); + } + + calculateCacheFootprint(); +} + +std::unique_ptr<CacheCost> +CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, + DependenceInfo &DI, Optional<unsigned> TRT) { + if (Root.getParentLoop()) { + LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n"); + return nullptr; + } + + LoopVectorTy Loops; + for (Loop *L : breadth_first(&Root)) + Loops.push_back(L); + + if (!getInnerMostLoop(Loops)) { + LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more " + "than one innermost loop\n"); + return nullptr; + } + + return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT); +} + +void CacheCost::calculateCacheFootprint() { + LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n"); + ReferenceGroupsTy RefGroups; + if (!populateReferenceGroups(RefGroups)) + return; + + LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n"); + for (const Loop *L : Loops) { + assert((std::find_if(LoopCosts.begin(), LoopCosts.end(), + [L](const LoopCacheCostTy &LCC) { + return LCC.first == L; + }) == LoopCosts.end()) && + "Should not add duplicate element"); + CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups); + LoopCosts.push_back(std::make_pair(L, LoopCost)); + } + + sortLoopCosts(); + RefGroups.clear(); +} + +bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { + assert(RefGroups.empty() && "Reference groups should be empty"); + + unsigned CLS = TTI.getCacheLineSize(); + Loop *InnerMostLoop = getInnerMostLoop(Loops); + assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop"); + + for (BasicBlock *BB : InnerMostLoop->getBlocks()) { + for (Instruction &I : *BB) { + if (!isa<StoreInst>(I) && !isa<LoadInst>(I)) + continue; + + std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE)); + if (!R->isValid()) + continue; + + bool Added = false; + for (ReferenceGroupTy &RefGroup : RefGroups) { + const IndexedReference &Representative = *RefGroup.front().get(); + LLVM_DEBUG({ + dbgs() << "References:\n"; + dbgs().indent(2) << *R << "\n"; + dbgs().indent(2) << Representative << "\n"; + }); + + Optional<bool> HasTemporalReuse = + R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA); + Optional<bool> HasSpacialReuse = + R->hasSpacialReuse(Representative, CLS, AA); + + if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) || + (HasSpacialReuse.hasValue() && *HasSpacialReuse)) { + RefGroup.push_back(std::move(R)); + Added = true; + break; + } + } + + if (!Added) { + ReferenceGroupTy RG; + RG.push_back(std::move(R)); + RefGroups.push_back(std::move(RG)); + } + } + } + + if (RefGroups.empty()) + return false; + + LLVM_DEBUG({ + dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n"; + int n = 1; + for (const ReferenceGroupTy &RG : RefGroups) { + dbgs().indent(2) << "RefGroup " << n << ":\n"; + for (const auto &IR : RG) + dbgs().indent(4) << *IR << "\n"; + n++; + } + dbgs() << "\n"; + }); + + return true; +} + +CacheCostTy +CacheCost::computeLoopCacheCost(const Loop &L, + const ReferenceGroupsTy &RefGroups) const { + if (!L.isLoopSimplifyForm()) + return InvalidCost; + + LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName() + << "' as innermost loop.\n"); + + // Compute the product of the trip counts of each other loop in the nest. 
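As a worked example with made-up numbers: in a two-deep nest with trip counts 100 (outer i-loop) and 50 (inner j-loop), a reference group's contribution when the j-loop is treated as innermost is RefGroupCost(j) * 100, and when the i-loop is treated as innermost it is RefGroupCost(i) * 50; the loop with the smaller total cost is the better candidate for the innermost position.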
+ CacheCostTy TripCountsProduct = 1; + for (const auto &TC : TripCounts) { + if (TC.first == &L) + continue; + TripCountsProduct *= TC.second; + } + + CacheCostTy LoopCost = 0; + for (const ReferenceGroupTy &RG : RefGroups) { + CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L); + LoopCost += RefGroupCost * TripCountsProduct; + } + + LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName() + << "' has cost=" << LoopCost << "\n"); + + return LoopCost; +} + +CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG, + const Loop &L) const { + assert(!RG.empty() && "Reference group should have at least one member."); + + const IndexedReference *Representative = RG.front().get(); + return Representative->computeRefCost(L, TTI.getCacheLineSize()); +} + +//===----------------------------------------------------------------------===// +// LoopCachePrinterPass implementation +// +PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + + if (auto CC = CacheCost::getCacheCost(L, AR, DI)) + OS << *CC; + + return PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp index aa5da0859805..3dc29b40834c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -105,7 +106,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, I->moveBefore(InsertPt); if (MSSAU) if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(I)) - MSSAU->moveToPlace(MUD, InsertPt->getParent(), MemorySSA::End); + MSSAU->moveToPlace(MUD, InsertPt->getParent(), + MemorySSA::BeforeTerminator); // There is possibility of hoisting this instruction above some arbitrary // condition. Any metadata defined on it can be control dependent on this @@ -359,6 +361,44 @@ bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar, return SE.isLoopInvariant(IndDesc.getStep(), this); } +BranchInst *Loop::getLoopGuardBranch() const { + if (!isLoopSimplifyForm()) + return nullptr; + + BasicBlock *Preheader = getLoopPreheader(); + assert(Preheader && getLoopLatch() && + "Expecting a loop with valid preheader and latch"); + + // Loop should be in rotate form. + if (!isRotatedForm()) + return nullptr; + + // Disallow loops with more than one unique exit block, as we do not verify + // that GuardOtherSucc post dominates all exit blocks. + BasicBlock *ExitFromLatch = getUniqueExitBlock(); + if (!ExitFromLatch) + return nullptr; + + BasicBlock *ExitFromLatchSucc = ExitFromLatch->getUniqueSuccessor(); + if (!ExitFromLatchSucc) + return nullptr; + + BasicBlock *GuardBB = Preheader->getUniquePredecessor(); + if (!GuardBB) + return nullptr; + + assert(GuardBB->getTerminator() && "Expecting valid guard terminator"); + + BranchInst *GuardBI = dyn_cast<BranchInst>(GuardBB->getTerminator()); + if (!GuardBI || GuardBI->isUnconditional()) + return nullptr; + + BasicBlock *GuardOtherSucc = (GuardBI->getSuccessor(0) == Preheader) + ? GuardBI->getSuccessor(1) + : GuardBI->getSuccessor(0); + return (GuardOtherSucc == ExitFromLatchSucc) ? 
GuardBI : nullptr; +} + bool Loop::isCanonical(ScalarEvolution &SE) const { InductionDescriptor IndDesc; if (!getInductionDescriptor(SE, IndDesc)) @@ -1012,6 +1052,10 @@ MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context, // LoopInfo implementation // +LoopInfoWrapperPass::LoopInfoWrapperPass() : FunctionPass(ID) { + initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + char LoopInfoWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information", true, true) diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp index 4ab3798039d8..507f5f442865 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp @@ -20,9 +20,10 @@ #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PassTimingInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Timer.h" #include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -409,6 +410,10 @@ bool LoopPass::skipLoop(const Loop *L) const { return false; } +LCSSAVerificationPass::LCSSAVerificationPass() : FunctionPass(ID) { + initializeLCSSAVerificationPassPass(*PassRegistry::getPassRegistry()); +} + char LCSSAVerificationPass::ID = 0; INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier", false, false) diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp index 1728b5e9f6d2..762623de41e9 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -78,7 +78,7 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) { const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI = dyn_cast<FPMathOperator>(&I)) SimpleV = - SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); + SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp index 6e1bb50e8893..2c57e63251c6 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp index 77ebf89d9a08..5d824067df53 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -55,8 +56,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) { Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, LI->getType(), DL)) Deref.push_back(PO); - if 
(isDereferenceableAndAlignedPointer(PO, LI->getType(), - LI->getAlignment(), DL)) + if (isDereferenceableAndAlignedPointer( + PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL)) DerefAndAligned.insert(PO); } } diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp index 729dad463657..427e6fd3ace2 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -180,6 +180,19 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy, return None; } +static Optional<AllocFnsTy> +getAllocationData(const Value *V, AllocType AllocTy, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast = false) { + bool IsNoBuiltinCall; + if (const Function *Callee = + getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall)) + if (!IsNoBuiltinCall) + return getAllocationDataForFunction( + Callee, AllocTy, &GetTLI(const_cast<Function &>(*Callee))); + return None; +} + static Optional<AllocFnsTy> getAllocationSize(const Value *V, const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; @@ -223,6 +236,11 @@ bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue(); } +bool llvm::isAllocationFn( + const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, GetTLI, LookThroughBitCast).hasValue(); +} /// Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). @@ -240,6 +258,12 @@ bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue(); } +bool llvm::isMallocLikeFn( + const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, GetTLI, LookThroughBitCast) + .hasValue(); +} /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). @@ -276,12 +300,27 @@ bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue(); } +/// Tests if a value is a call or invoke to a library function that +/// allocates memory and throws if an allocation failed (e.g., new). +bool llvm::isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast).hasValue(); +} + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory (strdup, strndup). +bool llvm::isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, StrDupLike, TLI, LookThroughBitCast).hasValue(); +} + /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I, - const TargetLibraryInfo *TLI) { - return isMallocLikeFn(I, TLI) ? 
dyn_cast<CallInst>(I) : nullptr; +const CallInst *llvm::extractMallocCall( + const Value *I, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { + return isMallocLikeFn(I, GetTLI) ? dyn_cast<CallInst>(I) : nullptr; } static Value *computeArraySize(const CallInst *CI, const DataLayout &DL, @@ -505,6 +544,7 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second); Value *UseZero = Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second); + ResultSize = Builder.CreateZExtOrTrunc(ResultSize, ResultType); return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0), ResultSize); } @@ -521,9 +561,9 @@ STATISTIC(ObjectVisitorArgument, STATISTIC(ObjectVisitorLoad, "Number of load instructions with unsolved size and offset"); -APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { - if (Options.RoundToAlign && Align) - return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align)); +APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Alignment) { + if (Options.RoundToAlign && Alignment) + return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align(Alignment))); return Size; } @@ -537,7 +577,7 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL, } SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { - IntTyBits = DL.getPointerTypeSizeInBits(V->getType()); + IntTyBits = DL.getIndexTypeSizeInBits(V->getType()); Zero = APInt::getNullValue(IntTyBits); V = V->stripPointerCasts(); @@ -707,7 +747,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { SizeOffsetType PtrData = compute(GEP.getPointerOperand()); - APInt Offset(IntTyBits, 0); + APInt Offset(DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType()), 0); if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset)) return unknown(); @@ -795,7 +835,7 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator( SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { // XXX - Are vectors of pointers possible here? 
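The GetTLI-based overloads added above let callers supply per-function TargetLibraryInfo lazily through a callback. A minimal sketch, assuming it runs inside a legacy pass that can reach TargetLibraryInfoWrapperPass (V is a hypothetical Value being inspected):

    auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
      return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };
    if (isMallocLikeFn(V, GetTLI)) {
      // V is a call to a malloc-like allocation function.
    }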
- IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); + IntTy = cast<IntegerType>(DL.getIndexType(V->getType())); Zero = ConstantInt::get(IntTy, 0); SizeOffsetEvalType Result = compute_(V); @@ -899,12 +939,12 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { } Value *FirstArg = CS.getArgument(FnData->FstParam); - FirstArg = Builder.CreateZExt(FirstArg, IntTy); + FirstArg = Builder.CreateZExtOrTrunc(FirstArg, IntTy); if (FnData->SndParam < 0) return std::make_pair(FirstArg, Zero); Value *SecondArg = CS.getArgument(FnData->SndParam); - SecondArg = Builder.CreateZExt(SecondArg, IntTy); + SecondArg = Builder.CreateZExtOrTrunc(SecondArg, IntTy); Value *Size = Builder.CreateMul(FirstArg, SecondArg); return std::make_pair(Size, Zero); diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index b25b655165d7..a97a56e25805 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -183,7 +184,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, MemDepResult MemoryDependenceResults::getCallDependencyFrom( CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB) { - unsigned Limit = BlockScanLimit; + unsigned Limit = getDefaultBlockScanLimit(); // Walk backwards through the block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -356,7 +357,7 @@ MemDepResult MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB) { - if (!LI->getMetadata(LLVMContext::MD_invariant_group)) + if (!LI->hasMetadata(LLVMContext::MD_invariant_group)) return MemDepResult::getUnknown(); // Take the ptr operand after all casts and geps 0. This way we can search @@ -417,7 +418,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // same pointer operand) we can assume that value pointed by pointer // operand didn't change. if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && - U->getMetadata(LLVMContext::MD_invariant_group) != nullptr) + U->hasMetadata(LLVMContext::MD_invariant_group)) ClosestDependency = GetClosestDependency(ClosestDependency, U); } } @@ -443,7 +444,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( OrderedBasicBlock *OBB) { bool isInvariantLoad = false; - unsigned DefaultLimit = BlockScanLimit; + unsigned DefaultLimit = getDefaultBlockScanLimit(); if (!Limit) Limit = &DefaultLimit; @@ -481,7 +482,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // Arguably, this logic should be pushed inside AliasAnalysis itself. 
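Several hunks here replace the getMetadata(...) != nullptr pattern with the boolean hasMetadata(...) query; the two are equivalent, for example:

    bool IsInvariant = LI->hasMetadata(LLVMContext::MD_invariant_load);
    // ...previously spelled as:
    // LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr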
if (isLoad && QueryInst) { LoadInst *LI = dyn_cast<LoadInst>(QueryInst); - if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load)) isInvariantLoad = true; } @@ -1493,7 +1494,7 @@ void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies( if (auto *I = dyn_cast<Instruction>(P.getPointer())) { auto toRemoveIt = ReverseNonLocalDefsCache.find(I); if (toRemoveIt != ReverseNonLocalDefsCache.end()) { - for (const auto &entry : toRemoveIt->second) + for (const auto *entry : toRemoveIt->second) NonLocalDefsCache.erase(entry); ReverseNonLocalDefsCache.erase(toRemoveIt); } @@ -1746,6 +1747,9 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const { AnalysisKey MemoryDependenceAnalysis::Key; +MemoryDependenceAnalysis::MemoryDependenceAnalysis() + : DefaultBlockScanLimit(BlockScanLimit) {} + MemoryDependenceResults MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AA = AM.getResult<AAManager>(F); @@ -1753,7 +1757,7 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &PV = AM.getResult<PhiValuesAnalysis>(F); - return MemoryDependenceResults(AA, AC, TLI, DT, PV); + return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit); } char MemoryDependenceWrapperPass::ID = 0; @@ -1807,15 +1811,15 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P } unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const { - return BlockScanLimit; + return DefaultBlockScanLimit; } bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult(); - MemDep.emplace(AA, AC, TLI, DT, PV); + MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp index 163830eee797..103cdea148e5 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp index 17f5d9b9f0ad..bf8dc94bfbf9 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Use.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -49,6 +50,7 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> +#include <cstdlib> #include <iterator> #include <memory> #include <utility> @@ -83,7 +85,7 @@ bool llvm::VerifyMemorySSA = false; #endif 
/// Enables memory ssa as a dependency for loop passes in legacy pass manager. cl::opt<bool> llvm::EnableMSSALoopDependency( - "enable-mssa-loop-dependency", cl::Hidden, cl::init(false), + "enable-mssa-loop-dependency", cl::Hidden, cl::init(true), cl::desc("Enable MemorySSA dependency for loop pass manager")); static cl::opt<bool, true> @@ -284,6 +286,11 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, case Intrinsic::invariant_end: case Intrinsic::assume: return {false, NoAlias}; + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::dbg_value: + llvm_unreachable("debuginfo shouldn't have associated defs!"); default: break; } @@ -369,7 +376,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { // If the memory can't be changed, then loads of the memory can't be // clobbered. - return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || + return isa<LoadInst>(I) && (I->hasMetadata(LLVMContext::MD_invariant_load) || AA.pointsToConstantMemory(MemoryLocation( cast<LoadInst>(I)->getPointerOperand()))); } @@ -867,6 +874,7 @@ template <class AliasAnalysisType> class ClobberWalker { if (!DefChainEnd) for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) DefChainEnd = MA; + assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry"); // If any of the terminated paths don't dominate the phi we'll try to // optimize, we need to figure out what they are and quit. @@ -1087,9 +1095,14 @@ void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, AccessList *Accesses = It->second.get(); auto *Phi = cast<MemoryPhi>(&Accesses->front()); if (RenameAllUses) { - int PhiIndex = Phi->getBasicBlockIndex(BB); - assert(PhiIndex != -1 && "Incomplete phi during partial rename"); - Phi->setIncomingValue(PhiIndex, IncomingVal); + bool ReplacementDone = false; + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) + if (Phi->getIncomingBlock(I) == BB) { + Phi->setIncomingValue(I, IncomingVal); + ReplacementDone = true; + } + (void) ReplacementDone; + assert(ReplacementDone && "Incomplete phi during partial rename"); } else Phi->addIncoming(IncomingVal, BB); } @@ -1217,6 +1230,7 @@ MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) // safe because there are no CFG changes while building MemorySSA and can // significantly reduce the time spent by the compiler in AA, because we will // make queries about all the instructions in the Function. 
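BatchAAResults wraps an existing AAResults and reuses one query state across many alias queries, which is what makes handing it to the whole-function MemorySSA build below worthwhile. A hedged sketch with illustrative names:

    BatchAAResults BatchAA(AAR);   // AAR: the AAResults for this function
    AliasResult R = BatchAA.alias(MemoryLocation::get(LoadA),
                                  MemoryLocation::get(LoadB));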
+ assert(AA && "No alias analysis?"); BatchAAResults BatchAA(*AA); buildMemorySSA(BatchAA); // Intentionally leave AA to nullptr while building so we don't accidently @@ -1237,7 +1251,7 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = llvm::make_unique<AccessList>(); + Res.first->second = std::make_unique<AccessList>(); return Res.first->second.get(); } @@ -1245,7 +1259,7 @@ MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = llvm::make_unique<DefsList>(); + Res.first->second = std::make_unique<DefsList>(); return Res.first->second.get(); } @@ -1554,10 +1568,10 @@ MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() { if (!WalkerBase) WalkerBase = - llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); + std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); Walker = - llvm::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get()); + std::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get()); return Walker.get(); } @@ -1567,10 +1581,10 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() { if (!WalkerBase) WalkerBase = - llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); + std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); SkipWalker = - llvm::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get()); + std::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get()); return SkipWalker.get(); } @@ -1687,13 +1701,15 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, MemoryAccess *Definition, - const MemoryUseOrDef *Template) { + const MemoryUseOrDef *Template, + bool CreationMustSucceed) { assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI"); MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template); - assert( - NewAccess != nullptr && - "Tried to create a memory access for a non-memory touching instruction"); - NewAccess->setDefiningAccess(Definition); + if (CreationMustSucceed) + assert(NewAccess != nullptr && "Tried to create a memory access for a " + "non-memory touching instruction"); + if (NewAccess) + NewAccess->setDefiningAccess(Definition); return NewAccess; } @@ -1717,13 +1733,21 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, AliasAnalysisType *AAP, const MemoryUseOrDef *Template) { // The assume intrinsic has a control dependency which we model by claiming - // that it writes arbitrarily. Ignore that fake memory dependency here. + // that it writes arbitrarily. Debuginfo intrinsics may be considered + // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory + // dependencies here. // FIXME: Replace this special casing with a more accurate modelling of // assume's control dependency. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) if (II->getIntrinsicID() == Intrinsic::assume) return nullptr; + // Using a nonstandard AA pipelines might leave us with unexpected modref + // results for I, so add a check to not model instructions that may not read + // from or write to memory. This is necessary for correctness. 
+ if (!I->mayReadFromMemory() && !I->mayWriteToMemory()) + return nullptr; + bool Def, Use; if (Template) { Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr; @@ -1846,10 +1870,9 @@ LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); } #endif void MemorySSA::verifyMemorySSA() const { - verifyDefUses(F); - verifyDomination(F); - verifyOrdering(F); + verifyOrderingDominationAndDefUses(F); verifyDominationNumbers(F); + verifyPrevDefInPhis(F); // Previously, the verification used to also verify that the clobberingAccess // cached by MemorySSA is the same as the clobberingAccess found at a later // query to AA. This does not hold true in general due to the current fragility @@ -1862,6 +1885,40 @@ void MemorySSA::verifyMemorySSA() const { // example, see test4 added in D51960. } +void MemorySSA::verifyPrevDefInPhis(Function &F) const { +#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS) + for (const BasicBlock &BB : F) { + if (MemoryPhi *Phi = getMemoryAccess(&BB)) { + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { + auto *Pred = Phi->getIncomingBlock(I); + auto *IncAcc = Phi->getIncomingValue(I); + // If Pred has no unreachable predecessors, get last def looking at + // IDoms. If, while walkings IDoms, any of these has an unreachable + // predecessor, then the incoming def can be any access. + if (auto *DTNode = DT->getNode(Pred)) { + while (DTNode) { + if (auto *DefList = getBlockDefs(DTNode->getBlock())) { + auto *LastAcc = &*(--DefList->end()); + assert(LastAcc == IncAcc && + "Incorrect incoming access into phi."); + break; + } + DTNode = DTNode->getIDom(); + } + } else { + // If Pred has unreachable predecessors, but has at least a Def, the + // incoming access can be the last Def in Pred, or it could have been + // optimized to LoE. After an update, though, the LoE may have been + // replaced by another access, so IncAcc may be any access. + // If Pred has unreachable predecessors and no Defs, incoming access + // should be LoE; However, after an update, it may be any access. + } + } + } + } +#endif +} + /// Verify that all of the blocks we believe to have valid domination numbers /// actually have valid domination numbers. void MemorySSA::verifyDominationNumbers(const Function &F) const { @@ -1900,10 +1957,14 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const { #endif } -/// Verify that the order and existence of MemoryAccesses matches the +/// Verify ordering: the order and existence of MemoryAccesses matches the /// order and existence of memory affecting instructions. -void MemorySSA::verifyOrdering(Function &F) const { -#ifndef NDEBUG +/// Verify domination: each definition dominates all of its uses. +/// Verify def-uses: the immediate use information - walk all the memory +/// accesses and verifying that, for each use, it appears in the appropriate +/// def's use list +void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const { +#if !defined(NDEBUG) // Walk all the blocks, comparing what the lookups think and what the access // lists think, as well as the order in the blocks vs the order in the access // lists. @@ -1912,29 +1973,56 @@ void MemorySSA::verifyOrdering(Function &F) const { for (BasicBlock &B : F) { const AccessList *AL = getBlockAccesses(&B); const auto *DL = getBlockDefs(&B); - MemoryAccess *Phi = getMemoryAccess(&B); + MemoryPhi *Phi = getMemoryAccess(&B); if (Phi) { + // Verify ordering. 
ActualAccesses.push_back(Phi); ActualDefs.push_back(Phi); + // Verify domination + for (const Use &U : Phi->uses()) + assert(dominates(Phi, U) && "Memory PHI does not dominate it's uses"); +#if defined(EXPENSIVE_CHECKS) + // Verify def-uses. + assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance( + pred_begin(&B), pred_end(&B))) && + "Incomplete MemoryPhi Node"); + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { + verifyUseInDefs(Phi->getIncomingValue(I), Phi); + assert(find(predecessors(&B), Phi->getIncomingBlock(I)) != + pred_end(&B) && + "Incoming phi block not a block predecessor"); + } +#endif } for (Instruction &I : B) { - MemoryAccess *MA = getMemoryAccess(&I); + MemoryUseOrDef *MA = getMemoryAccess(&I); assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) && "We have memory affecting instructions " "in this block but they are not in the " "access list or defs list"); if (MA) { + // Verify ordering. ActualAccesses.push_back(MA); - if (isa<MemoryDef>(MA)) + if (MemoryAccess *MD = dyn_cast<MemoryDef>(MA)) { + // Verify ordering. ActualDefs.push_back(MA); + // Verify domination. + for (const Use &U : MD->uses()) + assert(dominates(MD, U) && + "Memory Def does not dominate it's uses"); + } +#if defined(EXPENSIVE_CHECKS) + // Verify def-uses. + verifyUseInDefs(MA->getDefiningAccess(), MA); +#endif } } // Either we hit the assert, really have no accesses, or we have both - // accesses and an access list. - // Same with defs. + // accesses and an access list. Same with defs. if (!AL && !DL) continue; + // Verify ordering. assert(AL->size() == ActualAccesses.size() && "We don't have the same number of accesses in the block as on the " "access list"); @@ -1965,28 +2053,6 @@ void MemorySSA::verifyOrdering(Function &F) const { #endif } -/// Verify the domination properties of MemorySSA by checking that each -/// definition dominates all of its uses. -void MemorySSA::verifyDomination(Function &F) const { -#ifndef NDEBUG - for (BasicBlock &B : F) { - // Phi nodes are attached to basic blocks - if (MemoryPhi *MP = getMemoryAccess(&B)) - for (const Use &U : MP->uses()) - assert(dominates(MP, U) && "Memory PHI does not dominate it's uses"); - - for (Instruction &I : B) { - MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I)); - if (!MD) - continue; - - for (const Use &U : MD->uses()) - assert(dominates(MD, U) && "Memory Def does not dominate it's uses"); - } - } -#endif -} - /// Verify the def-use lists in MemorySSA, by verifying that \p Use /// appears in the use list of \p Def. 
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { @@ -2001,34 +2067,6 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { #endif } -/// Verify the immediate use information, by walking all the memory -/// accesses and verifying that, for each use, it appears in the -/// appropriate def's use list -void MemorySSA::verifyDefUses(Function &F) const { -#ifndef NDEBUG - for (BasicBlock &B : F) { - // Phi nodes are attached to basic blocks - if (MemoryPhi *Phi = getMemoryAccess(&B)) { - assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance( - pred_begin(&B), pred_end(&B))) && - "Incomplete MemoryPhi Node"); - for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { - verifyUseInDefs(Phi->getIncomingValue(I), Phi); - assert(find(predecessors(&B), Phi->getIncomingBlock(I)) != - pred_end(&B) && - "Incoming phi block not a block predecessor"); - } - } - - for (Instruction &I : B) { - if (MemoryUseOrDef *MA = getMemoryAccess(&I)) { - verifyUseInDefs(MA->getDefiningAccess(), MA); - } - } - } -#endif -} - /// Perform a local numbering on blocks so that instruction ordering can be /// determined in constant time. /// TODO: We currently just number in order. If we numbered by N, we could @@ -2212,7 +2250,7 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); - return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT)); + return MemorySSAAnalysis::Result(std::make_unique<MemorySSA>(F, &AA, &DT)); } bool MemorySSAAnalysis::Result::invalidate( diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp index 4c1feee7fd9a..473268982f2d 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -44,11 +44,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // First, do a cache lookup. Without this cache, certain CFG structures // (like a series of if statements) take exponential time to visit. auto Cached = CachedPreviousDef.find(BB); - if (Cached != CachedPreviousDef.end()) { + if (Cached != CachedPreviousDef.end()) return Cached->second; - } - if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // If this method is called from an unreachable block, return LoE. + if (!MSSA->DT->isReachableFromEntry(BB)) + return MSSA->getLiveOnEntryDef(); + + if (BasicBlock *Pred = BB->getUniquePredecessor()) { + VisitedBlocks.insert(BB); // Single predecessor case, just recurse, we can only have one definition. MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); CachedPreviousDef.insert({BB, Result}); @@ -71,11 +75,19 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // Recurse to get the values in our predecessors for placement of a // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. 
- for (auto *Pred : predecessors(BB)) - if (MSSA->DT->isReachableFromEntry(Pred)) - PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); - else + bool UniqueIncomingAccess = true; + MemoryAccess *SingleAccess = nullptr; + for (auto *Pred : predecessors(BB)) { + if (MSSA->DT->isReachableFromEntry(Pred)) { + auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef); + if (!SingleAccess) + SingleAccess = IncomingAccess; + else if (IncomingAccess != SingleAccess) + UniqueIncomingAccess = false; + PhiOps.push_back(IncomingAccess); + } else PhiOps.push_back(MSSA->getLiveOnEntryDef()); + } // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay @@ -84,7 +96,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // See if we can avoid the phi by simplifying it. auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); // If we couldn't simplify, we may have to create a phi - if (Result == Phi) { + if (Result == Phi && UniqueIncomingAccess && SingleAccess) { + // A concrete Phi only exists if we created an empty one to break a cycle. + if (Phi) { + assert(Phi->operands().empty() && "Expected empty Phi"); + Phi->replaceAllUsesWith(SingleAccess); + removeMemoryAccess(Phi); + } + Result = SingleAccess; + } else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) { if (!Phi) Phi = MSSA->createMemoryPhi(BB); @@ -173,12 +193,9 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { TrackingVH<MemoryAccess> Res(Phi); SmallVector<TrackingVH<Value>, 8> Uses; std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses)); - for (auto &U : Uses) { - if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) { - auto OperRange = UsePhi->operands(); - tryRemoveTrivialPhi(UsePhi, OperRange); - } - } + for (auto &U : Uses) + if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) + tryRemoveTrivialPhi(UsePhi); return Res; } @@ -187,6 +204,11 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { // argument. // IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c) // We recursively try to remove them. +MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi) { + assert(Phi && "Can only remove concrete Phi."); + auto OperRange = Phi->operands(); + return tryRemoveTrivialPhi(Phi, OperRange); +} template <class RangeType> MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands) { @@ -218,17 +240,49 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, return recursePhi(Same); } -void MemorySSAUpdater::insertUse(MemoryUse *MU) { +void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) { InsertedPHIs.clear(); MU->setDefiningAccess(getPreviousDef(MU)); - // Unlike for defs, there is no extra work to do. Because uses do not create - // new may-defs, there are only two cases: - // + + // In cases without unreachable blocks, because uses do not create new + // may-defs, there are only two cases: // 1. There was a def already below us, and therefore, we should not have // created a phi node because it was already needed for the def. // // 2. There is no def below us, and therefore, there is no extra renaming work // to do. + + // In cases with unreachable blocks, where the unnecessary Phis were + // optimized out, adding the Use may re-insert those Phis. Hence, when + // inserting Uses outside of the MSSA creation process, and new Phis were + // added, rename all uses if we are asked. 
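A hedged usage sketch, mirroring the pattern of in-tree callers such as LICM (NewLI and MSSA are illustrative names): after cloning a load into a block, create its memory access and let insertUse() re-run renaming in case new Phis were introduced:

    MemorySSAUpdater MSSAU(&MSSA);
    MemoryUseOrDef *NewAcc = MSSAU.createMemoryAccessInBB(
        NewLI, /*Definition=*/nullptr, NewLI->getParent(), MemorySSA::Beginning);
    if (auto *MU = dyn_cast_or_null<MemoryUse>(NewAcc))
      MSSAU.insertUse(MU, /*RenameUses=*/true);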
+ + if (!RenameUses && !InsertedPHIs.empty()) { + auto *Defs = MSSA->getBlockDefs(MU->getBlock()); + (void)Defs; + assert((!Defs || (++Defs->begin() == Defs->end())) && + "Block may have only a Phi or no defs"); + } + + if (RenameUses && InsertedPHIs.size()) { + SmallPtrSet<BasicBlock *, 16> Visited; + BasicBlock *StartBlock = MU->getBlock(); + + if (auto *Defs = MSSA->getWritableBlockDefs(StartBlock)) { + MemoryAccess *FirstDef = &*Defs->begin(); + // Convert to incoming value if it's a memorydef. A phi *is* already an + // incoming value. + if (auto *MD = dyn_cast<MemoryDef>(FirstDef)) + FirstDef = MD->getDefiningAccess(); + + MSSA->renamePass(MU->getBlock(), FirstDef, Visited); + } + // We just inserted a phi into this block, so the incoming value will + // become the phi anyway, so it does not matter what we pass. + for (auto &MP : InsertedPHIs) + if (MemoryPhi *Phi = cast_or_null<MemoryPhi>(MP)) + MSSA->renamePass(Phi->getBlock(), nullptr, Visited); + } } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. @@ -260,33 +314,35 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // See if we had a local def, and if not, go hunting. MemoryAccess *DefBefore = getPreviousDef(MD); - bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock(); + bool DefBeforeSameBlock = false; + if (DefBefore->getBlock() == MD->getBlock() && + !(isa<MemoryPhi>(DefBefore) && + std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) != + InsertedPHIs.end())) + DefBeforeSameBlock = true; // There is a def before us, which means we can replace any store/phi uses // of that thing with us, since we are in the way of whatever was there // before. // We now define that def's memorydefs and memoryphis if (DefBeforeSameBlock) { - for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end(); - UI != UE;) { - Use &U = *UI++; + DefBefore->replaceUsesWithIf(MD, [MD](Use &U) { // Leave the MemoryUses alone. // Also make sure we skip ourselves to avoid self references. - if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD) - continue; + User *Usr = U.getUser(); + return !isa<MemoryUse>(Usr) && Usr != MD; // Defs are automatically unoptimized when the user is set to MD below, // because the isOptimized() call will fail to find the same ID. - U.set(MD); - } + }); } // and that def is now our defining access. MD->setDefiningAccess(DefBefore); - // Remember the index where we may insert new phis below. - unsigned NewPhiIndex = InsertedPHIs.size(); - SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); + + // Remember the index where we may insert new phis. + unsigned NewPhiIndex = InsertedPHIs.size(); if (!DefBeforeSameBlock) { // If there was a local def before us, we must have the same effect it // did. Because every may-def is the same, any phis/etc we would create, it @@ -302,46 +358,54 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // If this is the first def in the block and this insert is in an arbitrary // place, compute IDF and place phis. + SmallPtrSet<BasicBlock *, 2> DefiningBlocks; + + // If this is the last Def in the block, also compute IDF based on MD, since + // this may a new Def added, and we may need additional Phis. 
auto Iter = MD->getDefsIterator(); ++Iter; auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end(); - if (Iter == IterEnd) { - ForwardIDFCalculator IDFs(*MSSA->DT); - SmallVector<BasicBlock *, 32> IDFBlocks; - SmallPtrSet<BasicBlock *, 2> DefiningBlocks; + if (Iter == IterEnd) DefiningBlocks.insert(MD->getBlock()); - IDFs.setDefiningBlocks(DefiningBlocks); - IDFs.calculate(IDFBlocks); - SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs; - for (auto *BBIDF : IDFBlocks) - if (!MSSA->getMemoryAccess(BBIDF)) { - auto *MPhi = MSSA->createMemoryPhi(BBIDF); - NewInsertedPHIs.push_back(MPhi); - // Add the phis created into the IDF blocks to NonOptPhis, so they are - // not optimized out as trivial by the call to getPreviousDefFromEnd - // below. Once they are complete, all these Phis are added to the - // FixupList, and removed from NonOptPhis inside fixupDefs(). - NonOptPhis.insert(MPhi); - } - for (auto &MPhi : NewInsertedPHIs) { - auto *BBIDF = MPhi->getBlock(); - for (auto *Pred : predecessors(BBIDF)) { - DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef; - MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), - Pred); - } + for (const auto &VH : InsertedPHIs) + if (const auto *RealPHI = cast_or_null<MemoryPhi>(VH)) + DefiningBlocks.insert(RealPHI->getBlock()); + ForwardIDFCalculator IDFs(*MSSA->DT); + SmallVector<BasicBlock *, 32> IDFBlocks; + IDFs.setDefiningBlocks(DefiningBlocks); + IDFs.calculate(IDFBlocks); + SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs; + for (auto *BBIDF : IDFBlocks) { + auto *MPhi = MSSA->getMemoryAccess(BBIDF); + if (!MPhi) { + MPhi = MSSA->createMemoryPhi(BBIDF); + NewInsertedPHIs.push_back(MPhi); } - - // Re-take the index where we're adding the new phis, because the above - // call to getPreviousDefFromEnd, may have inserted into InsertedPHIs. - NewPhiIndex = InsertedPHIs.size(); - for (auto &MPhi : NewInsertedPHIs) { - InsertedPHIs.push_back(&*MPhi); - FixupList.push_back(&*MPhi); + // Add the phis created into the IDF blocks to NonOptPhis, so they are not + // optimized out as trivial by the call to getPreviousDefFromEnd below. + // Once they are complete, all these Phis are added to the FixupList, and + // removed from NonOptPhis inside fixupDefs(). Existing Phis in IDF may + // need fixing as well, and potentially be trivial before this insertion, + // hence add all IDF Phis. See PR43044. + NonOptPhis.insert(MPhi); + } + for (auto &MPhi : NewInsertedPHIs) { + auto *BBIDF = MPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef; + MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred); } } + // Re-take the index where we're adding the new phis, because the above call + // to getPreviousDefFromEnd, may have inserted into InsertedPHIs. 
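For reference, the phi-placement step in this hunk boils down to one iterated-dominance-frontier query: the block of the new MemoryDef plus the blocks of any Phis inserted so far form the defining blocks, and new MemoryPhis go at frontier blocks that lack one. A minimal sketch of that query; the setDefiningBlocks/calculate calls are taken from the hunk itself, while the helper name and include paths are assumptions:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"

// Return the blocks that may need a new MemoryPhi: the iterated dominance
// frontier of the blocks that (now) contain a definition.
static void computePhiBlocks(
    llvm::DominatorTree &DT,
    const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &DefiningBlocks,
    llvm::SmallVectorImpl<llvm::BasicBlock *> &PhiBlocks) {
  llvm::ForwardIDFCalculator IDFs(DT);
  IDFs.setDefiningBlocks(DefiningBlocks);
  IDFs.calculate(PhiBlocks);
}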
+ NewPhiIndex = InsertedPHIs.size(); + for (auto &MPhi : NewInsertedPHIs) { + InsertedPHIs.push_back(&*MPhi); + FixupList.push_back(&*MPhi); + } + FixupList.push_back(MD); } @@ -458,8 +522,7 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) { void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) { if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) { MPhi->unorderedDeleteIncomingBlock(From); - if (MPhi->getNumIncomingValues() == 1) - removeMemoryAccess(MPhi); + tryRemoveTrivialPhi(MPhi); } } @@ -475,34 +538,51 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From, Found = true; return false; }); - if (MPhi->getNumIncomingValues() == 1) - removeMemoryAccess(MPhi); + tryRemoveTrivialPhi(MPhi); + } +} + +static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, + const ValueToValueMapTy &VMap, + PhiToDefMap &MPhiMap, + bool CloneWasSimplified, + MemorySSA *MSSA) { + MemoryAccess *InsnDefining = MA; + if (MemoryDef *DefMUD = dyn_cast<MemoryDef>(InsnDefining)) { + if (!MSSA->isLiveOnEntryDef(DefMUD)) { + Instruction *DefMUDI = DefMUD->getMemoryInst(); + assert(DefMUDI && "Found MemoryUseOrDef with no Instruction."); + if (Instruction *NewDefMUDI = + cast_or_null<Instruction>(VMap.lookup(DefMUDI))) { + InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); + if (!CloneWasSimplified) + assert(InsnDefining && "Defining instruction cannot be nullptr."); + else if (!InsnDefining || isa<MemoryUse>(InsnDefining)) { + // The clone was simplified, it's no longer a MemoryDef, look up. + auto DefIt = DefMUD->getDefsIterator(); + // Since simplified clones only occur in single block cloning, a + // previous definition must exist, otherwise NewDefMUDI would not + // have been found in VMap. + assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() && + "Previous def must exist"); + InsnDefining = getNewDefiningAccessForClone( + &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA); + } + } + } + } else { + MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining); + if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi)) + InsnDefining = NewDefPhi; } + assert(InsnDefining && "Defining instruction cannot be nullptr."); + return InsnDefining; } void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, bool CloneWasSimplified) { - auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * { - MemoryAccess *InsnDefining = MA; - if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) { - if (!MSSA->isLiveOnEntryDef(DefMUD)) { - Instruction *DefMUDI = DefMUD->getMemoryInst(); - assert(DefMUDI && "Found MemoryUseOrDef with no Instruction."); - if (Instruction *NewDefMUDI = - cast_or_null<Instruction>(VMap.lookup(DefMUDI))) - InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); - } - } else { - MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining); - if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi)) - InsnDefining = NewDefPhi; - } - assert(InsnDefining && "Defining instruction cannot be nullptr."); - return InsnDefining; - }; - const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); if (!Acc) return; @@ -519,9 +599,13 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, if (Instruction *NewInsn = dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) { MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess( - NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), - CloneWasSimplified ? 
nullptr : MUD); - MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); + NewInsn, + getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap, + MPhiMap, CloneWasSimplified, MSSA), + /*Template=*/CloneWasSimplified ? nullptr : MUD, + /*CreationMustSucceed=*/CloneWasSimplified ? false : true); + if (NewUseOrDef) + MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); } } } @@ -563,8 +647,7 @@ void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock( // If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be // replaced with the unique value. - if (HasUniqueIncomingValue) - removeMemoryAccess(NewMPhi); + tryRemoveTrivialPhi(NewMPhi); } void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, @@ -770,6 +853,9 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, } else { // Single predecessor, BB cannot be dead. GetLastDef of Pred. assert(Count == 1 && Pred && "Single predecessor expected."); + // BB can be unreachable though, return LoE if that is the case. + if (!DT.getNode(BB)) + return MSSA->getLiveOnEntryDef(); BB = Pred; } }; @@ -1010,7 +1096,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, for (; UI != E;) { Use &U = *UI; ++UI; - MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser()); + MemoryAccess *Usr = cast<MemoryAccess>(U.getUser()); if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) { BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U); if (!DT.dominates(DominatingBlock, DominatedBlock)) @@ -1052,9 +1138,9 @@ void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB, // Now reinsert it into the IR and do whatever fixups needed. if (auto *MD = dyn_cast<MemoryDef>(What)) - insertDef(MD); + insertDef(MD, /*RenameUses=*/true); else - insertUse(cast<MemoryUse>(What)); + insertUse(cast<MemoryUse>(What), /*RenameUses=*/true); // Clear dangling pointers. We added all MemoryPhi users, but not all // of them are removed by fixupDefs(). @@ -1073,7 +1159,13 @@ void MemorySSAUpdater::moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where) { void MemorySSAUpdater::moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where) { - return moveTo(What, BB, Where); + if (Where != MemorySSA::InsertionPlace::BeforeTerminator) + return moveTo(What, BB, Where); + + if (auto *Where = MSSA->getMemoryAccess(BB->getTerminator())) + return moveBefore(What, Where); + else + return moveTo(What, BB, MemorySSA::InsertionPlace::End); } // All accesses in To used to be in From. Move to end and update access lists. @@ -1084,25 +1176,32 @@ void MemorySSAUpdater::moveAllAccesses(BasicBlock *From, BasicBlock *To, if (!Accs) return; + assert(Start->getParent() == To && "Incorrect Start instruction"); MemoryAccess *FirstInNew = nullptr; for (Instruction &I : make_range(Start->getIterator(), To->end())) if ((FirstInNew = MSSA->getMemoryAccess(&I))) break; - if (!FirstInNew) - return; + if (FirstInNew) { + auto *MUD = cast<MemoryUseOrDef>(FirstInNew); + do { + auto NextIt = ++MUD->getIterator(); + MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) + ? nullptr + : cast<MemoryUseOrDef>(&*NextIt); + MSSA->moveTo(MUD, To, MemorySSA::End); + // Moving MUD from Accs in the moveTo above, may delete Accs, so we need + // to retrieve it again. 
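Several hunks in this file replace ad-hoc "single incoming value" checks with tryRemoveTrivialPhi. A phi is trivial when every operand is either the phi itself or one common value, so the phi can be replaced by that value. A standalone sketch of the test, using hypothetical toy types rather than the MemorySSA classes:

#include <vector>

struct Node; // hypothetical stand-in for a MemoryAccess

struct Phi {
  Node *Self;                   // identity of the phi as seen by its operands
  std::vector<Node *> Operands; // incoming accesses
};

// Return the unique value the phi forwards, or nullptr if a real phi is needed.
static Node *trivialPhiValue(const Phi &P) {
  Node *Same = nullptr;
  for (Node *Op : P.Operands) {
    if (Op == P.Self || Op == Same)
      continue;                 // self references and repeats are ignored
    if (Same)
      return nullptr;           // the phi merges at least two distinct values
    Same = Op;
  }
  return Same;
}

When a non-null value is returned, the caller replaces all uses of the phi with it and deletes the phi; tryRemoveTrivialPhi additionally recurses into users that may have become trivial in turn.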
+ Accs = MSSA->getWritableBlockAccesses(From); + MUD = NextMUD; + } while (MUD); + } - auto *MUD = cast<MemoryUseOrDef>(FirstInNew); - do { - auto NextIt = ++MUD->getIterator(); - MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) - ? nullptr - : cast<MemoryUseOrDef>(&*NextIt); - MSSA->moveTo(MUD, To, MemorySSA::End); - // Moving MUD from Accs in the moveTo above, may delete Accs, so we need to - // retrieve it again. - Accs = MSSA->getWritableBlockAccesses(From); - MUD = NextMUD; - } while (MUD); + // If all accesses were moved and only a trivial Phi remains, we try to remove + // that Phi. This is needed when From is going to be deleted. + auto *Defs = MSSA->getWritableBlockDefs(From); + if (Defs && !Defs->empty()) + if (auto *Phi = dyn_cast<MemoryPhi>(&*Defs->begin())) + tryRemoveTrivialPhi(Phi); } void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, @@ -1118,7 +1217,7 @@ void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start) { - assert(From->getSinglePredecessor() == To && + assert(From->getUniquePredecessor() == To && "From block is expected to have a single predecessor (To)."); moveAllAccesses(From, To, Start); for (BasicBlock *Succ : successors(From)) @@ -1173,8 +1272,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor( return false; }); Phi->addIncoming(NewPhi, New); - if (onlySingleValue(NewPhi)) - removeMemoryAccess(NewPhi); + tryRemoveTrivialPhi(NewPhi); } } @@ -1239,10 +1337,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) { unsigned PhisSize = PhisToOptimize.size(); while (PhisSize-- > 0) if (MemoryPhi *MP = - cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) { - auto OperRange = MP->operands(); - tryRemoveTrivialPhi(MP, OperRange); - } + cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) + tryRemoveTrivialPhi(MP); } } @@ -1256,8 +1352,7 @@ void MemorySSAUpdater::removeBlocks( if (!DeadBlocks.count(Succ)) if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) { MP->unorderedDeleteIncomingBlock(BB); - if (MP->getNumIncomingValues() == 1) - removeMemoryAccess(MP); + tryRemoveTrivialPhi(MP); } // Drop all references of all accesses in BB if (MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB)) @@ -1281,10 +1376,8 @@ void MemorySSAUpdater::removeBlocks( void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) { for (auto &VH : UpdatedPHIs) - if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) { - auto OperRange = MPhi->operands(); - tryRemoveTrivialPhi(MPhi, OperRange); - } + if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) + tryRemoveTrivialPhi(MPhi); } void MemorySSAUpdater::changeToUnreachable(const Instruction *I) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index 519242759824..52b884fb88e0 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index e25eb290a665..8a1206f49c21 
100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Pass.h" @@ -319,7 +320,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, auto *CalledValue = CS.getCalledValue(); auto *CalledFunction = CS.getCalledFunction(); if (CalledValue && !CalledFunction) { - CalledValue = CalledValue->stripPointerCastsNoFollowAliases(); + CalledValue = CalledValue->stripPointerCasts(); // Stripping pointer casts can reveal a called function. CalledFunction = dyn_cast<Function>(CalledValue); } @@ -466,8 +467,9 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(), // FIXME: refactor this to use the same code that inliner is using. // Don't try to import functions with noinline attribute. - F.getAttributes().hasFnAttribute(Attribute::NoInline)}; - auto FuncSummary = llvm::make_unique<FunctionSummary>( + F.getAttributes().hasFnAttribute(Attribute::NoInline), + F.hasFnAttribute(Attribute::AlwaysInline)}; + auto FuncSummary = std::make_unique<FunctionSummary>( Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), @@ -598,7 +600,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized); - auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags, + auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags, RefEdges.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(V.getGUID()); @@ -616,7 +618,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, /* Live = */ false, A.isDSOLocal(), A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr()); - auto AS = llvm::make_unique<AliasSummary>(Flags); + auto AS = std::make_unique<AliasSummary>(Flags); auto *Aliasee = A.getBaseObject(); auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); assert(AliaseeVI && "Alias expects aliasee summary to be available"); @@ -696,14 +698,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( // Create the appropriate summary type. 
if (Function *F = dyn_cast<Function>(GV)) { std::unique_ptr<FunctionSummary> Summary = - llvm::make_unique<FunctionSummary>( + std::make_unique<FunctionSummary>( GVFlags, /*InstCount=*/0, FunctionSummary::FFlags{ F->hasFnAttribute(Attribute::ReadNone), F->hasFnAttribute(Attribute::ReadOnly), F->hasFnAttribute(Attribute::NoRecurse), F->returnDoesNotAlias(), - /* NoInline = */ false}, + /* NoInline = */ false, + F->hasFnAttribute(Attribute::AlwaysInline)}, /*EntryCount=*/0, ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{}, ArrayRef<GlobalValue::GUID>{}, @@ -714,7 +717,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr<GlobalVarSummary> Summary = - llvm::make_unique<GlobalVarSummary>( + std::make_unique<GlobalVarSummary>( GVFlags, GlobalVarSummary::GVarFlags(false, false), ArrayRef<ValueInfo>{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); @@ -741,7 +744,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( else if (F.hasProfileData()) { LoopInfo LI{DT}; BranchProbabilityInfo BPI{F, LI}; - BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI); + BFIPtr = std::make_unique<BlockFrequencyInfo>(F, BPI, LI); BFI = BFIPtr.get(); } @@ -813,11 +816,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( if (!ModuleSummaryDotFile.empty()) { std::error_code EC; - raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None); if (EC) report_fatal_error(Twine("Failed to open dot file ") + ModuleSummaryDotFile + ": " + EC.message() + "\n"); - Index.exportToDot(OSDot); + Index.exportToDot(OSDot, {}); } return Index; diff --git a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp index b616cd6f762b..952c2cbfec4e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp @@ -7,20 +7,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MustExecute.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; +#define DEBUG_TYPE "must-execute" + const DenseMap<BasicBlock *, ColorVector> & LoopSafetyInfo::getBlockColors() const { return BlockColors; @@ -306,6 +313,17 @@ namespace { } bool runOnFunction(Function &F) override; }; + struct MustBeExecutedContextPrinter : public ModulePass { + static char ID; + + MustBeExecutedContextPrinter() : ModulePass(ID) { + initializeMustBeExecutedContextPrinterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnModule(Module &M) override; + }; } char MustExecutePrinter::ID = 0; @@ -320,6 +338,57 @@ FunctionPass *llvm::createMustExecutePrinter() { return new MustExecutePrinter(); } +char MustBeExecutedContextPrinter::ID = 0; 
+INITIALIZE_PASS_BEGIN( + MustBeExecutedContextPrinter, "print-must-be-executed-contexts", + "print the must-be-executed-contexed for all instructions", false, true) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(MustBeExecutedContextPrinter, + "print-must-be-executed-contexts", + "print the must-be-executed-contexed for all instructions", + false, true) + +ModulePass *llvm::createMustBeExecutedContextPrinter() { + return new MustBeExecutedContextPrinter(); +} + +bool MustBeExecutedContextPrinter::runOnModule(Module &M) { + // We provide non-PM analysis here because the old PM doesn't like to query + // function passes from a module pass. + SmallVector<PostDominatorTree *, 8> PDTs; + SmallVector<DominatorTree *, 8> DTs; + SmallVector<LoopInfo *, 8> LIs; + + GetterTy<LoopInfo> LIGetter = [&](const Function &F) { + DominatorTree *DT = new DominatorTree(const_cast<Function &>(F)); + LoopInfo *LI = new LoopInfo(*DT); + DTs.push_back(DT); + LIs.push_back(LI); + return LI; + }; + GetterTy<PostDominatorTree> PDTGetter = [&](const Function &F) { + PostDominatorTree *PDT = new PostDominatorTree(const_cast<Function &>(F)); + PDTs.push_back(PDT); + return PDT; + }; + MustBeExecutedContextExplorer Explorer(true, LIGetter, PDTGetter); + for (Function &F : M) { + for (Instruction &I : instructions(F)) { + dbgs() << "-- Explore context of: " << I << "\n"; + for (const Instruction *CI : Explorer.range(&I)) + dbgs() << " [F: " << CI->getFunction()->getName() << "] " << *CI + << "\n"; + } + } + + DeleteContainerPointers(PDTs); + DeleteContainerPointers(LIs); + DeleteContainerPointers(DTs); + return false; +} + static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) { // TODO: merge these two routines. For the moment, we display the best // result obtained by *either* implementation. This is a bit unfair since no @@ -396,3 +465,248 @@ bool MustExecutePrinter::runOnFunction(Function &F) { return false; } + +/// Return true if \p L might be an endless loop. +static bool maybeEndlessLoop(const Loop &L) { + if (L.getHeader()->getParent()->hasFnAttribute(Attribute::WillReturn)) + return false; + // TODO: Actually try to prove it is not. + // TODO: If maybeEndlessLoop is going to be expensive, cache it. + return true; +} + +static bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI) { + if (!LI) + return false; + using RPOTraversal = ReversePostOrderTraversal<const Function *>; + RPOTraversal FuncRPOT(&F); + return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal, + const LoopInfo>(FuncRPOT, *LI); +} + +/// Lookup \p Key in \p Map and return the result, potentially after +/// initializing the optional through \p Fn(\p args). +template <typename K, typename V, typename FnTy, typename... ArgsTy> +static V getOrCreateCachedOptional(K Key, DenseMap<K, Optional<V>> &Map, + FnTy &&Fn, ArgsTy&&... args) { + Optional<V> &OptVal = Map[Key]; + if (!OptVal.hasValue()) + OptVal = Fn(std::forward<ArgsTy>(args)...); + return OptVal.getValue(); +} + +const BasicBlock * +MustBeExecutedContextExplorer::findForwardJoinPoint(const BasicBlock *InitBB) { + const LoopInfo *LI = LIGetter(*InitBB->getParent()); + const PostDominatorTree *PDT = PDTGetter(*InitBB->getParent()); + + LLVM_DEBUG(dbgs() << "\tFind forward join point for " << InitBB->getName() + << (LI ? " [LI]" : "") << (PDT ? 
" [PDT]" : "")); + + const Function &F = *InitBB->getParent(); + const Loop *L = LI ? LI->getLoopFor(InitBB) : nullptr; + const BasicBlock *HeaderBB = L ? L->getHeader() : InitBB; + bool WillReturnAndNoThrow = (F.hasFnAttribute(Attribute::WillReturn) || + (L && !maybeEndlessLoop(*L))) && + F.doesNotThrow(); + LLVM_DEBUG(dbgs() << (L ? " [in loop]" : "") + << (WillReturnAndNoThrow ? " [WillReturn] [NoUnwind]" : "") + << "\n"); + + // Determine the adjacent blocks in the given direction but exclude (self) + // loops under certain circumstances. + SmallVector<const BasicBlock *, 8> Worklist; + for (const BasicBlock *SuccBB : successors(InitBB)) { + bool IsLatch = SuccBB == HeaderBB; + // Loop latches are ignored in forward propagation if the loop cannot be + // endless and may not throw: control has to go somewhere. + if (!WillReturnAndNoThrow || !IsLatch) + Worklist.push_back(SuccBB); + } + LLVM_DEBUG(dbgs() << "\t\t#Worklist: " << Worklist.size() << "\n"); + + // If there are no other adjacent blocks, there is no join point. + if (Worklist.empty()) + return nullptr; + + // If there is one adjacent block, it is the join point. + if (Worklist.size() == 1) + return Worklist[0]; + + // Try to determine a join block through the help of the post-dominance + // tree. If no tree was provided, we perform simple pattern matching for one + // block conditionals and one block loops only. + const BasicBlock *JoinBB = nullptr; + if (PDT) + if (const auto *InitNode = PDT->getNode(InitBB)) + if (const auto *IDomNode = InitNode->getIDom()) + JoinBB = IDomNode->getBlock(); + + if (!JoinBB && Worklist.size() == 2) { + const BasicBlock *Succ0 = Worklist[0]; + const BasicBlock *Succ1 = Worklist[1]; + const BasicBlock *Succ0UniqueSucc = Succ0->getUniqueSuccessor(); + const BasicBlock *Succ1UniqueSucc = Succ1->getUniqueSuccessor(); + if (Succ0UniqueSucc == InitBB) { + // InitBB -> Succ0 -> InitBB + // InitBB -> Succ1 = JoinBB + JoinBB = Succ1; + } else if (Succ1UniqueSucc == InitBB) { + // InitBB -> Succ1 -> InitBB + // InitBB -> Succ0 = JoinBB + JoinBB = Succ0; + } else if (Succ0 == Succ1UniqueSucc) { + // InitBB -> Succ0 = JoinBB + // InitBB -> Succ1 -> Succ0 = JoinBB + JoinBB = Succ0; + } else if (Succ1 == Succ0UniqueSucc) { + // InitBB -> Succ0 -> Succ1 = JoinBB + // InitBB -> Succ1 = JoinBB + JoinBB = Succ1; + } else if (Succ0UniqueSucc == Succ1UniqueSucc) { + // InitBB -> Succ0 -> JoinBB + // InitBB -> Succ1 -> JoinBB + JoinBB = Succ0UniqueSucc; + } + } + + if (!JoinBB && L) + JoinBB = L->getUniqueExitBlock(); + + if (!JoinBB) + return nullptr; + + LLVM_DEBUG(dbgs() << "\t\tJoin block candidate: " << JoinBB->getName() << "\n"); + + // In forward direction we check if control will for sure reach JoinBB from + // InitBB, thus it can not be "stopped" along the way. Ways to "stop" control + // are: infinite loops and instructions that do not necessarily transfer + // execution to their successor. To check for them we traverse the CFG from + // the adjacent blocks to the JoinBB, looking at all intermediate blocks. + + // If we know the function is "will-return" and "no-throw" there is no need + // for futher checks. 
+ if (!F.hasFnAttribute(Attribute::WillReturn) || !F.doesNotThrow()) { + + auto BlockTransfersExecutionToSuccessor = [](const BasicBlock *BB) { + return isGuaranteedToTransferExecutionToSuccessor(BB); + }; + + SmallPtrSet<const BasicBlock *, 16> Visited; + while (!Worklist.empty()) { + const BasicBlock *ToBB = Worklist.pop_back_val(); + if (ToBB == JoinBB) + continue; + + // Make sure all loops in-between are finite. + if (!Visited.insert(ToBB).second) { + if (!F.hasFnAttribute(Attribute::WillReturn)) { + if (!LI) + return nullptr; + + bool MayContainIrreducibleControl = getOrCreateCachedOptional( + &F, IrreducibleControlMap, mayContainIrreducibleControl, F, LI); + if (MayContainIrreducibleControl) + return nullptr; + + const Loop *L = LI->getLoopFor(ToBB); + if (L && maybeEndlessLoop(*L)) + return nullptr; + } + + continue; + } + + // Make sure the block has no instructions that could stop control + // transfer. + bool TransfersExecution = getOrCreateCachedOptional( + ToBB, BlockTransferMap, BlockTransfersExecutionToSuccessor, ToBB); + if (!TransfersExecution) + return nullptr; + + for (const BasicBlock *AdjacentBB : successors(ToBB)) + Worklist.push_back(AdjacentBB); + } + } + + LLVM_DEBUG(dbgs() << "\tJoin block: " << JoinBB->getName() << "\n"); + return JoinBB; +} + +const Instruction * +MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction( + MustBeExecutedIterator &It, const Instruction *PP) { + if (!PP) + return PP; + LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP << "\n"); + + // If we explore only inside a given basic block we stop at terminators. + if (!ExploreInterBlock && PP->isTerminator()) { + LLVM_DEBUG(dbgs() << "\tReached terminator in intra-block mode, done\n"); + return nullptr; + } + + // If we do not traverse the call graph we check if we can make progress in + // the current function. First, check if the instruction is guaranteed to + // transfer execution to the successor. + bool TransfersExecution = isGuaranteedToTransferExecutionToSuccessor(PP); + if (!TransfersExecution) + return nullptr; + + // If this is not a terminator we know that there is a single instruction + // after this one that is executed next if control is transfered. If not, + // we can try to go back to a call site we entered earlier. If none exists, we + // do not know any instruction that has to be executd next. + if (!PP->isTerminator()) { + const Instruction *NextPP = PP->getNextNode(); + LLVM_DEBUG(dbgs() << "\tIntermediate instruction does transfer control\n"); + return NextPP; + } + + // Finally, we have to handle terminators, trivial ones first. + assert(PP->isTerminator() && "Expected a terminator!"); + + // A terminator without a successor is not handled yet. + if (PP->getNumSuccessors() == 0) { + LLVM_DEBUG(dbgs() << "\tUnhandled terminator\n"); + return nullptr; + } + + // A terminator with a single successor, we will continue at the beginning of + // that one. + if (PP->getNumSuccessors() == 1) { + LLVM_DEBUG( + dbgs() << "\tUnconditional terminator, continue with successor\n"); + return &PP->getSuccessor(0)->front(); + } + + // Multiple successors mean we need to find the join point where control flow + // converges again. We use the findForwardJoinPoint helper function with + // information about the function and helper analyses, if available. 
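When no post-dominator tree is available, findForwardJoinPoint (invoked just below) falls back to pattern matching over a block with two interesting successors: self-loop, triangle, and diamond shapes. A standalone sketch of those cases over a toy CFG; the Block type and its getUniqueSuccessor stand-in are assumptions simplified from the BasicBlock API:

#include <vector>

struct Block {
  std::vector<Block *> Succs;
  // Return the sole successor, or nullptr if there is none or more than one.
  const Block *getUniqueSuccessor() const {
    return Succs.size() == 1 ? Succs[0] : nullptr;
  }
};

// Given a block with exactly two interesting successors, find the block where
// control flow joins again, or nullptr if the shape is not recognized.
static const Block *matchJoinBlock(const Block *Init, const Block *Succ0,
                                   const Block *Succ1) {
  const Block *Succ0Unique = Succ0->getUniqueSuccessor();
  const Block *Succ1Unique = Succ1->getUniqueSuccessor();
  if (Succ0Unique == Init)   // Init -> Succ0 -> Init, so Init -> Succ1 joins
    return Succ1;
  if (Succ1Unique == Init)   // Init -> Succ1 -> Init, so Init -> Succ0 joins
    return Succ0;
  if (Succ0 == Succ1Unique)  // triangle: Succ1 falls through into Succ0
    return Succ0;
  if (Succ1 == Succ0Unique)  // triangle: Succ0 falls through into Succ1
    return Succ1;
  if (Succ0Unique && Succ0Unique == Succ1Unique)
    return Succ0Unique;      // diamond: both arms meet in a single block
  return nullptr;
}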
+ if (const BasicBlock *JoinBB = findForwardJoinPoint(PP->getParent())) + return &JoinBB->front(); + + LLVM_DEBUG(dbgs() << "\tNo join point found\n"); + return nullptr; +} + +MustBeExecutedIterator::MustBeExecutedIterator( + MustBeExecutedContextExplorer &Explorer, const Instruction *I) + : Explorer(Explorer), CurInst(I) { + reset(I); +} + +void MustBeExecutedIterator::reset(const Instruction *I) { + CurInst = I; + Visited.clear(); + Visited.insert(I); +} + +const Instruction *MustBeExecutedIterator::advance() { + assert(CurInst && "Cannot advance an end iterator!"); + const Instruction *Next = + Explorer.getMustBeExecutedNextInstruction(*this, CurInst); + if (Next && !Visited.insert(Next).second) + Next = nullptr; + return Next; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp index 72c40a0be232..44e6637f6337 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -39,7 +40,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) BPI.calculate(*F, LI); // Finally compute BFI. - OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI); + OwnedBFI = std::make_unique<BlockFrequencyInfo>(*F, BPI, LI); BFI = OwnedBFI.get(); } @@ -97,7 +98,7 @@ bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) { else BFI = nullptr; - ORE = llvm::make_unique<OptimizationRemarkEmitter>(&Fn, BFI); + ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn, BFI); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/OrderedInstructions.cpp b/contrib/llvm-project/llvm/lib/Analysis/OrderedInstructions.cpp index 458c0a7de6c2..e947e5e388a8 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/OrderedInstructions.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/OrderedInstructions.cpp @@ -21,7 +21,7 @@ bool OrderedInstructions::localDominates(const Instruction *InstA, const BasicBlock *IBB = InstA->getParent(); auto OBB = OBBMap.find(IBB); if (OBB == OBBMap.end()) - OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first; + OBB = OBBMap.insert({IBB, std::make_unique<OrderedBasicBlock>(IBB)}).first; return OBB->second->dominates(InstA, InstB); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp b/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp index 49749bc44746..198647dafbef 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -47,25 +48,28 @@ bool PhiValues::invalidate(Function &, const PreservedAnalyses &PA, // we're ultimately interested in, and all of the reachable values, i.e. // including phis, as that makes invalidateValue easier. void PhiValues::processPhi(const PHINode *Phi, - SmallVector<const PHINode *, 8> &Stack) { + SmallVectorImpl<const PHINode *> &Stack) { // Initialize the phi with the next depth number. 
assert(DepthMap.lookup(Phi) == 0); assert(NextDepthNumber != UINT_MAX); - unsigned int DepthNumber = ++NextDepthNumber; - DepthMap[Phi] = DepthNumber; + unsigned int RootDepthNumber = ++NextDepthNumber; + DepthMap[Phi] = RootDepthNumber; // Recursively process the incoming phis of this phi. TrackedValues.insert(PhiValuesCallbackVH(const_cast<PHINode *>(Phi), this)); for (Value *PhiOp : Phi->incoming_values()) { if (PHINode *PhiPhiOp = dyn_cast<PHINode>(PhiOp)) { // Recurse if the phi has not yet been visited. - if (DepthMap.lookup(PhiPhiOp) == 0) + unsigned int OpDepthNumber = DepthMap.lookup(PhiPhiOp); + if (OpDepthNumber == 0) { processPhi(PhiPhiOp, Stack); - assert(DepthMap.lookup(PhiPhiOp) != 0); + OpDepthNumber = DepthMap.lookup(PhiPhiOp); + assert(OpDepthNumber != 0); + } // If the phi did not become part of a component then this phi and that // phi are part of the same component, so adjust the depth number. - if (!ReachableMap.count(DepthMap[PhiPhiOp])) - DepthMap[Phi] = std::min(DepthMap[Phi], DepthMap[PhiPhiOp]); + if (!ReachableMap.count(OpDepthNumber)) + DepthMap[Phi] = std::min(DepthMap[Phi], OpDepthNumber); } else { TrackedValues.insert(PhiValuesCallbackVH(PhiOp, this)); } @@ -76,48 +80,59 @@ void PhiValues::processPhi(const PHINode *Phi, // If the depth number has not changed then we've finished collecting the phis // of a strongly connected component. - if (DepthMap[Phi] == DepthNumber) { + if (DepthMap[Phi] == RootDepthNumber) { // Collect the reachable values for this component. The phis of this - // component will be those on top of the depth stach with the same or + // component will be those on top of the depth stack with the same or // greater depth number. - ConstValueSet Reachable; - while (!Stack.empty() && DepthMap[Stack.back()] >= DepthNumber) { + ConstValueSet &Reachable = ReachableMap[RootDepthNumber]; + while (true) { const PHINode *ComponentPhi = Stack.pop_back_val(); Reachable.insert(ComponentPhi); - DepthMap[ComponentPhi] = DepthNumber; + for (Value *Op : ComponentPhi->incoming_values()) { if (PHINode *PhiOp = dyn_cast<PHINode>(Op)) { // If this phi is not part of the same component then that component // is guaranteed to have been completed before this one. Therefore we // can just add its reachable values to the reachable values of this // component. - auto It = ReachableMap.find(DepthMap[PhiOp]); - if (It != ReachableMap.end()) - Reachable.insert(It->second.begin(), It->second.end()); - } else { + unsigned int OpDepthNumber = DepthMap[PhiOp]; + if (OpDepthNumber != RootDepthNumber) { + auto It = ReachableMap.find(OpDepthNumber); + if (It != ReachableMap.end()) + Reachable.insert(It->second.begin(), It->second.end()); + } + } else Reachable.insert(Op); - } } + + if (Stack.empty()) + break; + + unsigned int &ComponentDepthNumber = DepthMap[Stack.back()]; + if (ComponentDepthNumber < RootDepthNumber) + break; + + ComponentDepthNumber = RootDepthNumber; } - ReachableMap.insert({DepthNumber,Reachable}); // Filter out phis to get the non-phi reachable values. 
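PhiValues answers the question "which non-phi values can this phi ultimately carry?". The rework above changes how strongly connected components of phis share a cached answer, but the underlying computation is a reachability walk over the phi graph, as in this simplified standalone sketch (hypothetical toy types, no per-component caching):

#include <set>
#include <vector>

struct TValue {
  bool IsPhi = false;
  std::vector<TValue *> Incoming; // only meaningful when IsPhi is true
};

// Collect every non-phi value reachable by following incoming edges through
// chains (and cycles) of phis starting at Root.
static std::set<TValue *> underlyingValues(TValue *Root) {
  std::set<TValue *> Result, VisitedPhis;
  std::vector<TValue *> Worklist{Root};
  while (!Worklist.empty()) {
    TValue *V = Worklist.back();
    Worklist.pop_back();
    if (!V->IsPhi) {
      Result.insert(V);
      continue;
    }
    if (!VisitedPhis.insert(V).second)
      continue;                   // this phi was already expanded (cycle)
    for (TValue *In : V->Incoming)
      Worklist.push_back(In);
  }
  return Result;
}

The depth numbers in the real code exist so that every phi in one strongly connected component is filed under a single RootDepthNumber, letting the reachable sets be built and cached once per component rather than once per phi.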
- ValueSet NonPhi; + ValueSet &NonPhi = NonPhiReachableMap[RootDepthNumber]; for (const Value *V : Reachable) if (!isa<PHINode>(V)) - NonPhi.insert(const_cast<Value*>(V)); - NonPhiReachableMap.insert({DepthNumber,NonPhi}); + NonPhi.insert(const_cast<Value *>(V)); } } const PhiValues::ValueSet &PhiValues::getValuesForPhi(const PHINode *PN) { - if (DepthMap.count(PN) == 0) { + unsigned int DepthNumber = DepthMap.lookup(PN); + if (DepthNumber == 0) { SmallVector<const PHINode *, 8> Stack; processPhi(PN, Stack); + DepthNumber = DepthMap.lookup(PN); assert(Stack.empty()); + assert(DepthNumber != 0); } - assert(DepthMap.lookup(PN) != 0); - return NonPhiReachableMap[DepthMap[PN]]; + return NonPhiReachableMap[DepthNumber]; } void PhiValues::invalidateValue(const Value *V) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp index 4afe22bd5342..f01d51504d7c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp @@ -12,7 +12,9 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" @@ -32,6 +34,11 @@ static constexpr bool ExpensiveChecksEnabled = false; char PostDominatorTreeWrapperPass::ID = 0; +PostDominatorTreeWrapperPass::PostDominatorTreeWrapperPass() + : FunctionPass(ID) { + initializePostDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); +} + INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree", "Post-Dominator Tree Construction", true, true) @@ -44,6 +51,28 @@ bool PostDominatorTree::invalidate(Function &F, const PreservedAnalyses &PA, PAC.preservedSet<CFGAnalyses>()); } +bool PostDominatorTree::dominates(const Instruction *I1, + const Instruction *I2) const { + assert(I1 && I2 && "Expecting valid I1 and I2"); + + const BasicBlock *BB1 = I1->getParent(); + const BasicBlock *BB2 = I2->getParent(); + + if (BB1 != BB2) + return Base::dominates(BB1, BB2); + + // PHINodes in a block are unordered. + if (isa<PHINode>(I1) && isa<PHINode>(I2)) + return false; + + // Loop through the basic block until we find I1 or I2. 
+ BasicBlock::const_iterator I = BB1->begin(); + for (; &*I != I1 && &*I != I2; ++I) + /*empty*/; + + return &*I == I2; +} + bool PostDominatorTreeWrapperPass::runOnFunction(Function &F) { DT.recalculate(F); return false; diff --git a/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp index dce19d6d546e..911d39d9a263 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -18,6 +18,8 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; // The following two parameters determine the threshold for a count to be @@ -45,6 +47,13 @@ static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( " blocks required to reach the -profile-summary-cutoff-hot" " percentile exceeds this count.")); +static cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold( + "profile-summary-large-working-set-size-threshold", cl::Hidden, + cl::init(12500), cl::ZeroOrMore, + cl::desc("The code working set size is considered large if the number of" + " blocks required to reach the -profile-summary-cutoff-hot" + " percentile exceeds this count.")); + // The next two options override the counts derived from summary computation and // are useful for debugging purposes. static cl::opt<int> ProfileSummaryHotCount( @@ -186,6 +195,31 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, return true; } +// Like isFunctionHotInCallGraph but for a given cutoff. +bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { + if (!F || !computeSummary()) + return false; + if (auto FunctionCount = F->getEntryCount()) + if (isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) + return true; + + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) + return true; + } + for (const auto &BB : *F) + if (isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) + return true; + return false; +} + /// Returns true if the function's entry is a cold. If it returns false, it /// either means it is not cold or it is unknown whether it is cold or not (for /// example, no profile data is available). 
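The new isHotCountNthPercentile/computeThreshold pair generalizes the fixed hot/cold thresholds: for a requested (scaled) percentile cutoff, the matching MinCount is looked up in the profile's detailed summary and cached. A standalone sketch of that lookup and the resulting hotness test, using a hypothetical SummaryEntry layout:

#include <cstdint>
#include <map>
#include <optional>
#include <utility>
#include <vector>

// Hypothetical mirror of one detailed-summary entry: counts of at least
// MinCount together cover at least `Cutoff` of the total profile weight.
struct SummaryEntry {
  uint64_t Cutoff;   // scaled percentile, ascending across the summary
  uint64_t MinCount; // smallest count still inside that percentile
};

class PercentileThresholds {
  std::vector<SummaryEntry> DetailedSummary;         // sorted by Cutoff
  std::map<uint64_t, std::optional<uint64_t>> Cache; // cutoff -> threshold

public:
  explicit PercentileThresholds(std::vector<SummaryEntry> S)
      : DetailedSummary(std::move(S)) {}

  // Threshold count taken from the first entry whose cutoff covers the request.
  std::optional<uint64_t> thresholdFor(uint64_t PercentileCutoff) {
    auto It = Cache.find(PercentileCutoff);
    if (It != Cache.end())
      return It->second;
    std::optional<uint64_t> Result;
    for (const SummaryEntry &E : DetailedSummary)
      if (E.Cutoff >= PercentileCutoff) {
        Result = E.MinCount;
        break;
      }
    Cache[PercentileCutoff] = Result;
    return Result;
  }

  // A count is "hot at this percentile" when it reaches the threshold.
  bool isHotCountNthPercentile(uint64_t PercentileCutoff, uint64_t Count) {
    auto T = thresholdFor(PercentileCutoff);
    return T && Count >= *T;
  }
};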
@@ -222,6 +256,23 @@ void ProfileSummaryInfo::computeThresholds() { "Cold count threshold cannot exceed hot count threshold!"); HasHugeWorkingSetSize = HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; +} + +Optional<uint64_t> ProfileSummaryInfo::computeThreshold(int PercentileCutoff) { + if (!computeSummary()) + return None; + auto iter = ThresholdCache.find(PercentileCutoff); + if (iter != ThresholdCache.end()) { + return iter->second; + } + auto &DetailedSummary = Summary->getDetailedSummary(); + auto &Entry = + getEntryForPercentile(DetailedSummary, PercentileCutoff); + uint64_t CountThreshold = Entry.MinCount; + ThresholdCache[PercentileCutoff] = CountThreshold; + return CountThreshold; } bool ProfileSummaryInfo::hasHugeWorkingSetSize() { @@ -230,6 +281,12 @@ bool ProfileSummaryInfo::hasHugeWorkingSetSize() { return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); } +bool ProfileSummaryInfo::hasLargeWorkingSetSize() { + if (!HasLargeWorkingSetSize) + computeThresholds(); + return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue(); +} + bool ProfileSummaryInfo::isHotCount(uint64_t C) { if (!HotCountThreshold) computeThresholds(); @@ -242,6 +299,11 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } +bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) { + auto CountThreshold = computeThreshold(PercentileCutoff); + return CountThreshold && C >= CountThreshold.getValue(); +} + uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() { if (!HotCountThreshold) computeThresholds(); @@ -265,6 +327,13 @@ bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, return Count && isColdCount(*Count); } +bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, + BlockFrequencyInfo *BFI) { + auto Count = BFI->getBlockProfileCount(BB); + return Count && isHotCountNthPercentile(PercentileCutoff, *Count); +} + bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI) { auto C = getProfileCount(CS.getInstruction(), BFI); diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp index 8ba38adfb0d2..88629517d484 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp @@ -10,6 +10,7 @@ #include "llvm/Analysis/RegionInfo.h" #include "llvm/ADT/Statistic.h" +#include "llvm/InitializePasses.h" #ifndef NDEBUG #include "llvm/Analysis/RegionPrinter.h" #endif diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp index 5bdcb31fbe99..020ff85d1b98 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp index bc2cfd6fcc42..26a9a5ddf1ea 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp +++ 
b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp @@ -112,6 +112,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -148,6 +149,7 @@ STATISTIC(NumBruteForceTripCountsComputed, static cl::opt<unsigned> MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::ZeroOrMore, cl::desc("Maximum number of iterations SCEV will " "symbolically execute a constant " "derived loop"), @@ -157,6 +159,9 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, static cl::opt<bool> VerifySCEV( "verify-scev", cl::Hidden, cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); +static cl::opt<bool> VerifySCEVStrict( + "verify-scev-strict", cl::Hidden, + cl::desc("Enable stricter verification with -verify-scev is passed")); static cl::opt<bool> VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " @@ -216,6 +221,12 @@ static cl::opt<unsigned> cl::desc("Size of the expression which is considered huge"), cl::init(4096)); +static cl::opt<bool> +ClassifyExpressions("scalar-evolution-classify-expressions", + cl::Hidden, cl::init(true), + cl::desc("When printing analysis, include information on every instruction")); + + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -1707,7 +1718,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa<SCEVCouldNotCompute>(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. @@ -2051,7 +2062,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa<SCEVCouldNotCompute>(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. @@ -3421,7 +3432,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X } - // It's tempting to want to call getMaxBackedgeTakenCount count here and + // It's tempting to want to call getConstantMaxBackedgeTakenCount count here and // use that information to infer NUW and NSW flags. However, computing a // BE count requires calling getAddRecExpr, so we may not yet have a // meaningful BE count at this point (and if we don't, we'd be stuck @@ -3484,7 +3495,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); // getSCEV(Base)->getType() has the same address space as Base->getType() // because SCEV::getType() preserves the address space. 
- Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType()); + Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType()); // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP // instruction to its SCEV, because the Instruction may be guarded by control // flow and the no-overflow bits may not be valid for the expression in any @@ -3493,7 +3504,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - const SCEV *TotalOffset = getZero(IntPtrTy); + const SCEV *TotalOffset = getZero(IntIdxTy); // The array size is unimportant. The first thing we do on CurTy is getting // its element type. Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0); @@ -3503,7 +3514,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, // For a struct, add the member offset. ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue(); unsigned FieldNo = Index->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); + const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); @@ -3514,9 +3525,9 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, // Update CurTy to its element type. CurTy = cast<SequentialType>(CurTy)->getElementType(); // For an array, add the element offset, explicitly scaled. - const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy); + const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy); // Getelementptr indices are signed. - IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy); + IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy); // Multiply the index by the element size to compute the element offset. const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); @@ -3775,7 +3786,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { /// Return a type with the same bitwidth as the given type and which represents /// how SCEV will treat the given type, for which isSCEVable must return -/// true. For pointer types, this is the pointer-sized integer type. +/// true. For pointer types, this is the pointer index sized integer type. Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); @@ -3784,7 +3795,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - return getDataLayout().getIntPtrType(Ty); + return getDataLayout().getIndexType(Ty); } Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const { @@ -4564,6 +4575,12 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { break; } + // Recognise intrinsic loop.decrement.reg, and as this has exactly the same + // semantics as a Sub, return a binary sub expression. + if (auto *II = dyn_cast<IntrinsicInst>(V)) + if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg) + return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1)); + return None; } @@ -4991,7 +5008,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, // overflow. 
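The IntPtrTy to IntIdxTy renaming in the hunks above tracks a semantic change: getEffectiveSCEVType now models pointers with the pointer index type (DataLayout::getIndexType) rather than the pointer-sized integer type, and the two differ on targets whose index width is narrower than the pointer width. A small illustrative snippet; the data layout string with a 64-bit pointer and 32-bit index width is an assumption chosen only to make the difference visible:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// With "p:64:64:64:32" pointers occupy 64 bits but are indexed with 32 bits,
// so the two queries below return different integer types.
static void showIndexVsPointerWidth() {
  LLVMContext Ctx;
  DataLayout DL("p:64:64:64:32");              // size:abi:pref:index (assumed)
  Type *PtrTy = Type::getInt8PtrTy(Ctx);
  IntegerType *IntPtr = DL.getIntPtrType(Ctx); // i64: pointer-sized integer
  Type *IntIdx = DL.getIndexType(PtrTy);       // i32: index-sized integer
  (void)IntPtr;
  (void)IntIdx;
}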
if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags); + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); return PHISCEV; } @@ -5549,6 +5566,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); + using OBO = OverflowingBinaryOperator; // If the value has known zeros, the maximum value will have those known zeros // as well. @@ -5566,8 +5584,14 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { ConstantRange X = getRangeRef(Add->getOperand(0), SignHint); + unsigned WrapType = OBO::AnyWrap; + if (Add->hasNoSignedWrap()) + WrapType |= OBO::NoSignedWrap; + if (Add->hasNoUnsignedWrap()) + WrapType |= OBO::NoUnsignedWrap; for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) - X = X.add(getRangeRef(Add->getOperand(i), SignHint)); + X = X.addWithNoWrap(getRangeRef(Add->getOperand(i), SignHint), + WrapType, RangeType); return setRange(Add, SignHint, ConservativeResult.intersectWith(X, RangeType)); } @@ -5596,6 +5620,22 @@ ScalarEvolution::getRangeRef(const SCEV *S, ConservativeResult.intersectWith(X, RangeType)); } + if (const SCEVSMinExpr *SMin = dyn_cast<SCEVSMinExpr>(S)) { + ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint); + for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i) + X = X.smin(getRangeRef(SMin->getOperand(i), SignHint)); + return setRange(SMin, SignHint, + ConservativeResult.intersectWith(X, RangeType)); + } + + if (const SCEVUMinExpr *UMin = dyn_cast<SCEVUMinExpr>(S)) { + ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint); + for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i) + X = X.umin(getRangeRef(UMin->getOperand(i), SignHint)); + return setRange(UMin, SignHint, + ConservativeResult.intersectWith(X, RangeType)); + } + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); @@ -5627,34 +5667,43 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { // If there's no unsigned wrap, the value will never be less than its // initial value. - if (AddRec->hasNoUnsignedWrap()) - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) - if (!C->getValue()->isZero()) - ConservativeResult = ConservativeResult.intersectWith( - ConstantRange(C->getAPInt(), APInt(BitWidth, 0)), RangeType); - - // If there's no signed wrap, and all the operands have the same sign or - // zero, the value won't ever change sign. + if (AddRec->hasNoUnsignedWrap()) { + APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart()); + if (!UnsignedMinValue.isNullValue()) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType); + } + + // If there's no signed wrap, and all the operands except initial value have + // the same sign or zero, the value won't ever be: + // 1: smaller than initial value if operands are non negative, + // 2: bigger than initial value if operands are non positive. + // For both cases, value can not cross signed min/max boundary. 
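The getRangeRef change above folds the add expression's nuw/nsw flags into the range arithmetic via ConstantRange::addWithNoWrap, which can stay tighter than a plain add when the unflagged sum could wrap. A minimal sketch of the call shape for the unsigned case; only the addWithNoWrap call itself is taken from the hunk, the helper around it is an assumption:

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Add two ranges under the assumption that the underlying IR add is 'nuw':
// results that would wrap past the unsigned maximum are excluded, so the
// returned range can be narrower than X.add(Y).
static ConstantRange addRangesNoUnsignedWrap(const ConstantRange &X,
                                             const ConstantRange &Y) {
  using OBO = OverflowingBinaryOperator;
  return X.addWithNoWrap(Y, OBO::NoUnsignedWrap);
}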
if (AddRec->hasNoSignedWrap()) { bool AllNonNeg = true; bool AllNonPos = true; - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false; - if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false; + for (unsigned i = 1, e = AddRec->getNumOperands(); i != e; ++i) { + if (!isKnownNonNegative(AddRec->getOperand(i))) + AllNonNeg = false; + if (!isKnownNonPositive(AddRec->getOperand(i))) + AllNonPos = false; } if (AllNonNeg) ConservativeResult = ConservativeResult.intersectWith( - ConstantRange(APInt(BitWidth, 0), - APInt::getSignedMinValue(BitWidth)), RangeType); + ConstantRange::getNonEmpty(getSignedRangeMin(AddRec->getStart()), + APInt::getSignedMinValue(BitWidth)), + RangeType); else if (AllNonPos) ConservativeResult = ConservativeResult.intersectWith( - ConstantRange(APInt::getSignedMinValue(BitWidth), - APInt(BitWidth, 1)), RangeType); + ConstantRange::getNonEmpty( + APInt::getSignedMinValue(BitWidth), + getSignedRangeMax(AddRec->getStart()) + 1), + RangeType); } // TODO: non-affine addrec if (AddRec->isAffine()) { - const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { auto RangeFromAffine = getRangeForAffineAR( @@ -5690,14 +5739,26 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); - if (Known.One != ~Known.Zero + 1) - ConservativeResult = - ConservativeResult.intersectWith( - ConstantRange(Known.One, ~Known.Zero + 1), RangeType); + if (Known.getBitWidth() != BitWidth) + Known = Known.zextOrTrunc(BitWidth, true); + // If Known does not result in full-set, intersect with it. + if (Known.getMinValue() != Known.getMaxValue() + 1) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1), + RangeType); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT); + // If the pointer size is larger than the index size type, this can cause + // NS to be larger than BitWidth. So compensate for this. + if (U->getType()->isPointerTy()) { + unsigned ptrSize = DL.getPointerTypeSizeInBits(U->getType()); + int ptrIdxDiff = ptrSize - BitWidth; + if (ptrIdxDiff > 0 && ptrSize > BitWidth && NS > (unsigned)ptrIdxDiff) + NS -= ptrIdxDiff; + } + if (NS > 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -6523,7 +6584,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { const auto *MaxExitCount = - dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L)); + dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L)); return getConstantTripCount(MaxExitCount); } @@ -6579,12 +6640,16 @@ ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, return (unsigned)Result->getZExtValue(); } -/// Get the expression for the number of loop iterations for which this loop is -/// guaranteed not to exit via ExitingBlock. Otherwise return -/// SCEVCouldNotCompute. 
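To make the tightened addrec bound above concrete: for an nsw recurrence such as {5,+,1}, no value can drop below the start, so the signed range can be clamped to [5, SINT_MAX] rather than merely [0, SINT_MAX]. A small sketch using the same ConstantRange::getNonEmpty helper the hunk uses; the function name and example values are invented:

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  using namespace llvm;

  // Lower bound for an nsw addrec whose step operands are all known
  // non-negative: nothing below the signed minimum of the start is reachable.
  ConstantRange nonNegAddRecBound(const APInt &StartSignedMin) {
    // getNonEmpty degrades to the full set if the two endpoints happen to meet.
    return ConstantRange::getNonEmpty(
        StartSignedMin,
        APInt::getSignedMinValue(StartSignedMin.getBitWidth()));
  }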
const SCEV *ScalarEvolution::getExitCount(const Loop *L, - BasicBlock *ExitingBlock) { - return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); + BasicBlock *ExitingBlock, + ExitCountKind Kind) { + switch (Kind) { + case Exact: + return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); + case ConstantMaximum: + return getBackedgeTakenInfo(L).getMax(ExitingBlock, this); + }; + llvm_unreachable("Invalid ExitCountKind!"); } const SCEV * @@ -6593,14 +6658,15 @@ ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds); } -const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).getExact(L, this); -} - -/// Similar to getBackedgeTakenCount, except return the least SCEV value that is -/// known never to be less than the actual backedge taken count. -const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).getMax(this); +const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, + ExitCountKind Kind) { + switch (Kind) { + case Exact: + return getBackedgeTakenInfo(L).getExact(L, this); + case ConstantMaximum: + return getBackedgeTakenInfo(L).getMax(this); + }; + llvm_unreachable("Invalid ExitCountKind!"); } bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { @@ -6909,6 +6975,16 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, return SE->getCouldNotCompute(); } +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getMax(BasicBlock *ExitingBlock, + ScalarEvolution *SE) const { + for (auto &ENT : ExitNotTaken) + if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) + return ENT.MaxNotTaken; + + return SE->getCouldNotCompute(); +} + /// getMax - Get the max backedge taken count for the loop. const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { @@ -7000,13 +7076,15 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( BasicBlock *ExitBB = EEI.first; const ExitLimit &EL = EEI.second; if (EL.Predicates.empty()) - return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr); + return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken, + nullptr); std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate); for (auto *Pred : EL.Predicates) Predicate->add(Pred); - return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate)); + return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken, + std::move(Predicate)); }); assert((isa<SCEVCouldNotCompute>(MaxCount) || isa<SCEVConstant>(MaxCount)) && "No point in having a non-constant max backedge taken count!"); @@ -7038,6 +7116,17 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; + + // We canonicalize untaken exits to br (constant), ignore them so that + // proving an exit untaken doesn't negatively impact our ability to reason + // about the loop as whole. 
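A hedged usage sketch of the per-exit ExitCountKind query introduced above; SE, L and ExitingBB stand for a ScalarEvolution instance, a loop and one of its exiting blocks that a caller would already have, and the helper name is invented:

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;

  const SCEV *bestExitBound(ScalarEvolution &SE, const Loop *L,
                            BasicBlock *ExitingBB) {
    // Prefer the exact not-taken count for this exit when SCEV can prove one.
    const SCEV *Exact = SE.getExitCount(L, ExitingBB, ScalarEvolution::Exact);
    if (!isa<SCEVCouldNotCompute>(Exact))
      return Exact;
    // Otherwise fall back to the new per-exit constant upper bound.
    return SE.getExitCount(L, ExitingBB, ScalarEvolution::ConstantMaximum);
  }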
+ if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator())) + if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { + bool ExitIfTrue = !L->contains(BI->getSuccessor(0)); + if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne())) + continue; + } + ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); assert((AllowPredicates || EL.Predicates.empty()) && @@ -7197,6 +7286,11 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(1), ExitIfTrue, ControlsExit && !EitherMayExit, AllowPredicates); + // Be robust against unsimplified IR for the form "and i1 X, true" + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) + return CI->isOne() ? EL0 : EL1; + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0))) + return CI->isOne() ? EL1 : EL0; const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -7245,6 +7339,11 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(1), ExitIfTrue, ControlsExit && !EitherMayExit, AllowPredicates); + // Be robust against unsimplified IR for the form "or i1 X, true" + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) + return CI->isZero() ? EL0 : EL1; + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0))) + return CI->isZero() ? EL1 : EL0; const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -9833,6 +9932,10 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More, // We avoid subtracting expressions here because this function is usually // fairly deep in the call stack (i.e. is called many times). + // X - X = 0. + if (More == Less) + return APInt(getTypeSizeInBits(More->getType()), 0); + if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) { const auto *LAR = cast<SCEVAddRecExpr>(Less); const auto *MAR = cast<SCEVAddRecExpr>(More); @@ -10314,10 +10417,43 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, return false; } +static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // zext x u<= sext x, sext x s<= zext x + switch (Pred) { + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLE: { + // If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt. + const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(LHS); + const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + case ICmpInst::ICMP_UGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: { + // If operand >=s 0 then ZExt == SExt. If operand <s 0 then ZExt <u SExt. 
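A concrete instance of the extend idiom being matched by this helper, spelled out at an illustrative width of i8: for x = -1, zext to i16 gives 255 while sext gives 0xFFFF, which is 65535 read unsigned but -1 read signed, so zext(x) u<= sext(x) and sext(x) s<= zext(x) hold for every x. The same check with APInt; the function name is invented:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  void extendIdiomExample() {
    APInt X = APInt::getAllOnesValue(8); // i8 -1, i.e. 0xFF
    APInt Z = X.zext(16);                // 0x00FF = 255
    APInt S = X.sext(16);                // 0xFFFF = 65535 unsigned, -1 signed
    bool ZextULESext = Z.ule(S);         // true for every x, not just this one
    bool SextSLEZext = S.sle(Z);         // true for every x, not just this one
    (void)ZextULESext;
    (void)SextSLEZext;
  }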
+ const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS); + const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + default: + break; + }; + return false; +} + bool ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { - return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || + return isKnownPredicateExtendIdiom(Pred, LHS, RHS) || + isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || isKnownPredicateViaNoOverflow(Pred, LHS, RHS); @@ -10919,7 +11055,7 @@ struct SCEVCollectAddRecMultiplies { } else if (Unknown) { HasAddRec = true; } else { - bool ContainsAddRec; + bool ContainsAddRec = false; SCEVHasAddRec ContiansAddRec(ContainsAddRec); visitAll(Op, ContiansAddRec); HasAddRec |= ContainsAddRec; @@ -11434,8 +11570,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; - if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { - OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + if (!isa<SCEVCouldNotCompute>(SE->getConstantMaxBackedgeTakenCount(L))) { + OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L); if (SE->isBackedgeTakenCountMaxOrZero(L)) OS << ", actual taken count either this or zero."; } else { @@ -11487,77 +11623,79 @@ void ScalarEvolution::print(raw_ostream &OS) const { // const isn't dangerous. ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); - OS << "Classifying expressions for: "; - F.printAsOperand(OS, /*PrintType=*/false); - OS << "\n"; - for (Instruction &I : instructions(F)) - if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) { - OS << I << '\n'; - OS << " --> "; - const SCEV *SV = SE.getSCEV(&I); - SV->print(OS); - if (!isa<SCEVCouldNotCompute>(SV)) { - OS << " U: "; - SE.getUnsignedRange(SV).print(OS); - OS << " S: "; - SE.getSignedRange(SV).print(OS); - } - - const Loop *L = LI.getLoopFor(I.getParent()); - - const SCEV *AtUse = SE.getSCEVAtScope(SV, L); - if (AtUse != SV) { + if (ClassifyExpressions) { + OS << "Classifying expressions for: "; + F.printAsOperand(OS, /*PrintType=*/false); + OS << "\n"; + for (Instruction &I : instructions(F)) + if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) { + OS << I << '\n'; OS << " --> "; - AtUse->print(OS); - if (!isa<SCEVCouldNotCompute>(AtUse)) { + const SCEV *SV = SE.getSCEV(&I); + SV->print(OS); + if (!isa<SCEVCouldNotCompute>(SV)) { OS << " U: "; - SE.getUnsignedRange(AtUse).print(OS); + SE.getUnsignedRange(SV).print(OS); OS << " S: "; - SE.getSignedRange(AtUse).print(OS); + SE.getSignedRange(SV).print(OS); } - } - if (L) { - OS << "\t\t" "Exits: "; - const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); - if (!SE.isLoopInvariant(ExitValue, L)) { - OS << "<<Unknown>>"; - } else { - OS << *ExitValue; + const Loop *L = LI.getLoopFor(I.getParent()); + + const SCEV *AtUse = SE.getSCEVAtScope(SV, L); + if (AtUse != SV) { + OS << " --> "; + AtUse->print(OS); + if (!isa<SCEVCouldNotCompute>(AtUse)) { + OS << " U: "; + SE.getUnsignedRange(AtUse).print(OS); + OS << " S: "; + SE.getSignedRange(AtUse).print(OS); + } } - bool First = true; - for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { - if (First) { - OS << "\t\t" "LoopDispositions: { "; - First = false; + if (L) { + OS << 
"\t\t" "Exits: "; + const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + if (!SE.isLoopInvariant(ExitValue, L)) { + OS << "<<Unknown>>"; } else { - OS << ", "; + OS << *ExitValue; } - Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); - OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); - } + bool First = true; + for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { + if (First) { + OS << "\t\t" "LoopDispositions: { "; + First = false; + } else { + OS << ", "; + } - for (auto *InnerL : depth_first(L)) { - if (InnerL == L) - continue; - if (First) { - OS << "\t\t" "LoopDispositions: { "; - First = false; - } else { - OS << ", "; + Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); + } + + for (auto *InnerL : depth_first(L)) { + if (InnerL == L) + continue; + if (First) { + OS << "\t\t" "LoopDispositions: { "; + First = false; + } else { + OS << ", "; + } + + InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL)); } - InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); - OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL)); + OS << " }"; } - OS << " }"; + OS << "\n"; } - - OS << "\n"; - } + } OS << "Determining loop execution counts for: "; F.printAsOperand(OS, /*PrintType=*/false); @@ -11901,14 +12039,14 @@ void ScalarEvolution::verify() const { SE.getTypeSizeInBits(NewBECount->getType())) CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); - auto *ConstantDelta = - dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount)); + const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount); - if (ConstantDelta && ConstantDelta->getAPInt() != 0) { - dbgs() << "Trip Count Changed!\n"; + // Unless VerifySCEVStrict is set, we only compare constant deltas. 
+ if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) { + dbgs() << "Trip Count for " << *L << " Changed!\n"; dbgs() << "Old: " << *CurBECount << "\n"; dbgs() << "New: " << *NewBECount << "\n"; - dbgs() << "Delta: " << *ConstantDelta << "\n"; + dbgs() << "Delta: " << *Delta << "\n"; std::abort(); } } @@ -11937,6 +12075,12 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, } PreservedAnalyses +ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { + AM.getResult<ScalarEvolutionAnalysis>(F).verify(); + return PreservedAnalyses::all(); +} + +PreservedAnalyses ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { AM.getResult<ScalarEvolutionAnalysis>(F).print(OS); return PreservedAnalyses::all(); @@ -11959,7 +12103,7 @@ ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { SE.reset(new ScalarEvolution( - F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F), getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), getAnalysis<DominatorTreeWrapperPass>().getDomTree(), getAnalysis<LoopInfoWrapperPass>().getLoopInfo())); @@ -12405,7 +12549,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution( const PredicatedScalarEvolution &Init) : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { - for (const auto &I : Init.FlagsMap) + for (auto I : Init.FlagsMap) FlagsMap.insert(I); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 96da0a24cddd..79640256f695 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/InitializePasses.h" using namespace llvm; AliasResult SCEVAAResult::alias(const MemoryLocation &LocA, diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index e8a95d35482c..dc5d02aa3a3c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -240,9 +240,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// division. If so, update S with Factor divided out and return true. /// S need not be evenly divisible if a reasonable remainder can be /// computed. -/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made -/// unnecessary; in its place, just signed-divide Ops[i] by the scale and -/// check to see if the divide was folded. static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, const SCEV *Factor, ScalarEvolution &SE, const DataLayout &DL) { @@ -417,7 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); - Type *IntPtrTy = DL.getIntPtrType(PTy); + Type *IntIdxTy = DL.getIndexType(PTy); // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. 
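Stepping back to the ScalarEvolutionVerifierPass added in this hunk: a hedged sketch of wiring it into a new-pass-manager function pipeline; the wrapper function is invented, the pass itself is what the hunk defines:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/PassManager.h"
  using namespace llvm;

  void addSCEVVerification(FunctionPassManager &FPM) {
    // Gets the ScalarEvolution analysis result for each function and runs its
    // verify(), which rebuilds backedge-taken counts with a fresh SE and
    // aborts on a mismatch.
    FPM.addPass(ScalarEvolutionVerifierPass());
  }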
The first index in a GEP @@ -429,7 +426,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // array indexing. SmallVector<const SCEV *, 8> ScaledOps; if (ElTy->isSized()) { - const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy); + const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy); if (!ElSize->isZero()) { SmallVector<const SCEV *, 8> NewOps; for (const SCEV *Op : Ops) { @@ -1486,7 +1483,18 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { } Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { - if (!CanonicalMode) return expandAddRecExprLiterally(S); + // In canonical mode we compute the addrec as an expression of a canonical IV + // using evaluateAtIteration and expand the resulting SCEV expression. This + // way we avoid introducing new IVs to carry on the comutation of the addrec + // throughout the loop. + // + // For nested addrecs evaluateAtIteration might need a canonical IV of a + // type wider than the addrec itself. Emitting a canonical IV of the + // proper type might produce non-legal types, for example expanding an i64 + // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall + // back to non-canonical mode for nested addrecs. + if (!CanonicalMode || (S->getNumOperands() > 2)) + return expandAddRecExprLiterally(S); Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); @@ -2094,11 +2102,10 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, for (BasicBlock *BB : ExitingBlocks) { ICmpInst::Predicate Pred; Instruction *LHS, *RHS; - BasicBlock *TrueBB, *FalseBB; if (!match(BB->getTerminator(), m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), - TrueBB, FalseBB))) + m_BasicBlock(), m_BasicBlock()))) continue; if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp index 094e4a3d5dc8..8928678d6ab2 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 4cf235db86eb..7f5bedabbd80 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -13,6 +13,8 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -333,8 +335,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { // FIXME: consult devirt? // Do not follow aliases, otherwise we could inadvertently follow // dso_preemptable aliases or aliases with interposable linkage. 
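The i65 remark in the visitAddRecExpr comment above can be made concrete. Evaluating {0,+,2,+,1}<L> at iteration N gives the closed form 2*N + N*(N-1)/2, and the intermediate product N*(N-1) of an i64 N may need 65 bits before the halving, which is why the expander now falls back to literal expansion for such nested addrecs. A small APInt sketch of that closed form; the helper name is invented and the 65-bit width is taken from the comment:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  APInt evalQuadraticAddRec(uint64_t N) {
    // Evaluate {0,+,2,+,1} at step N in 65 bits so N*(N-1) cannot overflow.
    APInt X(65, N);
    APInt One(65, 1), Two(65, 2);
    APInt Linear = X * Two;                    // 2*N
    APInt Quadratic = (X * (X - One)).lshr(1); // N*(N-1)/2
    return Linear + Quadratic;
  }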
- const GlobalValue *Callee = dyn_cast<GlobalValue>( - CS.getCalledValue()->stripPointerCastsNoFollowAliases()); + const GlobalValue *Callee = + dyn_cast<GlobalValue>(CS.getCalledValue()->stripPointerCasts()); if (!Callee) { US.updateRange(UnknownRange); return false; diff --git a/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp index 3cf248a31142..8447dc87069d 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp @@ -218,9 +218,11 @@ struct DivergencePropagator { template <typename SuccessorIterable> std::unique_ptr<ConstBlockSet> computeJoinPoints(const BasicBlock &RootBlock, - SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) { + SuccessorIterable NodeSuccessors, const Loop *ParentLoop) { assert(JoinBlocks); + LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "<null>") << "\n" ); + // bootstrap with branch targets for (const auto *SuccBlock : NodeSuccessors) { DefMap.emplace(SuccBlock, SuccBlock); @@ -228,13 +230,19 @@ struct DivergencePropagator { if (ParentLoop && !ParentLoop->contains(SuccBlock)) { // immediate loop exit from node. ReachedLoopExits.insert(SuccBlock); - continue; } else { // regular successor PendingUpdates.insert(SuccBlock); } } + LLVM_DEBUG( + dbgs() << "SDA: rpo order:\n"; + for (const auto * RpoBlock : FuncRPOT) { + dbgs() << "- " << RpoBlock->getName() << "\n"; + } + ); + auto ItBeginRPO = FuncRPOT.begin(); // skip until term (TODO RPOT won't let us start at @term directly) @@ -245,16 +253,18 @@ struct DivergencePropagator { // propagate definitions at the immediate successors of the node in RPO auto ItBlockRPO = ItBeginRPO; - while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) { + while ((++ItBlockRPO != ItEndRPO) && + !PendingUpdates.empty()) { const auto *Block = *ItBlockRPO; + LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n"); - // skip @block if not pending update + // skip Block if not pending update auto ItPending = PendingUpdates.find(Block); if (ItPending == PendingUpdates.end()) continue; PendingUpdates.erase(ItPending); - // propagate definition at @block to its successors + // propagate definition at Block to its successors auto ItDef = DefMap.find(Block); const auto *DefBlock = ItDef->second; assert(DefBlock); @@ -278,6 +288,8 @@ struct DivergencePropagator { } } + LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs())); + // We need to know the definition at the parent loop header to decide // whether the definition at the header is different from the definition at // the loop exits, which would indicate a divergent loop exits. @@ -292,24 +304,17 @@ struct DivergencePropagator { // | // proper exit from both loops // - // D post-dominates B as it is the only proper exit from the "A loop". - // If C has a divergent branch, propagation will therefore stop at D. - // That implies that B will never receive a definition. - // But that definition can only be the same as at D (D itself in thise case) - // because all paths to anywhere have to pass through D. - // - const BasicBlock *ParentLoopHeader = - ParentLoop ? 
ParentLoop->getHeader() : nullptr; - if (ParentLoop && ParentLoop->contains(PdBoundBlock)) { - DefMap[ParentLoopHeader] = DefMap[PdBoundBlock]; - } - // analyze reached loop exits if (!ReachedLoopExits.empty()) { + const BasicBlock *ParentLoopHeader = + ParentLoop ? ParentLoop->getHeader() : nullptr; + assert(ParentLoop); - const auto *HeaderDefBlock = DefMap[ParentLoopHeader]; + auto ItHeaderDef = DefMap.find(ParentLoopHeader); + const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second; + LLVM_DEBUG(printDefs(dbgs())); - assert(HeaderDefBlock && "no definition in header of carrying loop"); + assert(HeaderDefBlock && "no definition at header of carrying loop"); for (const auto *ExitBlock : ReachedLoopExits) { auto ItExitDef = DefMap.find(ExitBlock); @@ -339,19 +344,10 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) { return *ItCached->second; } - // dont propagte beyond the immediate post dom of the loop - const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader())); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - while (PdBoundBlock && Loop.contains(PdBoundBlock)) { - IpdNode = IpdNode->getIDom(); - PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - } - // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>( - *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock); + *Loop.getHeader(), LoopExits, Loop.getParentLoop()); auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks)); assert(ItInserted.second); @@ -370,16 +366,11 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) { if (ItCached != CachedBranchJoins.end()) return *ItCached->second; - // dont propagate beyond the immediate post dominator of the branch - const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent())); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? 
IpdNode->getBlock() : nullptr; - // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; const auto &TermBlock = *Term.getParent(); auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>( - TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock); + TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock)); auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks)); assert(ItInserted.second); diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp index ef139d3257d2..c7238db43aab 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -28,7 +29,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", "Intel SVML library"))); -StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { +StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = + { #define TLI_DEFINE_STRING #include "llvm/Analysis/TargetLibraryInfo.def" }; @@ -58,14 +60,14 @@ static bool hasBcmp(const Triple &TT) { return TT.isGNUEnvironment() || TT.isMusl(); // Both NetBSD and OpenBSD are planning to remove the function. Windows does // not have it. - return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin(); + return TT.isOSFreeBSD() || TT.isOSSolaris(); } /// Initialize the set of available library functions based on the specified /// target triple. This should be carefully written so that a missing target /// triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - ArrayRef<StringRef> StandardNames) { + ArrayRef<StringLiteral> StandardNames) { // Verify that the StandardNames array is in alphabetical order. assert(std::is_sorted(StandardNames.begin(), StandardNames.end(), [](StringRef LHS, StringRef RHS) { @@ -104,19 +106,10 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setShouldSignExtI32Param(ShouldSignExtI32Param); if (T.getArch() == Triple::r600 || - T.getArch() == Triple::amdgcn) { - TLI.setUnavailable(LibFunc_ldexp); - TLI.setUnavailable(LibFunc_ldexpf); - TLI.setUnavailable(LibFunc_ldexpl); - TLI.setUnavailable(LibFunc_exp10); - TLI.setUnavailable(LibFunc_exp10f); - TLI.setUnavailable(LibFunc_exp10l); - TLI.setUnavailable(LibFunc_log10); - TLI.setUnavailable(LibFunc_log10f); - TLI.setUnavailable(LibFunc_log10l); - } + T.getArch() == Triple::amdgcn) + TLI.disableAllFunctions(); - // There are no library implementations of mempcy and memset for AMD gpus and + // There are no library implementations of memcpy and memset for AMD gpus and // these can be difficult to lower in the backend. 
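Related to the hasBcmp change above: whether a transform may emit a given library routine is ultimately queried through TargetLibraryInfo. A hedged one-liner sketch; the helper name is invented and TLI would come from the caller's analysis results:

  #include "llvm/Analysis/TargetLibraryInfo.h"
  using namespace llvm;

  bool canEmitBcmp(const TargetLibraryInfo &TLI) {
    // True only when bcmp is both recognized and available on this target.
    return TLI.has(LibFunc_bcmp);
  }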
if (T.getArch() == Triple::r600 || T.getArch() == Triple::amdgcn) { @@ -386,10 +379,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, case Triple::TvOS: case Triple::WatchOS: TLI.setUnavailable(LibFunc_exp10l); - if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) || - (T.isOSVersionLT(9, 0) && - (T.getArch() == Triple::x86 || - T.getArch() == Triple::x86_64)))) { + if (!T.isWatchOS() && + (T.isOSVersionLT(7, 0) || (T.isOSVersionLT(9, 0) && T.isX86()))) { TLI.setUnavailable(LibFunc_exp10); TLI.setUnavailable(LibFunc_exp10f); } else { @@ -623,19 +614,14 @@ static StringRef sanitizeFunctionName(StringRef funcName) { return GlobalValue::dropLLVMManglingEscape(funcName); } -bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, - LibFunc &F) const { - StringRef const *Start = &StandardNames[0]; - StringRef const *End = &StandardNames[NumLibFuncs]; - +bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const { funcName = sanitizeFunctionName(funcName); if (funcName.empty()) return false; - StringRef const *I = std::lower_bound( - Start, End, funcName, [](StringRef LHS, StringRef RHS) { - return LHS < RHS; - }); + const auto *Start = std::begin(StandardNames); + const auto *End = std::end(StandardNames); + const auto *I = std::lower_bound(Start, End, funcName); if (I != End && *I == funcName) { F = (LibFunc)(I - Start); return true; @@ -1481,6 +1467,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return false; } case LibFunc::NumLibFuncs: + case LibFunc::NotLibFunc: break; } @@ -1599,30 +1586,12 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F, return I->ScalarFnName; } -TargetLibraryInfo TargetLibraryAnalysis::run(Module &M, - ModuleAnalysisManager &) { - if (PresetInfoImpl) - return TargetLibraryInfo(*PresetInfoImpl); - - return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple()))); -} - -TargetLibraryInfo TargetLibraryAnalysis::run(Function &F, +TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F, FunctionAnalysisManager &) { - if (PresetInfoImpl) - return TargetLibraryInfo(*PresetInfoImpl); - - return TargetLibraryInfo( - lookupInfoImpl(Triple(F.getParent()->getTargetTriple()))); -} - -TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) { - std::unique_ptr<TargetLibraryInfoImpl> &Impl = - Impls[T.normalize()]; - if (!Impl) - Impl.reset(new TargetLibraryInfoImpl(T)); - - return *Impl; + if (!BaselineInfoImpl) + BaselineInfoImpl = + TargetLibraryInfoImpl(Triple(F.getParent()->getTargetTriple())); + return TargetLibraryInfo(*BaselineInfoImpl, &F); } unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const { @@ -1633,18 +1602,18 @@ unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const { } TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass() - : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) { + : ImmutablePass(ID), TLA(TargetLibraryInfoImpl()) { initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T) - : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) { + : ImmutablePass(ID), TLA(TargetLibraryInfoImpl(T)) { initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass( const TargetLibraryInfoImpl &TLIImpl) - : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) { + : ImmutablePass(ID), TLA(TLIImpl) { 
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } @@ -1656,3 +1625,19 @@ INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo", char TargetLibraryInfoWrapperPass::ID = 0; void TargetLibraryInfoWrapperPass::anchor() {} + +unsigned TargetLibraryInfoImpl::getWidestVF(StringRef ScalarF) const { + ScalarF = sanitizeFunctionName(ScalarF); + if (ScalarF.empty()) + return 1; + + unsigned VF = 1; + std::vector<VecDesc>::const_iterator I = + llvm::lower_bound(VectorDescs, ScalarF, compareWithScalarFnName); + while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == ScalarF) { + if (I->VectorizationFactor > VF) + VF = I->VectorizationFactor; + ++I; + } + return VF; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp index eb04c34453fb..f2c63f789d89 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" @@ -16,10 +19,9 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/LoopIterator.h" #include <utility> using namespace llvm; @@ -59,11 +61,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), - IE = ExitingBlocks.end(); - I != IE; ++I) { - BasicBlock *BB = *I; - + for (BasicBlock *BB : ExitingBlocks) { // If we pass the updated counter back through a phi, we need to know // which latch the updated value will be coming from. if (!L->isLoopLatch(BB)) { @@ -97,13 +95,11 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; - for (pred_iterator PI = pred_begin(L->getHeader()), - PIE = pred_end(L->getHeader()); - PI != PIE; ++PI) { - if (!L->contains(*PI)) + for (BasicBlock *Pred : predecessors(L->getHeader())) { + if (!L->contains(Pred)) continue; - if (!DT.dominates(*I, *PI)) { + if (!DT.dominates(BB, Pred)) { NotAlways = true; break; } @@ -127,7 +123,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // Note that this block may not be the loop latch block, even if the loop // has a latch block. 
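For the getWidestVF helper added to TargetLibraryInfoImpl above, a brief hedged usage sketch of the intended query, e.g. from a vectorizer deciding how wide a call replacement can be; the wrapper name is invented:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  using namespace llvm;

  unsigned widestVectorVariant(const TargetLibraryInfoImpl &TLII,
                               StringRef ScalarName) {
    // Returns 1 when no vector mapping is registered for ScalarName.
    return TLII.getWidestVF(ScalarName);
  }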
- ExitBlock = *I; + ExitBlock = BB; ExitCount = EC; break; } @@ -199,9 +195,10 @@ int TargetTransformInfo::getIntrinsicCost( } unsigned -TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI, - unsigned &JTSize) const { - return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize); +TargetTransformInfo::getEstimatedNumberOfCaseClusters( + const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) const { + return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } int TargetTransformInfo::getUserCost(const User *U, @@ -227,6 +224,16 @@ unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } +bool TargetTransformInfo::collectFlatAddressOperands( + SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { + return TTIImpl->collectFlatAddressOperands(OpIndexes, IID); +} + +bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const { + return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } @@ -237,6 +244,12 @@ bool TargetTransformInfo::isHardwareLoopProfitable( return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } +bool TargetTransformInfo::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, + ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, + DominatorTree *DT, const LoopAccessInfo *LAI) const { + return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); +} + void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, UP); @@ -283,30 +296,33 @@ bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { return TTIImpl->shouldFavorBackedgeIndex(L); } -bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { - return TTIImpl->isLegalMaskedStore(DataType); +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedStore(DataType, Alignment); } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { - return TTIImpl->isLegalMaskedLoad(DataType); +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedLoad(DataType, Alignment); } bool TargetTransformInfo::isLegalNTStore(Type *DataType, - unsigned Alignment) const { + Align Alignment) const { return TTIImpl->isLegalNTStore(DataType, Alignment); } -bool TargetTransformInfo::isLegalNTLoad(Type *DataType, - unsigned Alignment) const { +bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTLoad(DataType, Alignment); } -bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { - return TTIImpl->isLegalMaskedGather(DataType); +bool TargetTransformInfo::isLegalMaskedGather(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedGather(DataType, Alignment); } -bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { - return TTIImpl->isLegalMaskedScatter(DataType); +bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedScatter(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const { @@ -359,14 +375,6 @@ bool 
TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } -unsigned TargetTransformInfo::getJumpBufAlignment() const { - return TTIImpl->getJumpBufAlignment(); -} - -unsigned TargetTransformInfo::getJumpBufSize() const { - return TTIImpl->getJumpBufSize(); -} - bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } @@ -456,22 +464,30 @@ int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { return Cost; } -int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { - return TTIImpl->getNumberOfRegisters(Vector); +unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { + return TTIImpl->getNumberOfRegisters(ClassID); +} + +unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const { + return TTIImpl->getRegisterClassForType(Vector, Ty); +} + +const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { + return TTIImpl->getRegisterClassName(ClassID); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { @@ -578,10 +594,10 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args) const { - int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args); + OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, + const Instruction *CxtI) const { + int Cost = TTIImpl->getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -633,7 +649,7 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, } int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, + MaybeAlign Alignment, unsigned AddressSpace, const Instruction *I) const { assert ((I == nullptr || I->getOpcode() == Opcode) && @@ -1169,7 +1185,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { Op2VK = getOperandInfo(I->getOperand(1), Op2VP); SmallVector<const Value *, 2> Operands(I->operand_values()); return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, - Op1VP, Op2VP, Operands); + Op1VP, Op2VP, Operands, I); } case Instruction::FNeg: { TargetTransformInfo::OperandValueKind Op1VK, Op2VK; @@ -1179,7 +1195,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) 
const { Op2VP = OP_None; SmallVector<const Value *, 2> Operands(I->operand_values()); return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, - Op1VP, Op2VP, Operands); + Op1VP, Op2VP, Operands, I); } case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); @@ -1195,14 +1211,14 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { const StoreInst *SI = cast<StoreInst>(I); Type *ValTy = SI->getValueOperand()->getType(); return getMemoryOpCost(I->getOpcode(), ValTy, - SI->getAlignment(), - SI->getPointerAddressSpace(), I); + MaybeAlign(SI->getAlignment()), + SI->getPointerAddressSpace(), I); } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(I); return getMemoryOpCost(I->getOpcode(), I->getType(), - LI->getAlignment(), - LI->getPointerAddressSpace(), I); + MaybeAlign(LI->getAlignment()), + LI->getPointerAddressSpace(), I); } case Instruction::ZExt: case Instruction::SExt: @@ -1276,6 +1292,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } + case Instruction::ExtractValue: + return 0; // Model all ExtractValue nodes as free. case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); Type *Ty = Shuffle->getType(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 3b9040aa0f52..da4520066b46 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -114,6 +114,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -520,23 +521,20 @@ static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) { } void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { - if (Merge) + if (Merge) { N.TBAA = MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa)); - else - N.TBAA = getMetadata(LLVMContext::MD_tbaa); - - if (Merge) + N.TBAAStruct = nullptr; N.Scope = MDNode::getMostGenericAliasScope( N.Scope, getMetadata(LLVMContext::MD_alias_scope)); - else - N.Scope = getMetadata(LLVMContext::MD_alias_scope); - - if (Merge) N.NoAlias = MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias)); - else + } else { + N.TBAA = getMetadata(LLVMContext::MD_tbaa); + N.TBAAStruct = getMetadata(LLVMContext::MD_tbaa_struct); + N.Scope = getMetadata(LLVMContext::MD_alias_scope); N.NoAlias = getMetadata(LLVMContext::MD_noalias); + } } static const MDNode *createAccessTag(const MDNode *AccessType) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp index 9311dfbc6eba..072d291f3f93 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -127,3 +127,35 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad( findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr, Offset->getZExtValue(), CI, DT); } + +Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) { + if (I->getType()->isPointerTy()) { + if (Offset == 0) + return I; + return nullptr; + } + + const DataLayout &DL = M.getDataLayout(); + + if 
(auto *C = dyn_cast<ConstantStruct>(I)) { + const StructLayout *SL = DL.getStructLayout(C->getType()); + if (Offset >= SL->getSizeInBytes()) + return nullptr; + + unsigned Op = SL->getElementContainingOffset(Offset); + return getPointerAtOffset(cast<Constant>(I->getOperand(Op)), + Offset - SL->getElementOffset(Op), M); + } + if (auto *C = dyn_cast<ConstantArray>(I)) { + ArrayType *VTableTy = C->getType(); + uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); + + unsigned Op = Offset / ElemSize; + if (Op >= C->getNumOperands()) + return nullptr; + + return getPointerAtOffset(cast<Constant>(I->getOperand(Op)), + Offset % ElemSize, M); + } + return nullptr; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp new file mode 100644 index 000000000000..a331b95e818b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp @@ -0,0 +1,430 @@ +//===- VFABIDemangling.cpp - Vector Function ABI demangling utilities. ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/VectorUtils.h" + +using namespace llvm; + +namespace { +/// Utilities for the Vector Function ABI name parser. + +/// Return types for the parser functions. +enum class ParseRet { + OK, // Found. + None, // Not found. + Error // Syntax error. +}; + +/// Extracts the `<isa>` information from the mangled string, and +/// sets the `ISA` accordingly. +ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) { + if (MangledName.empty()) + return ParseRet::Error; + + if (MangledName.startswith(VFABI::_LLVM_)) { + MangledName = MangledName.drop_front(strlen(VFABI::_LLVM_)); + ISA = VFISAKind::LLVM; + } else { + ISA = StringSwitch<VFISAKind>(MangledName.take_front(1)) + .Case("n", VFISAKind::AdvancedSIMD) + .Case("s", VFISAKind::SVE) + .Case("b", VFISAKind::SSE) + .Case("c", VFISAKind::AVX) + .Case("d", VFISAKind::AVX2) + .Case("e", VFISAKind::AVX512) + .Default(VFISAKind::Unknown); + MangledName = MangledName.drop_front(1); + } + + return ParseRet::OK; +} + +/// Extracts the `<mask>` information from the mangled string, and +/// sets `IsMasked` accordingly. The input string `MangledName` is +/// left unmodified. +ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) { + if (MangledName.consume_front("M")) { + IsMasked = true; + return ParseRet::OK; + } + + if (MangledName.consume_front("N")) { + IsMasked = false; + return ParseRet::OK; + } + + return ParseRet::Error; +} + +/// Extract the `<vlen>` information from the mangled string, and +/// sets `VF` accordingly. A `<vlen> == "x"` token is interpreted as a scalable +/// vector length. On success, the `<vlen>` token is removed from +/// the input string `ParseString`. 
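Returning to the getPointerAtOffset helper added to TypeMetadataUtils.cpp above: it recursively walks constant struct and array initializers by byte offset until it reaches a pointer. A hedged usage sketch for the typical vtable case; the helper name is invented:

  #include "llvm/Analysis/TypeMetadataUtils.h"
  #include "llvm/IR/GlobalVariable.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  Constant *functionAtVTableOffset(GlobalVariable &VTable, uint64_t ByteOffset,
                                   Module &M) {
    if (!VTable.hasInitializer())
      return nullptr;
    // Walks struct layouts and array element sizes from M's data layout and
    // returns the pointer constant stored ByteOffset bytes into the initializer.
    return getPointerAtOffset(VTable.getInitializer(), ByteOffset, M);
  }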
+/// +ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) { + if (ParseString.consume_front("x")) { + VF = 0; + IsScalable = true; + return ParseRet::OK; + } + + if (ParseString.consumeInteger(10, VF)) + return ParseRet::Error; + + IsScalable = false; + return ParseRet::OK; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// <token> <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `Pos` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +/// +/// The function expects <token> to be one of "ls", "Rs", "Us" or +/// "Ls". +ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString, + VFParamKind &PKind, int &Pos, + const StringRef Token) { + if (ParseString.consume_front(Token)) { + PKind = VFABI::getVFParamKindFromString(Token); + if (ParseString.consumeInteger(10, Pos)) + return ParseRet::Error; + return ParseRet::OK; + } + + return ParseRet::None; +} + +/// The function looks for the following stringt at the beginning of +/// the input string `ParseString`: +/// +/// <token> <number> +/// +/// <token> is one of "ls", "Rs", "Us" or "Ls". +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString, + VFParamKind &PKind, int &StepOrPos) { + ParseRet Ret; + + // "ls" <RuntimeStepPos> + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "ls"); + if (Ret != ParseRet::None) + return Ret; + + // "Rs" <RuntimeStepPos> + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Rs"); + if (Ret != ParseRet::None) + return Ret; + + // "Ls" <RuntimeStepPos> + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Ls"); + if (Ret != ParseRet::None) + return Ret; + + // "Us" <RuntimeStepPos> + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Us"); + if (Ret != ParseRet::None) + return Ret; + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// <token> {"n"} <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `LinearStep` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +/// +/// The function expects <token> to be one of "l", "R", "U" or +/// "L". 
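The token parsers above all follow the same StringRef idiom: consume_front to strip a fixed token, then consumeInteger for the trailing number. A tiny self-contained sketch for the made-up token "ls2" (linear, runtime step held in argument position 2); the helper name is invented:

  #include "llvm/ADT/StringRef.h"
  using namespace llvm;

  bool parseLsToken(StringRef S, int &Pos) {
    if (!S.consume_front("ls"))
      return false;
    // consumeInteger returns true on a parse failure, hence the negation.
    return !S.consumeInteger(10, Pos); // "ls2" -> Pos == 2
  }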
+ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString, + VFParamKind &PKind, int &LinearStep, + const StringRef Token) { + if (ParseString.consume_front(Token)) { + PKind = VFABI::getVFParamKindFromString(Token); + const bool Negate = ParseString.consume_front("n"); + if (ParseString.consumeInteger(10, LinearStep)) + LinearStep = 1; + if (Negate) + LinearStep *= -1; + return ParseRet::OK; + } + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// ["l" | "R" | "U" | "L"] {"n"} <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `LinearStep` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString, + VFParamKind &PKind, int &StepOrPos) { + // "l" {"n"} <CompileTimeStep> + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "l") == + ParseRet::OK) + return ParseRet::OK; + + // "R" {"n"} <CompileTimeStep> + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "R") == + ParseRet::OK) + return ParseRet::OK; + + // "L" {"n"} <CompileTimeStep> + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "L") == + ParseRet::OK) + return ParseRet::OK; + + // "U" {"n"} <CompileTimeStep> + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "U") == + ParseRet::OK) + return ParseRet::OK; + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// "u" <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `Pos` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseUniform(StringRef &ParseString, VFParamKind &PKind, int &Pos) { + // "u" <Pos> + const char *UniformToken = "u"; + if (ParseString.consume_front(UniformToken)) { + PKind = VFABI::getVFParamKindFromString(UniformToken); + if (ParseString.consumeInteger(10, Pos)) + return ParseRet::Error; + + return ParseRet::OK; + } + return ParseRet::None; +} + +/// Looks into the <parameters> part of the mangled name in search +/// for valid paramaters at the beginning of the string +/// `ParseString`. +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` +/// accordingly, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. 
+ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind, + int &StepOrPos) { + if (ParseString.consume_front("v")) { + PKind = VFParamKind::Vector; + StepOrPos = 0; + return ParseRet::OK; + } + + const ParseRet HasLinearRuntime = + tryParseLinearWithRuntimeStep(ParseString, PKind, StepOrPos); + if (HasLinearRuntime != ParseRet::None) + return HasLinearRuntime; + + const ParseRet HasLinearCompileTime = + tryParseLinearWithCompileTimeStep(ParseString, PKind, StepOrPos); + if (HasLinearCompileTime != ParseRet::None) + return HasLinearCompileTime; + + const ParseRet HasUniform = tryParseUniform(ParseString, PKind, StepOrPos); + if (HasUniform != ParseRet::None) + return HasUniform; + + return ParseRet::None; +} + +/// Looks into the <parameters> part of the mangled name in search +/// of a valid 'aligned' clause. The function should be invoked +/// after parsing a parameter via `tryParseParameter`. +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` +/// accordingly, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) { + uint64_t Val; + // "a" <number> + if (ParseString.consume_front("a")) { + if (ParseString.consumeInteger(10, Val)) + return ParseRet::Error; + + if (!isPowerOf2_64(Val)) + return ParseRet::Error; + + Alignment = Align(Val); + + return ParseRet::OK; + } + + return ParseRet::None; +} +} // namespace + +// Format of the ABI name: +// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] +Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) { + const StringRef OriginalName = MangledName; + // Assume there is no custom name <redirection>, and therefore the + // vector name consists of + // _ZGV<isa><mask><vlen><parameters>_<scalarname>. + StringRef VectorName = MangledName; + + // Parse the fixed size part of the manled name + if (!MangledName.consume_front("_ZGV")) + return None; + + // Extract ISA. An unknow ISA is also supported, so we accept all + // values. + VFISAKind ISA; + if (tryParseISA(MangledName, ISA) != ParseRet::OK) + return None; + + // Extract <mask>. + bool IsMasked; + if (tryParseMask(MangledName, IsMasked) != ParseRet::OK) + return None; + + // Parse the variable size, starting from <vlen>. + unsigned VF; + bool IsScalable; + if (tryParseVLEN(MangledName, VF, IsScalable) != ParseRet::OK) + return None; + + // Parse the <parameters>. + ParseRet ParamFound; + SmallVector<VFParameter, 8> Parameters; + do { + const unsigned ParameterPos = Parameters.size(); + VFParamKind PKind; + int StepOrPos; + ParamFound = tryParseParameter(MangledName, PKind, StepOrPos); + + // Bail off if there is a parsing error in the parsing of the parameter. + if (ParamFound == ParseRet::Error) + return None; + + if (ParamFound == ParseRet::OK) { + Align Alignment; + // Look for the alignment token "a <number>". + const ParseRet AlignFound = tryParseAlign(MangledName, Alignment); + // Bail off if there is a syntax error in the align token. + if (AlignFound == ParseRet::Error) + return None; + + // Add the parameter. + Parameters.push_back({ParameterPos, PKind, StepOrPos, Alignment}); + } + } while (ParamFound == ParseRet::OK); + + // A valid MangledName must have at least one valid entry in the + // <parameters>. 
+ if (Parameters.empty()) + return None; + + // Check for the <scalarname> and the optional <redirection>, which + // are separated from the prefix with "_" + if (!MangledName.consume_front("_")) + return None; + + // The rest of the string must be in the format: + // <scalarname>[(<redirection>)] + const StringRef ScalarName = + MangledName.take_while([](char In) { return In != '('; }); + + if (ScalarName.empty()) + return None; + + // Reduce MangledName to [(<redirection>)]. + MangledName = MangledName.ltrim(ScalarName); + // Find the optional custom name redirection. + if (MangledName.consume_front("(")) { + if (!MangledName.consume_back(")")) + return None; + // Update the vector variant with the one specified by the user. + VectorName = MangledName; + // If the vector name is missing, bail out. + if (VectorName.empty()) + return None; + } + + // LLVM internal mapping via the TargetLibraryInfo (TLI) must be + // redirected to an existing name. + if (ISA == VFISAKind::LLVM && VectorName == OriginalName) + return None; + + // When <mask> is "M", we need to add a parameter that is used as + // global predicate for the function. + if (IsMasked) { + const unsigned Pos = Parameters.size(); + Parameters.push_back({Pos, VFParamKind::GlobalPredicate}); + } + + // Asserts for parameters of type `VFParamKind::GlobalPredicate`, as + // prescribed by the Vector Function ABI specifications supported by + // this parser: + // 1. Uniqueness. + // 2. Must be the last in the parameter list. + const auto NGlobalPreds = std::count_if( + Parameters.begin(), Parameters.end(), [](const VFParameter PK) { + return PK.ParamKind == VFParamKind::GlobalPredicate; + }); + assert(NGlobalPreds < 2 && "Cannot have more than one global predicate."); + if (NGlobalPreds) + assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate && + "The global predicate must be the last parameter"); + + const VFShape Shape({VF, IsScalable, Parameters}); + return VFInfo({Shape, ScalarName, VectorName, ISA}); +} + +VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) { + const VFParamKind ParamKind = StringSwitch<VFParamKind>(Token) + .Case("v", VFParamKind::Vector) + .Case("l", VFParamKind::OMP_Linear) + .Case("R", VFParamKind::OMP_LinearRef) + .Case("L", VFParamKind::OMP_LinearVal) + .Case("U", VFParamKind::OMP_LinearUVal) + .Case("ls", VFParamKind::OMP_LinearPos) + .Case("Ls", VFParamKind::OMP_LinearValPos) + .Case("Rs", VFParamKind::OMP_LinearRefPos) + .Case("Us", VFParamKind::OMP_LinearUValPos) + .Case("u", VFParamKind::OMP_Uniform) + .Default(VFParamKind::Unknown); + + if (ParamKind != VFParamKind::Unknown) + return ParamKind; + + // This function should never be invoked with an invalid input. 
+ llvm_unreachable("This fuction should be invoken only on parameters" + " that have a textual representation in the mangled name" + " of the Vector Function ABI"); +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp index c70906dcc629..ad6765e2514b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp @@ -51,6 +51,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -88,7 +90,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - return DL.getIndexTypeSizeInBits(Ty); + return DL.getPointerTypeSizeInBits(Ty); } namespace { @@ -558,17 +560,89 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return true; } - // The context comes first, but they're both in the same block. Make sure - // there is nothing in between that might interrupt the control flow. - for (BasicBlock::const_iterator I = - std::next(BasicBlock::const_iterator(CxtI)), IE(Inv); - I != IE; ++I) - if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) + // Don't let an assume affect itself - this would cause the problems + // `isEphemeralValueOf` is trying to prevent, and it would also make + // the loop below go out of bounds. + if (Inv == CxtI) + return false; + + // The context comes first, but they're both in the same block. + // Make sure there is nothing in between that might interrupt + // the control flow, not even CxtI itself. + for (BasicBlock::const_iterator I(CxtI), IE(Inv); I != IE; ++I) + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) return false; return !isEphemeralValueOf(Inv, CxtI); } +static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) { + // Use of assumptions is context-sensitive. If we don't have a context, we + // cannot use them! + if (!Q.AC || !Q.CxtI) + return false; + + // Note that the patterns below need to be kept in sync with the code + // in AssumptionCache::updateAffectedValues. + + auto CmpExcludesZero = [V](ICmpInst *Cmp) { + auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); + + Value *RHS; + CmpInst::Predicate Pred; + if (!match(Cmp, m_c_ICmp(Pred, m_V, m_Value(RHS)))) + return false; + // Canonicalize 'v' to be on the LHS of the comparison. + if (Cmp->getOperand(1) != RHS) + Pred = CmpInst::getSwappedPredicate(Pred); + + // assume(v u> y) -> assume(v != 0) + if (Pred == ICmpInst::ICMP_UGT) + return true; + + // assume(v != 0) + // We special-case this one to ensure that we handle `assume(v != null)`. + if (Pred == ICmpInst::ICMP_NE) + return match(RHS, m_Zero()); + + // All other predicates - rely on generic ConstantRange handling. 
+ ConstantInt *CI; + if (!match(RHS, m_ConstantInt(CI))) + return false; + ConstantRange RHSRange(CI->getValue()); + ConstantRange TrueValues = + ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); + return !TrueValues.contains(APInt::getNullValue(CI->getBitWidth())); + }; + + for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { + if (!AssumeVH) + continue; + CallInst *I = cast<CallInst>(AssumeVH); + assert(I->getFunction() == Q.CxtI->getFunction() && + "Got assumption for the wrong function!"); + if (Q.isExcluded(I)) + continue; + + // Warning: This loop can end up being somewhat performance sensitive. + // We're running this loop for once for each value queried resulting in a + // runtime of ~O(#assumes * #values). + + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && + "must be an assume intrinsic"); + + Value *Arg = I->getArgOperand(0); + ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); + if (!Cmp) + continue; + + if (CmpExcludesZero(Cmp) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) + return true; + } + + return false; +} + static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we @@ -909,7 +983,7 @@ static void computeKnownBitsFromShiftOperator( // If the shift amount could be greater than or equal to the bit-width of the // LHS, the value could be poison, but bail out because the check below is // expensive. TODO: Should we just carry on? - if ((~Known.Zero).uge(BitWidth)) { + if (Known.getMaxValue().uge(BitWidth)) { Known.resetAll(); return; } @@ -1049,7 +1123,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, break; } case Instruction::Select: { - const Value *LHS, *RHS; + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { computeKnownBits(RHS, Known, Depth + 1, Q); @@ -1095,7 +1169,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // RHS from matchSelectPattern returns the negation part of abs pattern. // If the negate has an NSW flag we can assume the sign bit of the result // will be 0 because that makes abs(INT_MIN) undefined. - if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) + if (match(RHS, m_Neg(m_Specific(LHS))) && + Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) MaxHighZeros = 1; } @@ -1128,7 +1203,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // which fall through here. Type *ScalarTy = SrcTy->getScalarType(); SrcBitWidth = ScalarTy->isPointerTy() ? - Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); assert(SrcBitWidth && "SrcBitWidth can't be zero"); @@ -1346,6 +1421,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, for (unsigned i = 0; i != 2; ++i) { Value *L = P->getIncomingValue(i); Value *R = P->getIncomingValue(!i); + Instruction *RInst = P->getIncomingBlock(!i)->getTerminator(); + Instruction *LInst = P->getIncomingBlock(i)->getTerminator(); Operator *LU = dyn_cast<Operator>(L); if (!LU) continue; @@ -1366,14 +1443,23 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, else if (LR == I) L = LL; else - break; + continue; // Check for recurrence with L and R flipped. + + // Change the context instruction to the "edge" that flows into the + // phi. 
This is important because that is where the value is actually + // "evaluated" even though it is used later somewhere else. (see also + // D69571). + Query RecQ = Q; + // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - computeKnownBits(R, Known2, Depth + 1, Q); + RecQ.CxtI = RInst; + computeKnownBits(R, Known2, Depth + 1, RecQ); // We need to take the minimum number of known bits KnownBits Known3(Known); - computeKnownBits(L, Known3, Depth + 1, Q); + RecQ.CxtI = LInst; + computeKnownBits(L, Known3, Depth + 1, RecQ); Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), Known3.countMinTrailingZeros())); @@ -1429,14 +1515,22 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Known.Zero.setAllBits(); Known.One.setAllBits(); - for (Value *IncValue : P->incoming_values()) { + for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) { + Value *IncValue = P->getIncomingValue(u); // Skip direct self references. if (IncValue == P) continue; + // Change the context instruction to the "edge" that flows into the + // phi. This is important because that is where the value is actually + // "evaluated" even though it is used later somewhere else. (see also + // D69571). + Query RecQ = Q; + RecQ.CxtI = P->getIncomingBlock(u)->getTerminator(); + Known2 = KnownBits(BitWidth); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(IncValue, Known2, MaxDepth - 1, Q); + computeKnownBits(IncValue, Known2, MaxDepth - 1, RecQ); Known.Zero &= Known2.Zero; Known.One &= Known2.One; // If all bits have been ruled out, there's no need to check @@ -1636,7 +1730,7 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, Type *ScalarTy = V->getType()->getScalarType(); unsigned ExpectedWidth = ScalarTy->isPointerTy() ? - Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); + Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; (void)ExpectedWidth; @@ -1714,9 +1808,9 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // Aligned pointers have trailing zeros - refine Known.Zero set if (V->getType()->isPointerTy()) { - unsigned Align = V->getPointerAlignment(Q.DL); + const MaybeAlign Align = V->getPointerAlignment(Q.DL); if (Align) - Known.Zero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align->value())); } // computeKnownBitsFromAssume strictly refines Known. @@ -1895,8 +1989,8 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, static bool isKnownNonNullFromDominatingCondition(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { - assert(V->getType()->isPointerTy() && "V must be pointer type"); - assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull"); + if (isa<Constant>(V)) + return false; if (!CtxI || !DT) return false; @@ -1917,6 +2011,15 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI)) return true; + // If the value is used as a load/store, then the pointer must be non null. 
+ if (V == getLoadStorePointerOperand(U)) { + const Instruction *I = cast<Instruction>(U); + if (!NullPointerIsDefined(I->getFunction(), + V->getType()->getPointerAddressSpace()) && + DT->dominates(I, CtxI)) + return true; + } + // Consider only compare instructions uniquely controlling a branch CmpInst::Predicate Pred; if (!match(const_cast<User *>(U), @@ -2043,6 +2146,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } } + if (isKnownNonZeroFromAssume(V, Q)) + return true; + // Some of the tests below are recursive, so bail out if we hit the limit. if (Depth++ >= MaxDepth) return false; @@ -2066,17 +2172,16 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (const auto *Call = dyn_cast<CallBase>(V)) { if (Call->isReturnNonNull()) return true; - if (const auto *RP = getArgumentAliasingToReturnedPointer(Call)) + if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) return isKnownNonZero(RP, Depth, Q); } } + if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) + return true; // Check for recursive pointer simplifications. if (V->getType()->isPointerTy()) { - if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) - return true; - // Look through bitcast operations, GEPs, and int2ptr instructions as they // do not alter the value, or at least not the nullness property of the // value, e.g., int2ptr is allowed to zero/sign extend the value. @@ -2300,7 +2405,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, cast<Operator>(Select)->getOpcode() == Instruction::Select && "Input should be a Select!"); - const Value *LHS, *RHS, *LHS2, *RHS2; + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; if (SPF != SPF_SMAX && SPF != SPF_SMIN) return false; @@ -2308,6 +2413,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, if (!match(RHS, m_APInt(CLow))) return false; + const Value *LHS2 = nullptr, *RHS2 = nullptr; SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; if (getInverseMinMaxFlavor(SPF) != SPF2) return false; @@ -2372,7 +2478,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, Type *ScalarTy = V->getType()->getScalarType(); unsigned TyBits = ScalarTy->isPointerTy() ? - Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); unsigned Tmp, Tmp2; @@ -2384,253 +2490,256 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (Depth == MaxDepth) return 1; // Limit search depth. - const Operator *U = dyn_cast<Operator>(V); - switch (Operator::getOpcode(V)) { - default: break; - case Instruction::SExt: - Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); - return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; + if (auto *U = dyn_cast<Operator>(V)) { + switch (Operator::getOpcode(V)) { + default: break; + case Instruction::SExt: + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; - case Instruction::SDiv: { - const APInt *Denominator; - // sdiv X, C -> adds log(C) sign bits. - if (match(U->getOperand(1), m_APInt(Denominator))) { + case Instruction::SDiv: { + const APInt *Denominator; + // sdiv X, C -> adds log(C) sign bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { - // Ignore non-positive denominator. 
- if (!Denominator->isStrictlyPositive()) - break; + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; - // Calculate the incoming numerator bits. - unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + // Calculate the incoming numerator bits. + unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - // Add floor(log(C)) bits to the numerator bits. - return std::min(TyBits, NumBits + Denominator->logBase2()); + // Add floor(log(C)) bits to the numerator bits. + return std::min(TyBits, NumBits + Denominator->logBase2()); + } + break; } - break; - } - case Instruction::SRem: { - const APInt *Denominator; - // srem X, C -> we know that the result is within [-C+1,C) when C is a - // positive constant. This let us put a lower bound on the number of sign - // bits. - if (match(U->getOperand(1), m_APInt(Denominator))) { - - // Ignore non-positive denominator. - if (!Denominator->isStrictlyPositive()) - break; + case Instruction::SRem: { + const APInt *Denominator; + // srem X, C -> we know that the result is within [-C+1,C) when C is a + // positive constant. This let us put a lower bound on the number of sign + // bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { - // Calculate the incoming numerator bits. SRem by a positive constant - // can't lower the number of sign bits. - unsigned NumrBits = - ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - - // Calculate the leading sign bit constraints by examining the - // denominator. Given that the denominator is positive, there are two - // cases: - // - // 1. the numerator is positive. The result range is [0,C) and [0,C) u< - // (1 << ceilLogBase2(C)). - // - // 2. the numerator is negative. Then the result range is (-C,0] and - // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). - // - // Thus a lower bound on the number of sign bits is `TyBits - - // ceilLogBase2(C)`. + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; - unsigned ResBits = TyBits - Denominator->ceilLogBase2(); - return std::max(NumrBits, ResBits); + // Calculate the incoming numerator bits. SRem by a positive constant + // can't lower the number of sign bits. + unsigned NumrBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + + // Calculate the leading sign bit constraints by examining the + // denominator. Given that the denominator is positive, there are two + // cases: + // + // 1. the numerator is positive. The result range is [0,C) and [0,C) u< + // (1 << ceilLogBase2(C)). + // + // 2. the numerator is negative. Then the result range is (-C,0] and + // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). + // + // Thus a lower bound on the number of sign bits is `TyBits - + // ceilLogBase2(C)`. + + unsigned ResBits = TyBits - Denominator->ceilLogBase2(); + return std::max(NumrBits, ResBits); + } + break; } - break; - } - case Instruction::AShr: { - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - // ashr X, C -> adds C sign bits. Vectors too. - const APInt *ShAmt; - if (match(U->getOperand(1), m_APInt(ShAmt))) { - if (ShAmt->uge(TyBits)) - break; // Bad shift. - unsigned ShAmtLimited = ShAmt->getZExtValue(); - Tmp += ShAmtLimited; - if (Tmp > TyBits) Tmp = TyBits; - } - return Tmp; - } - case Instruction::Shl: { - const APInt *ShAmt; - if (match(U->getOperand(1), m_APInt(ShAmt))) { - // shl destroys sign bits. 
+ case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (ShAmt->uge(TyBits) || // Bad shift. - ShAmt->uge(Tmp)) break; // Shifted all sign bits out. - Tmp2 = ShAmt->getZExtValue(); - return Tmp - Tmp2; + // ashr X, C -> adds C sign bits. Vectors too. + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + if (ShAmt->uge(TyBits)) + break; // Bad shift. + unsigned ShAmtLimited = ShAmt->getZExtValue(); + Tmp += ShAmtLimited; + if (Tmp > TyBits) Tmp = TyBits; + } + return Tmp; } - break; - } - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: // NOT is handled here. - // Logical binary ops preserve the number of sign bits at the worst. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - FirstAnswer = std::min(Tmp, Tmp2); - // We computed what we know about the sign bits as our first - // answer. Now proceed to the generic code that uses - // computeKnownBits, and pick whichever answer is better. + case Instruction::Shl: { + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (ShAmt->uge(TyBits) || // Bad shift. + ShAmt->uge(Tmp)) break; // Shifted all sign bits out. + Tmp2 = ShAmt->getZExtValue(); + return Tmp - Tmp2; + } + break; } - break; - - case Instruction::Select: { - // If we have a clamp pattern, we know that the number of sign bits will be - // the minimum of the clamp min/max range. - const Value *X; - const APInt *CLow, *CHigh; - if (isSignedMinMaxClamp(U, X, CLow, CHigh)) - return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); - - Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp == 1) break; - Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); - return std::min(Tmp, Tmp2); - } - - case Instruction::Add: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - - // Special case decrementing a value (ADD X, -1): - if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) - if (CRHS->isAllOnesValue()) { - KnownBits Known(TyBits); - computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); - - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) - return TyBits; - - // If we are subtracting one from a positive number, there is no carry - // out of the result. - if (Known.isNonNegative()) - return Tmp; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // computeKnownBits, and pick whichever answer is better. } + break; - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) break; - return std::min(Tmp, Tmp2)-1; + case Instruction::Select: { + // If we have a clamp pattern, we know that the number of sign bits will + // be the minimum of the clamp min/max range. 
+ const Value *X; + const APInt *CLow, *CHigh; + if (isSignedMinMaxClamp(U, X, CLow, CHigh)) + return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); + + Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp == 1) break; + Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); + return std::min(Tmp, Tmp2); + } - case Instruction::Sub: - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) break; - - // Handle NEG. - if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) - if (CLHS->isNullValue()) { - KnownBits Known(TyBits); - computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) - return TyBits; - - // If the input is known to be positive (the sign bit is known clear), - // the output of the NEG has the same number of sign bits as the input. - if (Known.isNonNegative()) - return Tmp2; - - // Otherwise, we treat this like a SUB. - } + case Instruction::Add: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + + // Special case decrementing a value (ADD X, -1): + if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) + if (CRHS->isAllOnesValue()) { + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); + + // If the input is known to be 0 or 1, the output is 0/-1, which is + // all sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known.isNonNegative()) + return Tmp; + } - // Sub can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - return std::min(Tmp, Tmp2)-1; + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp2 == 1) break; + return std::min(Tmp, Tmp2) - 1; - case Instruction::Mul: { - // The output of the Mul can be at most twice the valid bits in the inputs. - unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (SignBitsOp0 == 1) break; - unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (SignBitsOp1 == 1) break; - unsigned OutValidBits = - (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); - return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; - } + case Instruction::Sub: + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp2 == 1) break; + + // Handle NEG. + if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) + if (CLHS->isNullValue()) { + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); + // If the input is known to be 0 or 1, the output is 0/-1, which is + // all sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the + // input. + if (Known.isNonNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } - case Instruction::PHI: { - const PHINode *PN = cast<PHINode>(U); - unsigned NumIncomingValues = PN->getNumIncomingValues(); - // Don't analyze large in-degree PHIs. 
- if (NumIncomingValues > 4) break; - // Unreachable blocks may have zero-operand PHI nodes. - if (NumIncomingValues == 0) break; - - // Take the minimum of all incoming values. This can't infinitely loop - // because of our depth threshold. - Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); - for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { - if (Tmp == 1) return Tmp; - Tmp = std::min( - Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + return std::min(Tmp, Tmp2) - 1; + + case Instruction::Mul: { + // The output of the Mul can be at most twice the valid bits in the + // inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (SignBitsOp0 == 1) break; + unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (SignBitsOp1 == 1) break; + unsigned OutValidBits = + (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); + return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; } - return Tmp; - } - case Instruction::Trunc: - // FIXME: it's tricky to do anything useful for this, but it is an important - // case for targets like X86. - break; + case Instruction::PHI: { + const PHINode *PN = cast<PHINode>(U); + unsigned NumIncomingValues = PN->getNumIncomingValues(); + // Don't analyze large in-degree PHIs. + if (NumIncomingValues > 4) break; + // Unreachable blocks may have zero-operand PHI nodes. + if (NumIncomingValues == 0) break; + + // Take the minimum of all incoming values. This can't infinitely loop + // because of our depth threshold. + Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); + for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { + if (Tmp == 1) return Tmp; + Tmp = std::min( + Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); + } + return Tmp; + } - case Instruction::ExtractElement: - // Look through extract element. At the moment we keep this simple and skip - // tracking the specific element. But at least we might find information - // valid for all elements of the vector (for example if vector is sign - // extended, shifted, etc). - return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - - case Instruction::ShuffleVector: { - // TODO: This is copied almost directly from the SelectionDAG version of - // ComputeNumSignBits. It would be better if we could share common - // code. If not, make sure that changes are translated to the DAG. - - // Collect the minimum number of sign bits that are shared by every vector - // element referenced by the shuffle. - auto *Shuf = cast<ShuffleVectorInst>(U); - int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); - int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); - APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - for (int i = 0; i != NumMaskElts; ++i) { - int M = Shuf->getMaskValue(i); - assert(M < NumElts * 2 && "Invalid shuffle mask constant"); - // For undef elements, we don't know anything about the common state of - // the shuffle result. - if (M == -1) - return 1; - if (M < NumElts) - DemandedLHS.setBit(M % NumElts); - else - DemandedRHS.setBit(M % NumElts); + case Instruction::Trunc: + // FIXME: it's tricky to do anything useful for this, but it is an + // important case for targets like X86. 
+ break; + + case Instruction::ExtractElement: + // Look through extract element. At the moment we keep this simple and + // skip tracking the specific element. But at least we might find + // information valid for all elements of the vector (for example if vector + // is sign extended, shifted, etc). + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + + case Instruction::ShuffleVector: { + // TODO: This is copied almost directly from the SelectionDAG version of + // ComputeNumSignBits. It would be better if we could share common + // code. If not, make sure that changes are translated to the DAG. + + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. + auto *Shuf = cast<ShuffleVectorInst>(U); + int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); + int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + for (int i = 0; i != NumMaskElts; ++i) { + int M = Shuf->getMaskValue(i); + assert(M < NumElts * 2 && "Invalid shuffle mask constant"); + // For undef elements, we don't know anything about the common state of + // the shuffle result. + if (M == -1) + return 1; + if (M < NumElts) + DemandedLHS.setBit(M % NumElts); + else + DemandedRHS.setBit(M % NumElts); + } + Tmp = std::numeric_limits<unsigned>::max(); + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits + // fall-back. + if (Tmp == 1) + break; + assert(Tmp <= V->getType()->getScalarSizeInBits() && + "Failed to determine minimum sign bits"); + return Tmp; } - Tmp = std::numeric_limits<unsigned>::max(); - if (!!DemandedLHS) - Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); - if (!!DemandedRHS) { - Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); - Tmp = std::min(Tmp, Tmp2); } - // If we don't know anything, early out and try computeKnownBits fall-back. - if (Tmp == 1) - break; - assert(Tmp <= V->getType()->getScalarSizeInBits() && - "Failed to determine minimum sign bits"); - return Tmp; - } } // Finally, if we can prove that the top bits of the result are 0's or 1's, @@ -2655,8 +2764,6 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, /// through SExt instructions only if LookThroughSExt is true. bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool LookThroughSExt, unsigned Depth) { - const unsigned MaxDepth = 6; - assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); @@ -3086,6 +3193,58 @@ bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) { return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0); } +bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth) { + assert(V->getType()->isFPOrFPVectorTy() && "Querying for Inf on non-FP type"); + + // If we're told that infinities won't happen, assume they won't. + if (auto *FPMathOp = dyn_cast<FPMathOperator>(V)) + if (FPMathOp->hasNoInfs()) + return true; + + // Handle scalar constants. 
+ if (auto *CFP = dyn_cast<ConstantFP>(V)) + return !CFP->isInfinity(); + + if (Depth == MaxDepth) + return false; + + if (auto *Inst = dyn_cast<Instruction>(V)) { + switch (Inst->getOpcode()) { + case Instruction::Select: { + return isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1) && + isKnownNeverInfinity(Inst->getOperand(2), TLI, Depth + 1); + } + case Instruction::UIToFP: + // If the input type fits into the floating type the result is finite. + return ilogb(APFloat::getLargest( + Inst->getType()->getScalarType()->getFltSemantics())) >= + (int)Inst->getOperand(0)->getType()->getScalarSizeInBits(); + default: + break; + } + } + + // Bail out for constant expressions, but try to handle vector constants. + if (!V->getType()->isVectorTy() || !isa<Constant>(V)) + return false; + + // For vectors, verify that each element is not infinity. + unsigned NumElts = V->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = cast<Constant>(V)->getAggregateElement(i); + if (!Elt) + return false; + if (isa<UndefValue>(Elt)) + continue; + auto *CElt = dyn_cast<ConstantFP>(Elt); + if (!CElt || CElt->isInfinity()) + return false; + } + // All elements were confirmed non-infinity or undefined. + return true; +} + bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth) { assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type"); @@ -3105,13 +3264,26 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, if (auto *Inst = dyn_cast<Instruction>(V)) { switch (Inst->getOpcode()) { case Instruction::FAdd: - case Instruction::FMul: case Instruction::FSub: + // Adding positive and negative infinity produces NaN. + return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) && + isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) && + (isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) || + isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1)); + + case Instruction::FMul: + // Zero multiplied with infinity produces NaN. + // FIXME: If neither side can be zero fmul never produces NaN. + return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) && + isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) && + isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) && + isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1); + case Instruction::FDiv: - case Instruction::FRem: { - // TODO: Need isKnownNeverInfinity + case Instruction::FRem: + // FIXME: Only 0/0, Inf/Inf, Inf REM x and x REM 0 produce NaN. return false; - } + case Instruction::Select: { return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) && isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1); @@ -3651,23 +3823,28 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { return Len == ~0ULL ? 1 : Len; } -const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) { +const Value * +llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call, + bool MustPreserveNullness) { assert(Call && "getArgumentAliasingToReturnedPointer only works on nonnull calls"); if (const Value *RV = Call->getReturnedArgOperand()) return RV; // This can be used only as a aliasing property. 
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + Call, MustPreserveNullness)) return Call->getArgOperand(0); return nullptr; } bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - const CallBase *Call) { + const CallBase *Call, bool MustPreserveNullness) { return Call->getIntrinsicID() == Intrinsic::launder_invariant_group || Call->getIntrinsicID() == Intrinsic::strip_invariant_group || Call->getIntrinsicID() == Intrinsic::aarch64_irg || - Call->getIntrinsicID() == Intrinsic::aarch64_tagp; + Call->getIntrinsicID() == Intrinsic::aarch64_tagp || + (!MustPreserveNullness && + Call->getIntrinsicID() == Intrinsic::ptrmask); } /// \p PN defines a loop-variant pointer to an object. Check if the @@ -3725,7 +3902,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, // because it should be in sync with CaptureTracking. Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) { + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { V = RP; continue; } @@ -3865,6 +4042,18 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } +bool llvm::mustSuppressSpeculation(const LoadInst &LI) { + if (!LI.isUnordered()) + return true; + const Function &F = *LI.getFunction(); + // Speculative load may create a race that did not exist in the source. + return F.hasFnAttribute(Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. + F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress); +} + + bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { @@ -3909,17 +4098,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(Inst); - if (!LI->isUnordered() || - // Speculative load may create a race that did not exist in the source. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) || - // Speculative load may load data from dirty regions. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || - LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) + if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), - LI->getType(), LI->getAlignment(), - DL, CtxI, DT); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()), + DL, CtxI, DT); } case Instruction::Call: { auto *CI = cast<const CallInst>(Inst); @@ -4201,6 +4385,20 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } +bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V) { + // If the value is a freeze instruction, then it can never + // be undef or poison. + if (isa<FreezeInst>(V)) + return true; + // TODO: Some instructions are guaranteed to return neither undef + // nor poison if their arguments are not poison/undef. + + // TODO: Deal with other Constant subclasses. 
+ if (isa<ConstantInt>(V) || isa<GlobalVariable>(V)) + return true; + + return false; +} OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, @@ -4221,22 +4419,9 @@ OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { - // A memory operation returns normally if it isn't volatile. A volatile - // operation is allowed to trap. - // - // An atomic operation isn't guaranteed to return in a reasonable amount of - // time because it's possible for another thread to interfere with it for an + // Note: An atomic operation isn't guaranteed to return in a reasonable amount + // of time because it's possible for another thread to interfere with it for an // arbitrary length of time, but programs aren't allowed to rely on that. - if (const LoadInst *LI = dyn_cast<LoadInst>(I)) - return !LI->isVolatile(); - if (const StoreInst *SI = dyn_cast<StoreInst>(I)) - return !SI->isVolatile(); - if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) - return !CXI->isVolatile(); - if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) - return !RMWI->isVolatile(); - if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I)) - return !MII->isVolatile(); // If there is no successor, then execution can't transfer to it. if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) @@ -4277,10 +4462,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // FIXME: This isn't aggressive enough; a call which only writes to a global // is guaranteed to return. - return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || - match(I, m_Intrinsic<Intrinsic::assume>()) || - match(I, m_Intrinsic<Intrinsic::sideeffect>()) || - match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>()); + return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory(); } // Other instructions return normally. @@ -4572,12 +4754,12 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, // TODO: Allow FP min/max with nnan/nsz. assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); - Value *A, *B; + Value *A = nullptr, *B = nullptr; SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); if (!SelectPatternResult::isMinOrMax(L.Flavor)) return {SPF_UNKNOWN, SPNB_NA, false}; - Value *C, *D; + Value *C = nullptr, *D = nullptr; SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); if (L.Flavor != R.Flavor) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -5627,8 +5809,8 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower, } static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, - APInt &Upper) { - const Value *LHS, *RHS; + APInt &Upper, const InstrInfoQuery &IIQ) { + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); if (R.Flavor == SPF_UNKNOWN) return; @@ -5640,7 +5822,8 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, // then the result of abs(X) is [0..SIGNED_MAX], // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. 
Lower = APInt::getNullValue(BitWidth); - if (cast<Instruction>(RHS)->hasNoSignedWrap()) + if (match(RHS, m_Neg(m_Specific(LHS))) && + IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) Upper = APInt::getSignedMaxValue(BitWidth) + 1; else Upper = APInt::getSignedMinValue(BitWidth) + 1; @@ -5694,7 +5877,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { else if (auto *II = dyn_cast<IntrinsicInst>(V)) setLimitsForIntrinsic(*II, Lower, Upper); else if (auto *SI = dyn_cast<SelectInst>(V)) - setLimitsForSelectPattern(*SI, Lower, Upper); + setLimitsForSelectPattern(*SI, Lower, Upper, IIQ); ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper); @@ -5704,3 +5887,111 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { return CR; } + +static Optional<int64_t> +getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) { + // Skip over the first indices. + gep_type_iterator GTI = gep_type_begin(GEP); + for (unsigned i = 1; i != Idx; ++i, ++GTI) + /*skip along*/; + + // Compute the offset implied by the rest of the indices. + int64_t Offset = 0; + for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { + ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!OpC) + return None; + if (OpC->isZero()) + continue; // No offset. + + // Handle struct indices, which add their field offset to the pointer. + if (StructType *STy = GTI.getStructTypeOrNull()) { + Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + continue; + } + + // Otherwise, we have a sequential type like an array or vector. Multiply + // the index by the ElementSize. + uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size * OpC->getSExtValue(); + } + + return Offset; +} + +Optional<int64_t> llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2, + const DataLayout &DL) { + Ptr1 = Ptr1->stripPointerCasts(); + Ptr2 = Ptr2->stripPointerCasts(); + + // Handle the trivial case first. + if (Ptr1 == Ptr2) { + return 0; + } + + const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1); + const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2); + + // If one pointer is a GEP see if the GEP is a constant offset from the base, + // as in "P" and "gep P, 1". + // Also do this iteratively to handle the the following case: + // Ptr_t1 = GEP Ptr1, c1 + // Ptr_t2 = GEP Ptr_t1, c2 + // Ptr2 = GEP Ptr_t2, c3 + // where we will return c1+c2+c3. + // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base + // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases + // are the same, and return the difference between offsets. + auto getOffsetFromBase = [&DL](const GEPOperator *GEP, + const Value *Ptr) -> Optional<int64_t> { + const GEPOperator *GEP_T = GEP; + int64_t OffsetVal = 0; + bool HasSameBase = false; + while (GEP_T) { + auto Offset = getOffsetFromIndex(GEP_T, 1, DL); + if (!Offset) + return None; + OffsetVal += *Offset; + auto Op0 = GEP_T->getOperand(0)->stripPointerCasts(); + if (Op0 == Ptr) { + HasSameBase = true; + break; + } + GEP_T = dyn_cast<GEPOperator>(Op0); + } + if (!HasSameBase) + return None; + return OffsetVal; + }; + + if (GEP1) { + auto Offset = getOffsetFromBase(GEP1, Ptr2); + if (Offset) + return -*Offset; + } + if (GEP2) { + auto Offset = getOffsetFromBase(GEP2, Ptr1); + if (Offset) + return Offset; + } + + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical + // base. 
After that base, they may have some number of common (and + // potentially variable) indices. After that they handle some constant + // offset, which determines their offset from each other. At this point, we + // handle no other case. + if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) + return None; + + // Skip any common indices and track the GEP types. + unsigned Idx = 1; + for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) + if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) + break; + + auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL); + auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL); + if (!Offset1 || !Offset2) + return None; + return *Offset2 - *Offset1; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp index 986756eb2627..c45ab941a142 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" +#include "llvm/Support/CommandLine.h" #define DEBUG_TYPE "vectorutils" @@ -56,6 +57,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: case Intrinsic::sqrt: // Begin floating-point. case Intrinsic::sin: case Intrinsic::cos: @@ -98,6 +100,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: return (ScalarOpdIdx == 2); default: return false; @@ -830,15 +833,15 @@ void InterleavedAccessInfo::collectConstStrideAccesses( /*Assume=*/true, /*ShouldCheckWrap=*/false); const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); - PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); + PointerType *PtrTy = cast<PointerType>(Ptr->getType()); uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); // An alignment of 0 means target ABI alignment. - unsigned Align = getLoadStoreAlignment(&I); - if (!Align) - Align = DL.getABITypeAlignment(PtrTy->getElementType()); + MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I)); + if (!Alignment) + Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType())); - AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align); + AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment); } } @@ -925,7 +928,7 @@ void InterleavedAccessInfo::analyzeInterleaving( if (!Group) { LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); - Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); + Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment); } if (B->mayWriteToMemory()) StoreGroups.insert(Group); @@ -964,6 +967,10 @@ void InterleavedAccessInfo::analyzeInterleaving( // instructions that precede it. if (isInterleaved(A)) { InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A); + + LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to " + "dependence between " << *A << " and "<< *B << '\n'); + StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } @@ -1028,7 +1035,7 @@ void InterleavedAccessInfo::analyzeInterleaving( Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size); // Try to insert A into B's group. 
- if (Group->insertMember(A, IndexA, DesA.Align)) { + if (Group->insertMember(A, IndexA, DesA.Alignment)) { LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' << " into the interleave group with" << *B << '\n'); @@ -1153,3 +1160,69 @@ void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const { propagateMetadata(NewInst, VL); } } + +void VFABI::getVectorVariantNames( + const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) { + const StringRef S = + CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName) + .getValueAsString(); + if (S.empty()) + return; + + SmallVector<StringRef, 8> ListAttr; + S.split(ListAttr, ","); + + for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) { +#ifndef NDEBUG + Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S); + assert(Info.hasValue() && "Invalid name for a VFABI variant."); + assert(CI.getModule()->getFunction(Info.getValue().VectorName) && + "Vector function is missing."); +#endif + VariantMappings.push_back(S); + } +} + +bool VFShape::hasValidParameterList() const { + for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams; + ++Pos) { + assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list."); + + switch (Parameters[Pos].ParamKind) { + default: // Nothing to check. + break; + case VFParamKind::OMP_Linear: + case VFParamKind::OMP_LinearRef: + case VFParamKind::OMP_LinearVal: + case VFParamKind::OMP_LinearUVal: + // Compile time linear steps must be non-zero. + if (Parameters[Pos].LinearStepOrPos == 0) + return false; + break; + case VFParamKind::OMP_LinearPos: + case VFParamKind::OMP_LinearRefPos: + case VFParamKind::OMP_LinearValPos: + case VFParamKind::OMP_LinearUValPos: + // The runtime linear step must be referring to some other + // parameters in the signature. + if (Parameters[Pos].LinearStepOrPos >= int(NumParams)) + return false; + // The linear step parameter must be marked as uniform. + if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind != + VFParamKind::OMP_Uniform) + return false; + // The linear step parameter can't point at itself. + if (Parameters[Pos].LinearStepOrPos == int(Pos)) + return false; + break; + case VFParamKind::GlobalPredicate: + // The global predicate must be the unique. Can be placed anywhere in the + // signature. + for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos) + if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate) + return false; + break; + } + } + return true; +} |
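
Editorial note on the VFABI demangler added above: the mangled-name grammar
_ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] is easiest to
follow with a worked example. The sketch below is not part of the patch; it
assumes the VFABI declarations (VFABI::tryDemangleForVFABI, VFInfo, VFShape,
VFParamKind) are exposed through llvm/Analysis/VectorUtils.h in this revision,
and that the ISA, mask, and vlen tokens "n", "N", "2" are handled by the
tryParseISA/tryParseMask/tryParseVLEN helpers (not shown in these hunks) as
AArch64 Advanced SIMD, unmasked, VF = 2. Field names come from the code in the
diff; everything else is illustrative.

// Minimal sketch: demangle "_ZGVnN2vl8_foo", i.e. an unmasked, 2-lane
// variant of foo whose first argument is vectorized and whose second
// argument is linear with a compile-time step of 8.
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/VectorUtils.h" // assumed home of the VFABI helpers
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void demangleExample() {
  Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2vl8_foo");
  if (!Info)
    return; // Syntax error or unsupported mangled name.

  // Pieces: "_ZGV" "n" (<isa>) "N" (<mask>, unmasked) "2" (<vlen>)
  //         "v" "l8" (<parameters>) "_" "foo" (<scalarname>).
  const VFShape &Shape = Info->Shape;
  outs() << "scalar: " << Info->ScalarName  // "foo"
         << " vector: " << Info->VectorName // full mangled name, no redirection
         << " VF: " << Shape.VF              // 2, and Shape.IsScalable is false
         << " #params: " << Shape.Parameters.size() << "\n"; // 2

  // Parameter 0 has ParamKind == VFParamKind::Vector; parameter 1 has
  // ParamKind == VFParamKind::OMP_Linear with LinearStepOrPos == 8.
  // No trailing GlobalPredicate parameter is appended because <mask>
  // is "N" rather than "M".
}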

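A second, smaller editorial sketch, again not part of the patch and assuming
the same VectorUtils.h declarations: VFShape::hasValidParameterList() above
rejects a compile-time linear parameter whose step is zero, so a hand-built
shape like the following fails validation.

#include "llvm/Analysis/VectorUtils.h"
#include <cassert>

using namespace llvm;

static void validateExample() {
  VFShape Shape;
  Shape.VF = 4;
  Shape.IsScalable = false;
  // ParamPos must match the index in the list (the function asserts this).
  Shape.Parameters.push_back({0, VFParamKind::Vector, 0, Align()});
  // An OMP_Linear parameter with a compile-time step of 0 is invalid
  // per the checks in hasValidParameterList().
  Shape.Parameters.push_back(
      {1, VFParamKind::OMP_Linear, /*LinearStepOrPos=*/0, Align()});
  assert(!Shape.hasValidParameterList());
}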