author     Dimitry Andric <dim@FreeBSD.org>        2017-04-16 16:01:22 +0000
committer  Dimitry Andric <dim@FreeBSD.org>        2017-04-16 16:01:22 +0000
commit     71d5a2540a98c81f5bcaeb48805e0e2881f530ef
tree       5343938942df402b49ec7300a1c25a2d4ccd5821  /lib/Analysis
parent     31bbf64f3a4974a2d6c8b3b27ad2f519caf74057
Diffstat (limited to 'lib/Analysis')
49 files changed, 6220 insertions, 2666 deletions
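
One of the behavioral changes in the diff below is in AAResults::getModRefInfo for loads and stores: rather than giving up on any access that is not unordered, the code now checks whether the access's atomic ordering is stronger than Unordered. The following is an illustrative sketch only, not part of the commit; it assumes LLVM's isStrongerThan helper from llvm/IR/AtomicOrdering.h, which the patch itself calls.

    // Illustrative sketch (not part of this commit): mirrors the new
    // conservatism test used in AAResults::getModRefInfo for loads/stores.
    // AtomicOrdering and isStrongerThan come from llvm/IR/AtomicOrdering.h.
    #include "llvm/IR/AtomicOrdering.h"

    static bool requiresConservativeModRef(llvm::AtomicOrdering AO) {
      // NotAtomic and Unordered accesses can still be analyzed precisely;
      // anything stronger (Monotonic, Acquire, Release, ...) is treated as
      // potentially both reading and writing the queried location.
      return llvm::isStrongerThan(AO, llvm::AtomicOrdering::Unordered);
    }
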
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 84da76be98bb..4c29aeaa622f 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -332,8 +332,8 @@ FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) { ModRefInfo AAResults::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) { - // Be conservative in the face of volatile/atomic. - if (!L->isUnordered()) + // Be conservative in the face of atomic. + if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered)) return MRI_ModRef; // If the load address doesn't alias the given address, it doesn't read @@ -347,8 +347,8 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L, ModRefInfo AAResults::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) { - // Be conservative in the face of volatile/atomic. - if (!S->isUnordered()) + // Be conservative in the face of atomic. + if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered)) return MRI_ModRef; if (Loc.Ptr) { @@ -367,6 +367,14 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S, return MRI_Mod; } +ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { + // If we know that the location is a constant memory location, the fence + // cannot modify this location. + if (Loc.Ptr && pointsToConstantMemory(Loc)) + return MRI_Ref; + return MRI_ModRef; +} + ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) { @@ -689,7 +697,7 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, bool llvm::isNoAliasCall(const Value *V) { if (auto CS = ImmutableCallSite(V)) - return CS.paramHasAttr(0, Attribute::NoAlias); + return CS.hasRetAttr(Attribute::NoAlias); return false; } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 701b0e1a5925..16b711a69ec3 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -199,9 +199,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, // Check the unknown instructions... if (!UnknownInsts.empty()) { for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) - if (AA.getModRefInfo(UnknownInsts[i], - MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef) - return true; + if (auto *Inst = getUnknownInst(i)) + if (AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)) != + MRI_NoModRef) + return true; } return false; @@ -217,10 +218,12 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, return false; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { - ImmutableCallSite C1(getUnknownInst(i)), C2(Inst); - if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || - AA.getModRefInfo(C2, C1) != MRI_NoModRef) - return true; + if (auto *Inst = getUnknownInst(i)) { + ImmutableCallSite C1(Inst), C2(Inst); + if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || + AA.getModRefInfo(C2, C1) != MRI_NoModRef) + return true; + } } for (iterator I = begin(), E = end(); I != E; ++I) @@ -471,7 +474,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { // If there are any call sites in the alias set, add them to this AST. for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i) - add(AS.UnknownInsts[i]); + if (auto *Inst = AS.getUnknownInst(i)) + add(Inst); // Loop over all of the pointers in this alias set. 
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { @@ -489,19 +493,6 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { // dangling pointers to deleted instructions. // void AliasSetTracker::deleteValue(Value *PtrVal) { - // If this is a call instruction, remove the callsite from the appropriate - // AliasSet (if present). - if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) { - if (Inst->mayReadOrWriteMemory()) { - // Scan all the alias sets to see if this call site is contained. - for (iterator I = begin(), E = end(); I != E;) { - iterator Cur = I++; - if (!Cur->Forward) - Cur->removeUnknownInst(*this, Inst); - } - } - } - // First, look up the PointerRec for this pointer. PointerMapType::iterator I = PointerMap.find_as(PtrVal); if (I == PointerMap.end()) return; // Noop @@ -633,7 +624,8 @@ void AliasSet::print(raw_ostream &OS) const { OS << "\n " << UnknownInsts.size() << " Unknown instructions: "; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { if (i) OS << ", "; - UnknownInsts[i]->printAsOperand(OS); + if (auto *I = getUnknownInst(i)) + I->printAsOperand(OS); } } OS << "\n"; diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 0e7cf402cdb5..0e0b5c92a918 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -57,6 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeLazyBranchProbabilityInfoPassPass(Registry); initializeLazyBlockFrequencyInfoPassPass(Registry); initializeLazyValueInfoWrapperPassPass(Registry); + initializeLazyValueInfoPrinterPass(Registry); initializeLintPass(Registry); initializeLoopInfoWrapperPassPass(Registry); initializeMemDepPrinterPass(Registry); @@ -78,6 +79,8 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeTypeBasedAAWrapperPassPass(Registry); initializeScopedNoAliasAAWrapperPassPass(Registry); initializeLCSSAVerificationPassPass(Registry); + initializeMemorySSAWrapperPassPass(Registry); + initializeMemorySSAPrinterLegacyPassPass(Registry); } void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp index 5851594700a4..1fae94724487 100644 --- a/lib/Analysis/AssumptionCache.cpp +++ b/lib/Analysis/AssumptionCache.cpp @@ -24,6 +24,11 @@ using namespace llvm; using namespace llvm::PatternMatch; +static cl::opt<bool> + VerifyAssumptionCache("verify-assumption-cache", cl::Hidden, + cl::desc("Enable verification of assumption cache"), + cl::init(false)); + SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) { // Try using find_as first to avoid creating extra value handles just for the // purpose of doing the lookup. @@ -47,9 +52,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) { } else if (auto *I = dyn_cast<Instruction>(V)) { Affected.push_back(I); - if (I->getOpcode() == Instruction::BitCast || - I->getOpcode() == Instruction::PtrToInt) { - auto *Op = I->getOperand(0); + // Peek through unary operators to find the source of the condition. + Value *Op; + if (match(I, m_BitCast(m_Value(Op))) || + match(I, m_PtrToInt(m_Value(Op))) || + match(I, m_Not(m_Value(Op)))) { if (isa<Instruction>(Op) || isa<Argument>(Op)) Affected.push_back(Op); } @@ -229,7 +236,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) { } void AssumptionCacheTracker::verifyAnalysis() const { -#ifndef NDEBUG + // FIXME: In the long term the verifier should not be controllable with a + // flag. 
We should either fix all passes to correctly update the assumption + // cache and enable the verifier unconditionally or somehow arrange for the + // assumption list to be updated automatically by passes. + if (!VerifyAssumptionCache) + return; + SmallPtrSet<const CallInst *, 4> AssumptionSet; for (const auto &I : AssumptionCaches) { for (auto &VH : I.second->assumptions()) @@ -238,11 +251,10 @@ void AssumptionCacheTracker::verifyAnalysis() const { for (const BasicBlock &B : cast<Function>(*I.first)) for (const Instruction &II : B) - if (match(&II, m_Intrinsic<Intrinsic::assume>())) - assert(AssumptionSet.count(cast<CallInst>(&II)) && - "Assumption in scanned function not in cache"); + if (match(&II, m_Intrinsic<Intrinsic::assume>()) && + !AssumptionSet.count(cast<CallInst>(&II))) + report_fatal_error("Assumption in scanned function not in cache"); } -#endif } AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) { diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index c8d057949493..09582cf9a71d 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -127,7 +127,9 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getObjectSize(V, Size, DL, &TLI, RoundToAlign)) + ObjectSizeOpts Opts; + Opts.RoundToAlign = RoundToAlign; + if (getObjectSize(V, Size, DL, &TLI, Opts)) return Size; return MemoryLocation::UnknownSize; } @@ -635,7 +637,7 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { /// Returns true if this is a writeonly (i.e Mod only) parameter. static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx, const TargetLibraryInfo &TLI) { - if (CS.paramHasAttr(ArgIdx + 1, Attribute::WriteOnly)) + if (CS.paramHasAttr(ArgIdx, Attribute::WriteOnly)) return true; // We can bound the aliasing properties of memset_pattern16 just as we can @@ -644,9 +646,9 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx, // whenever possible. // FIXME Consider handling this in InferFunctionAttr.cpp together with other // attributes. - LibFunc::Func F; + LibFunc F; if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) && - F == LibFunc::memset_pattern16 && TLI.has(F)) + F == LibFunc_memset_pattern16 && TLI.has(F)) if (ArgIdx == 0) return true; @@ -664,10 +666,10 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, if (isWriteOnlyParam(CS, ArgIdx, TLI)) return MRI_Mod; - if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly)) + if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly)) return MRI_Ref; - if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone)) + if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone)) return MRI_NoModRef; return AAResultBase::getArgModRefInfo(CS, ArgIdx); @@ -749,7 +751,11 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // as an argument, and itself doesn't capture it. if (!isa<Constant>(Object) && CS.getInstruction() != Object && isNonEscapingLocalObject(Object)) { - bool PassedAsArg = false; + + // Optimistically assume that call doesn't touch Object and check this + // assumption in the following loop. 
+ ModRefInfo Result = MRI_NoModRef; + unsigned OperandNo = 0; for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end(); CI != CE; ++CI, ++OperandNo) { @@ -761,20 +767,38 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo))) continue; + // Call doesn't access memory through this operand, so we don't care + // if it aliases with Object. + if (CS.doesNotAccessMemory(OperandNo)) + continue; + // If this is a no-capture pointer argument, see if we can tell that it - // is impossible to alias the pointer we're checking. If not, we have to - // assume that the call could touch the pointer, even though it doesn't - // escape. + // is impossible to alias the pointer we're checking. AliasResult AR = getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object)); - if (AR) { - PassedAsArg = true; - break; + + // Operand doesnt alias 'Object', continue looking for other aliases + if (AR == NoAlias) + continue; + // Operand aliases 'Object', but call doesn't modify it. Strengthen + // initial assumption and keep looking in case if there are more aliases. + if (CS.onlyReadsMemory(OperandNo)) { + Result = static_cast<ModRefInfo>(Result | MRI_Ref); + continue; + } + // Operand aliases 'Object' but call only writes into it. + if (CS.doesNotReadMemory(OperandNo)) { + Result = static_cast<ModRefInfo>(Result | MRI_Mod); + continue; } + // This operand aliases 'Object' and call reads and writes into it. + Result = MRI_ModRef; + break; } - if (!PassedAsArg) - return MRI_NoModRef; + // Early return if we improved mod ref information + if (Result != MRI_ModRef) + return Result; } // If the CallSite is to malloc or calloc, we can assume that it doesn't diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 4cdbe4d0fcf6..07a2a9229fd5 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -26,7 +26,6 @@ using namespace llvm; #define DEBUG_TYPE "block-freq" -#ifndef NDEBUG static cl::opt<GVDAGType> ViewBlockFreqPropagationDAG( "view-block-freq-propagation-dags", cl::Hidden, cl::desc("Pop up a window to show a dag displaying how block " @@ -55,8 +54,29 @@ cl::opt<unsigned> "is no less than the max frequency of the " "function multiplied by this percent.")); +// Command line option to turn on CFG dot dump after profile annotation. +cl::opt<bool> + PGOViewCounts("pgo-view-counts", cl::init(false), cl::Hidden, + cl::desc("A boolean option to show CFG dag with " + "block profile counts and branch probabilities " + "right after PGO profile annotation step. The " + "profile counts are computed using branch " + "probabilities from the runtime profile data and " + "block frequency propagation algorithm. To view " + "the raw counts from the profile, use option " + "-pgo-view-raw-counts instead. 
To limit graph " + "display to only one function, use filtering option " + "-view-bfi-func-name.")); + namespace llvm { +static GVDAGType getGVDT() { + + if (PGOViewCounts) + return GVDT_Count; + return ViewBlockFreqPropagationDAG; +} + template <> struct GraphTraits<BlockFrequencyInfo *> { typedef const BasicBlock *NodeRef; @@ -89,8 +109,7 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase { std::string getNodeLabel(const BasicBlock *Node, const BlockFrequencyInfo *Graph) { - return BFIDOTGTraitsBase::getNodeLabel(Node, Graph, - ViewBlockFreqPropagationDAG); + return BFIDOTGTraitsBase::getNodeLabel(Node, Graph, getGVDT()); } std::string getNodeAttributes(const BasicBlock *Node, @@ -107,7 +126,6 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase { }; } // end namespace llvm -#endif BlockFrequencyInfo::BlockFrequencyInfo() {} @@ -132,19 +150,26 @@ BlockFrequencyInfo &BlockFrequencyInfo::operator=(BlockFrequencyInfo &&RHS) { // template instantiated which is not available in the header. BlockFrequencyInfo::~BlockFrequencyInfo() {} +bool BlockFrequencyInfo::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. + auto PAC = PA.getChecker<BlockFrequencyAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || + PAC.preservedSet<CFGAnalyses>()); +} + void BlockFrequencyInfo::calculate(const Function &F, const BranchProbabilityInfo &BPI, const LoopInfo &LI) { if (!BFI) BFI.reset(new ImplType); BFI->calculate(F, BPI, LI); -#ifndef NDEBUG if (ViewBlockFreqPropagationDAG != GVDT_None && (ViewBlockFreqFuncName.empty() || F.getName().equals(ViewBlockFreqFuncName))) { view(); } -#endif } BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { @@ -171,16 +196,32 @@ void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) { BFI->setBlockFreq(BB, Freq); } +void BlockFrequencyInfo::setBlockFreqAndScale( + const BasicBlock *ReferenceBB, uint64_t Freq, + SmallPtrSetImpl<BasicBlock *> &BlocksToScale) { + assert(BFI && "Expected analysis to be available"); + // Use 128 bits APInt to avoid overflow. + APInt NewFreq(128, Freq); + APInt OldFreq(128, BFI->getBlockFreq(ReferenceBB).getFrequency()); + APInt BBFreq(128, 0); + for (auto *BB : BlocksToScale) { + BBFreq = BFI->getBlockFreq(BB).getFrequency(); + // Multiply first by NewFreq and then divide by OldFreq + // to minimize loss of precision. + BBFreq *= NewFreq; + // udiv is an expensive operation in the general case. If this ends up being + // a hot spot, one of the options proposed in + // https://reviews.llvm.org/D28535#650071 could be used to avoid this. + BBFreq = BBFreq.udiv(OldFreq); + BFI->setBlockFreq(BB, BBFreq.getLimitedValue()); + } + BFI->setBlockFreq(ReferenceBB, Freq); +} + /// Pop up a ghostview window with the current block frequency propagation /// rendered using dot. void BlockFrequencyInfo::view() const { -// This code is only for debugging. 
-#ifndef NDEBUG ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs"); -#else - errs() << "BlockFrequencyInfo::view is only available in debug builds on " - "systems with Graphviz or gv!\n"; -#endif // NDEBUG } const Function *BlockFrequencyInfo::getFunction() const { diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 9850e02fca22..e5d8c3347c16 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -28,7 +28,9 @@ ScaledNumber<uint64_t> BlockMass::toScaled() const { return ScaledNumber<uint64_t>(getMass() + 1, -64); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); } +#endif static char getHexDigit(int N) { assert(N < 16); diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 3eabb780398c..5935dec15c70 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -108,11 +108,9 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -/// \brief Calculate edge weights for successors lead to unreachable. -/// -/// Predict that a successor which leads necessarily to an -/// unreachable-terminated block as extremely unlikely. -bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { +/// \brief Add \p BB to PostDominatedByUnreachable set if applicable. +void +BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 0) { if (isa<UnreachableInst>(TI) || @@ -122,38 +120,86 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { // never execute. BB->getTerminatingDeoptimizeCall()) PostDominatedByUnreachable.insert(BB); - return false; + return; + } + + // If the terminator is an InvokeInst, check only the normal destination block + // as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(TI)) { + if (PostDominatedByUnreachable.count(II->getNormalDest())) + PostDominatedByUnreachable.insert(BB); + return; } + for (auto *I : successors(BB)) + // If any of successor is not post dominated then BB is also not. + if (!PostDominatedByUnreachable.count(I)) + return; + + PostDominatedByUnreachable.insert(BB); +} + +/// \brief Add \p BB to PostDominatedByColdCall set if applicable. +void +BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { + assert(!PostDominatedByColdCall.count(BB)); + const TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) + return; + + // If all of successor are post dominated then BB is also done. + if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { + return PostDominatedByColdCall.count(SuccBB); + })) { + PostDominatedByColdCall.insert(BB); + return; + } + + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(TI)) + if (PostDominatedByColdCall.count(II->getNormalDest())) { + PostDominatedByColdCall.insert(BB); + return; + } + + // Otherwise, if the block itself contains a cold function, add it to the + // set of blocks post-dominated by a cold call. 
+ for (auto &I : *BB) + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->hasFnAttr(Attribute::Cold)) { + PostDominatedByColdCall.insert(BB); + return; + } +} + +/// \brief Calculate edge weights for successors lead to unreachable. +/// +/// Predict that a successor which leads necessarily to an +/// unreachable-terminated block as extremely unlikely. +bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { + const TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) + return false; + SmallVector<unsigned, 4> UnreachableEdges; SmallVector<unsigned, 4> ReachableEdges; - for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) if (PostDominatedByUnreachable.count(*I)) UnreachableEdges.push_back(I.getSuccessorIndex()); else ReachableEdges.push_back(I.getSuccessorIndex()); - } - - // If all successors are in the set of blocks post-dominated by unreachable, - // this block is too. - if (UnreachableEdges.size() == TI->getNumSuccessors()) - PostDominatedByUnreachable.insert(BB); // Skip probabilities if this block has a single successor or if all were // reachable. if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) return false; - // If the terminator is an InvokeInst, check only the normal destination block - // as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast<InvokeInst>(TI)) - if (PostDominatedByUnreachable.count(II->getNormalDest())) { - PostDominatedByUnreachable.insert(BB); - // Return false here so that edge weights for InvokeInst could be decided - // in calcInvokeHeuristics(). - return false; - } + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). + if (isa<InvokeInst>(TI)) + return false; if (ReachableEdges.empty()) { BranchProbability Prob(1, UnreachableEdges.size()); @@ -263,31 +309,10 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { else NormalEdges.push_back(I.getSuccessorIndex()); - // If all successors are in the set of blocks post-dominated by cold calls, - // this block is in the set post-dominated by cold calls. - if (ColdEdges.size() == TI->getNumSuccessors()) - PostDominatedByColdCall.insert(BB); - else { - // Otherwise, if the block itself contains a cold function, add it to the - // set of blocks postdominated by a cold call. - assert(!PostDominatedByColdCall.count(BB)); - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (CI->hasFnAttr(Attribute::Cold)) { - PostDominatedByColdCall.insert(BB); - break; - } - } - - if (auto *II = dyn_cast<InvokeInst>(TI)) { - // If the terminator is an InvokeInst, consider only the normal destination - // block. - if (PostDominatedByColdCall.count(II->getNormalDest())) - PostDominatedByColdCall.insert(BB); - // Return false here so that edge weights for InvokeInst could be decided - // in calcInvokeHeuristics(). + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). + if (isa<InvokeInst>(TI)) return false; - } // Skip probabilities if this block has a single successor. if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) @@ -671,6 +696,8 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { // the successors of a block iteratively. 
for (auto BB : post_order(&F.getEntryBlock())) { DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + updatePostDominatedByUnreachable(BB); + updatePostDominatedByColdCall(BB); if (calcUnreachableHeuristics(BB)) continue; if (calcMetadataWeights(BB)) diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp index e48ff230f43c..ddd5123d0eff 100644 --- a/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -307,7 +307,7 @@ class CFLAndersAAResult::FunctionInfo { public: FunctionInfo(const Function &, const SmallVectorImpl<Value *> &, - const ReachabilitySet &, AliasAttrMap); + const ReachabilitySet &, const AliasAttrMap &); bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const; const AliasSummary &getAliasSummary() const { return Summary; } @@ -470,7 +470,7 @@ static void populateExternalAttributes( CFLAndersAAResult::FunctionInfo::FunctionInfo( const Function &Fn, const SmallVectorImpl<Value *> &RetVals, - const ReachabilitySet &ReachSet, AliasAttrMap AMap) { + const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) { populateAttrMap(AttrMap, AMap); populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap); populateAliasMap(AliasMap, ReachSet); diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp index 054bdc45ad67..9d4521221f47 100644 --- a/lib/Analysis/CGSCCPassManager.cpp +++ b/lib/Analysis/CGSCCPassManager.cpp @@ -117,6 +117,7 @@ bool CGSCCAnalysisManagerModuleProxy::Result::invalidate( PA.allAnalysesInSetPreserved<AllAnalysesOn<LazyCallGraph::SCC>>(); // Ok, we have a graph, so we can propagate the invalidation down into it. + G->buildRefSCCs(); for (auto &RC : G->postorder_ref_sccs()) for (auto &C : RC) { Optional<PreservedAnalyses> InnerPA; @@ -273,9 +274,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // demoted edges. SmallVector<Constant *, 16> Worklist; SmallPtrSet<Constant *, 16> Visited; - SmallPtrSet<Function *, 16> RetainedEdges; - SmallSetVector<Function *, 4> PromotedRefTargets; - SmallSetVector<Function *, 4> DemotedCallTargets; + SmallPtrSet<Node *, 16> RetainedEdges; + SmallSetVector<Node *, 4> PromotedRefTargets; + SmallSetVector<Node *, 4> DemotedCallTargets; // First walk the function and handle all called functions. We do this first // because if there is a single call edge, whether there are ref edges is @@ -284,7 +285,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( if (auto CS = CallSite(&I)) if (Function *Callee = CS.getCalledFunction()) if (Visited.insert(Callee).second && !Callee->isDeclaration()) { - const Edge *E = N.lookup(*Callee); + Node &CalleeN = *G.lookup(*Callee); + Edge *E = N->lookup(CalleeN); // FIXME: We should really handle adding new calls. While it will // make downstream usage more complex, there is no fundamental // limitation and it will allow passes within the CGSCC to be a bit @@ -293,9 +295,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( assert(E && "No function transformations should introduce *new* " "call edges! Any new calls should be modeled as " "promoted existing ref edges!"); - RetainedEdges.insert(Callee); + RetainedEdges.insert(&CalleeN); if (!E->isCall()) - PromotedRefTargets.insert(Callee); + PromotedRefTargets.insert(&CalleeN); } // Now walk all references. 
@@ -306,24 +308,25 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( Worklist.push_back(C); LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) { - const Edge *E = N.lookup(Referee); + Node &RefereeN = *G.lookup(Referee); + Edge *E = N->lookup(RefereeN); // FIXME: Similarly to new calls, we also currently preclude // introducing new references. See above for details. assert(E && "No function transformations should introduce *new* ref " "edges! Any new ref edges would require IPO which " "function passes aren't allowed to do!"); - RetainedEdges.insert(&Referee); + RetainedEdges.insert(&RefereeN); if (E->isCall()) - DemotedCallTargets.insert(&Referee); + DemotedCallTargets.insert(&RefereeN); }); // First remove all of the edges that are no longer present in this function. // We have to build a list of dead targets first and then remove them as the // data structures will all be invalidated by removing them. SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets; - for (Edge &E : N) - if (!RetainedEdges.count(&E.getFunction())) - DeadTargets.push_back({E.getNode(), E.getKind()}); + for (Edge &E : *N) + if (!RetainedEdges.count(&E.getNode())) + DeadTargets.push_back({&E.getNode(), E.getKind()}); for (auto DeadTarget : DeadTargets) { Node &TargetN = *DeadTarget.getPointer(); bool IsCall = DeadTarget.getInt() == Edge::Call; @@ -397,9 +400,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // Next demote all the call edges that are now ref edges. This helps make // the SCCs small which should minimize the work below as we don't want to // form cycles that this would break. - for (Function *RefTarget : DemotedCallTargets) { - Node &TargetN = *G.lookup(*RefTarget); - SCC &TargetC = *G.lookupSCC(TargetN); + for (Node *RefTarget : DemotedCallTargets) { + SCC &TargetC = *G.lookupSCC(*RefTarget); RefSCC &TargetRC = TargetC.getOuterRefSCC(); // The easy case is when the target RefSCC is not this RefSCC. This is @@ -407,10 +409,10 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( if (&TargetRC != RC) { assert(RC->isAncestorOf(TargetRC) && "Cannot potentially form RefSCC cycles here!"); - RC->switchOutgoingEdgeToRef(N, TargetN); + RC->switchOutgoingEdgeToRef(N, *RefTarget); if (DebugLogging) dbgs() << "Switch outgoing call edge to a ref edge from '" << N - << "' to '" << TargetN << "'\n"; + << "' to '" << *RefTarget << "'\n"; continue; } @@ -418,7 +420,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // some SCCs. if (C != &TargetC) { // For separate SCCs this is trivial. - RC->switchTrivialInternalEdgeToRef(N, TargetN); + RC->switchTrivialInternalEdgeToRef(N, *RefTarget); continue; } @@ -430,14 +432,13 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // structure is changed. AM.invalidate(*C, PreservedAnalyses::none()); // Now update the call graph. - C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, - N, C, AM, UR, DebugLogging); + C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N, + C, AM, UR, DebugLogging); } // Now promote ref edges into call edges. - for (Function *CallTarget : PromotedRefTargets) { - Node &TargetN = *G.lookup(*CallTarget); - SCC &TargetC = *G.lookupSCC(TargetN); + for (Node *CallTarget : PromotedRefTargets) { + SCC &TargetC = *G.lookupSCC(*CallTarget); RefSCC &TargetRC = TargetC.getOuterRefSCC(); // The easy case is when the target RefSCC is not this RefSCC. 
This is @@ -445,22 +446,22 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( if (&TargetRC != RC) { assert(RC->isAncestorOf(TargetRC) && "Cannot potentially form RefSCC cycles here!"); - RC->switchOutgoingEdgeToCall(N, TargetN); + RC->switchOutgoingEdgeToCall(N, *CallTarget); if (DebugLogging) dbgs() << "Switch outgoing ref edge to a call edge from '" << N - << "' to '" << TargetN << "'\n"; + << "' to '" << *CallTarget << "'\n"; continue; } if (DebugLogging) dbgs() << "Switch an internal ref edge to a call edge from '" << N - << "' to '" << TargetN << "'\n"; + << "' to '" << *CallTarget << "'\n"; // Otherwise we are switching an internal ref edge to a call edge. This // may merge away some SCCs, and we add those to the UpdateResult. We also // need to make sure to update the worklist in the event SCCs have moved // before the current one in the post-order sequence. auto InitialSCCIndex = RC->find(*C) - RC->begin(); - auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, TargetN); + auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, *CallTarget); if (!InvalidatedSCCs.empty()) { C = &TargetC; assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index d53364373d7b..161709a48466 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -53,6 +53,8 @@ add_llvm_library(LLVMAnalysis MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp MemoryLocation.cpp + MemorySSA.cpp + MemorySSAUpdater.cpp ModuleDebugInfoPrinter.cpp ModuleSummaryAnalysis.cpp ObjCARCAliasAnalysis.cpp diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index 458b7bfae959..6942176ae6ae 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -125,8 +125,9 @@ void CallGraph::print(raw_ostream &OS) const { CN->print(OS); } -LLVM_DUMP_METHOD -void CallGraph::dump() const { print(dbgs()); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraph::dump() const { print(dbgs()); } +#endif // removeFunctionFromModule - Unlink the function from this module, returning // it. Because this removes the function from the module, the call graph node @@ -194,8 +195,9 @@ void CallGraphNode::print(raw_ostream &OS) const { OS << '\n'; } -LLVM_DUMP_METHOD -void CallGraphNode::dump() const { print(dbgs()); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); } +#endif /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. 
Note that this method takes linear time, so it @@ -307,8 +309,10 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { G->print(OS); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } +#endif namespace { struct CallGraphPrinterLegacyPass : public ModulePass { diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp index 9cef78144150..ea70f5752c61 100644 --- a/lib/Analysis/CallGraphSCCPass.cpp +++ b/lib/Analysis/CallGraphSCCPass.cpp @@ -609,16 +609,28 @@ namespace { } bool runOnSCC(CallGraphSCC &SCC) override { - Out << Banner; + auto PrintBannerOnce = [&] () { + static bool BannerPrinted = false; + if (BannerPrinted) + return; + Out << Banner; + BannerPrinted = true; + }; for (CallGraphNode *CGN : SCC) { if (CGN->getFunction()) { - if (isFunctionInPrintList(CGN->getFunction()->getName())) + if (isFunctionInPrintList(CGN->getFunction()->getName())) { + PrintBannerOnce(); CGN->getFunction()->print(Out); - } else + } + } else if (llvm::isFunctionInPrintList("*")) { + PrintBannerOnce(); Out << "\nPrinting <null> Function\n"; + } } return false; } + + StringRef getPassName() const override { return "Print CallGraph IR"; } }; } // end anonymous namespace. diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 73867279abe4..14176dac2104 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1058,8 +1058,8 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL, if (It == FoldedOps.end()) { if (auto *FoldedC = ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) { - NewC = FoldedC; FoldedOps.insert({NewC, FoldedC}); + NewC = FoldedC; } else { FoldedOps.insert({NewC, NewC}); } @@ -1401,7 +1401,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) { return true; default: return false; - case 0: break; + case Intrinsic::not_intrinsic: break; } if (!F->hasName()) @@ -1518,9 +1518,9 @@ Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero, bool isExact = false; APFloat::roundingMode mode = roundTowardZero? 
APFloat::rmTowardZero : APFloat::rmNearestTiesToEven; - APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth, - /*isSigned=*/true, mode, - &isExact); + APFloat::opStatus status = + Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth, + /*isSigned=*/true, mode, &isExact); if (status != APFloat::opOK && (!roundTowardZero || status != APFloat::opInexact)) return nullptr; @@ -1630,6 +1630,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, return ConstantFoldFP(sin, V, Ty); case Intrinsic::cos: return ConstantFoldFP(cos, V, Ty); + case Intrinsic::sqrt: + return ConstantFoldFP(sqrt, V, Ty); } if (!TLI) @@ -1637,87 +1639,74 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, switch (Name[0]) { case 'a': - if ((Name == "acos" && TLI->has(LibFunc::acos)) || - (Name == "acosf" && TLI->has(LibFunc::acosf))) + if ((Name == "acos" && TLI->has(LibFunc_acos)) || + (Name == "acosf" && TLI->has(LibFunc_acosf))) return ConstantFoldFP(acos, V, Ty); - else if ((Name == "asin" && TLI->has(LibFunc::asin)) || - (Name == "asinf" && TLI->has(LibFunc::asinf))) + else if ((Name == "asin" && TLI->has(LibFunc_asin)) || + (Name == "asinf" && TLI->has(LibFunc_asinf))) return ConstantFoldFP(asin, V, Ty); - else if ((Name == "atan" && TLI->has(LibFunc::atan)) || - (Name == "atanf" && TLI->has(LibFunc::atanf))) + else if ((Name == "atan" && TLI->has(LibFunc_atan)) || + (Name == "atanf" && TLI->has(LibFunc_atanf))) return ConstantFoldFP(atan, V, Ty); break; case 'c': - if ((Name == "ceil" && TLI->has(LibFunc::ceil)) || - (Name == "ceilf" && TLI->has(LibFunc::ceilf))) + if ((Name == "ceil" && TLI->has(LibFunc_ceil)) || + (Name == "ceilf" && TLI->has(LibFunc_ceilf))) return ConstantFoldFP(ceil, V, Ty); - else if ((Name == "cos" && TLI->has(LibFunc::cos)) || - (Name == "cosf" && TLI->has(LibFunc::cosf))) + else if ((Name == "cos" && TLI->has(LibFunc_cos)) || + (Name == "cosf" && TLI->has(LibFunc_cosf))) return ConstantFoldFP(cos, V, Ty); - else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) || - (Name == "coshf" && TLI->has(LibFunc::coshf))) + else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || + (Name == "coshf" && TLI->has(LibFunc_coshf))) return ConstantFoldFP(cosh, V, Ty); break; case 'e': - if ((Name == "exp" && TLI->has(LibFunc::exp)) || - (Name == "expf" && TLI->has(LibFunc::expf))) + if ((Name == "exp" && TLI->has(LibFunc_exp)) || + (Name == "expf" && TLI->has(LibFunc_expf))) return ConstantFoldFP(exp, V, Ty); - if ((Name == "exp2" && TLI->has(LibFunc::exp2)) || - (Name == "exp2f" && TLI->has(LibFunc::exp2f))) + if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || + (Name == "exp2f" && TLI->has(LibFunc_exp2f))) // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. 
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); break; case 'f': - if ((Name == "fabs" && TLI->has(LibFunc::fabs)) || - (Name == "fabsf" && TLI->has(LibFunc::fabsf))) + if ((Name == "fabs" && TLI->has(LibFunc_fabs)) || + (Name == "fabsf" && TLI->has(LibFunc_fabsf))) return ConstantFoldFP(fabs, V, Ty); - else if ((Name == "floor" && TLI->has(LibFunc::floor)) || - (Name == "floorf" && TLI->has(LibFunc::floorf))) + else if ((Name == "floor" && TLI->has(LibFunc_floor)) || + (Name == "floorf" && TLI->has(LibFunc_floorf))) return ConstantFoldFP(floor, V, Ty); break; case 'l': - if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) || - (Name == "logf" && V > 0 && TLI->has(LibFunc::logf))) + if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || + (Name == "logf" && V > 0 && TLI->has(LibFunc_logf))) return ConstantFoldFP(log, V, Ty); - else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) || - (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f))) + else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || + (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f))) return ConstantFoldFP(log10, V, Ty); - else if (IntrinsicID == Intrinsic::sqrt && - (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { - if (V >= -0.0) - return ConstantFoldFP(sqrt, V, Ty); - else { - // Unlike the sqrt definitions in C/C++, POSIX, and IEEE-754 - which - // all guarantee or favor returning NaN - the square root of a - // negative number is not defined for the LLVM sqrt intrinsic. - // This is because the intrinsic should only be emitted in place of - // libm's sqrt function when using "no-nans-fp-math". - return UndefValue::get(Ty); - } - } break; case 'r': - if ((Name == "round" && TLI->has(LibFunc::round)) || - (Name == "roundf" && TLI->has(LibFunc::roundf))) + if ((Name == "round" && TLI->has(LibFunc_round)) || + (Name == "roundf" && TLI->has(LibFunc_roundf))) return ConstantFoldFP(round, V, Ty); case 's': - if ((Name == "sin" && TLI->has(LibFunc::sin)) || - (Name == "sinf" && TLI->has(LibFunc::sinf))) + if ((Name == "sin" && TLI->has(LibFunc_sin)) || + (Name == "sinf" && TLI->has(LibFunc_sinf))) return ConstantFoldFP(sin, V, Ty); - else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) || - (Name == "sinhf" && TLI->has(LibFunc::sinhf))) + else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || + (Name == "sinhf" && TLI->has(LibFunc_sinhf))) return ConstantFoldFP(sinh, V, Ty); - else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) || - (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))) + else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || + (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) return ConstantFoldFP(sqrt, V, Ty); break; case 't': - if ((Name == "tan" && TLI->has(LibFunc::tan)) || - (Name == "tanf" && TLI->has(LibFunc::tanf))) + if ((Name == "tan" && TLI->has(LibFunc_tan)) || + (Name == "tanf" && TLI->has(LibFunc_tanf))) return ConstantFoldFP(tan, V, Ty); - else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) || - (Name == "tanhf" && TLI->has(LibFunc::tanhf))) + else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) || + (Name == "tanhf" && TLI->has(LibFunc_tanhf))) return ConstantFoldFP(tanh, V, Ty); break; default: @@ -1779,7 +1768,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, } if (isa<UndefValue>(Operands[0])) { - if (IntrinsicID == Intrinsic::bswap) + if (IntrinsicID == Intrinsic::bswap || + IntrinsicID == Intrinsic::bitreverse) return Operands[0]; return nullptr; } @@ -1822,14 +1812,14 @@ Constant 
*ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, if (!TLI) return nullptr; - if ((Name == "pow" && TLI->has(LibFunc::pow)) || - (Name == "powf" && TLI->has(LibFunc::powf))) + if ((Name == "pow" && TLI->has(LibFunc_pow)) || + (Name == "powf" && TLI->has(LibFunc_powf))) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if ((Name == "fmod" && TLI->has(LibFunc::fmod)) || - (Name == "fmodf" && TLI->has(LibFunc::fmodf))) + if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || + (Name == "fmodf" && TLI->has(LibFunc_fmodf))) return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if ((Name == "atan2" && TLI->has(LibFunc::atan2)) || - (Name == "atan2f" && TLI->has(LibFunc::atan2f))) + if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || + (Name == "atan2f" && TLI->has(LibFunc_atan2f))) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) @@ -2022,7 +2012,7 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { if (!F) return false; - LibFunc::Func Func; + LibFunc Func; if (!TLI || !TLI->getLibFunc(*F, Func)) return false; @@ -2030,20 +2020,20 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) { const APFloat &Op = OpC->getValueAPF(); switch (Func) { - case LibFunc::logl: - case LibFunc::log: - case LibFunc::logf: - case LibFunc::log2l: - case LibFunc::log2: - case LibFunc::log2f: - case LibFunc::log10l: - case LibFunc::log10: - case LibFunc::log10f: + case LibFunc_logl: + case LibFunc_log: + case LibFunc_logf: + case LibFunc_log2l: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log10l: + case LibFunc_log10: + case LibFunc_log10f: return Op.isNaN() || (!Op.isZero() && !Op.isNegative()); - case LibFunc::expl: - case LibFunc::exp: - case LibFunc::expf: + case LibFunc_expl: + case LibFunc_exp: + case LibFunc_expf: // FIXME: These boundaries are slightly conservative. if (OpC->getType()->isDoubleTy()) return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan && @@ -2053,9 +2043,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan; break; - case LibFunc::exp2l: - case LibFunc::exp2: - case LibFunc::exp2f: + case LibFunc_exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: // FIXME: These boundaries are slightly conservative. if (OpC->getType()->isDoubleTy()) return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan && @@ -2065,17 +2055,17 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan; break; - case LibFunc::sinl: - case LibFunc::sin: - case LibFunc::sinf: - case LibFunc::cosl: - case LibFunc::cos: - case LibFunc::cosf: + case LibFunc_sinl: + case LibFunc_sin: + case LibFunc_sinf: + case LibFunc_cosl: + case LibFunc_cos: + case LibFunc_cosf: return !Op.isInfinity(); - case LibFunc::tanl: - case LibFunc::tan: - case LibFunc::tanf: { + case LibFunc_tanl: + case LibFunc_tan: + case LibFunc_tanf: { // FIXME: Stop using the host math library. // FIXME: The computation isn't done in the right precision. 
Type *Ty = OpC->getType(); @@ -2086,23 +2076,23 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { break; } - case LibFunc::asinl: - case LibFunc::asin: - case LibFunc::asinf: - case LibFunc::acosl: - case LibFunc::acos: - case LibFunc::acosf: + case LibFunc_asinl: + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_acosl: + case LibFunc_acos: + case LibFunc_acosf: return Op.compare(APFloat(Op.getSemantics(), "-1")) != APFloat::cmpLessThan && Op.compare(APFloat(Op.getSemantics(), "1")) != APFloat::cmpGreaterThan; - case LibFunc::sinh: - case LibFunc::cosh: - case LibFunc::sinhf: - case LibFunc::coshf: - case LibFunc::sinhl: - case LibFunc::coshl: + case LibFunc_sinh: + case LibFunc_cosh: + case LibFunc_sinhf: + case LibFunc_coshf: + case LibFunc_sinhl: + case LibFunc_coshl: // FIXME: These boundaries are slightly conservative. if (OpC->getType()->isDoubleTy()) return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan && @@ -2112,9 +2102,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan; break; - case LibFunc::sqrtl: - case LibFunc::sqrt: - case LibFunc::sqrtf: + case LibFunc_sqrtl: + case LibFunc_sqrt: + case LibFunc_sqrtf: return Op.isNaN() || Op.isZero() || !Op.isNegative(); // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p, @@ -2133,9 +2123,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { const APFloat &Op1 = Op1C->getValueAPF(); switch (Func) { - case LibFunc::powl: - case LibFunc::pow: - case LibFunc::powf: { + case LibFunc_powl: + case LibFunc_pow: + case LibFunc_powf: { // FIXME: Stop using the host math library. // FIXME: The computation isn't done in the right precision. Type *Ty = Op0C->getType(); @@ -2149,9 +2139,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { break; } - case LibFunc::fmodl: - case LibFunc::fmod: - case LibFunc::fmodf: + case LibFunc_fmodl: + case LibFunc_fmod: + case LibFunc_fmodf: return Op0.isNaN() || Op1.isNaN() || (!Op0.isInfinity() && !Op1.isZero()); diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 6b77397956cd..32bfea58bf9d 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -447,25 +447,25 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); Type *CondTy = SI->getCondition()->getType(); - return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); - return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); } case Instruction::Store: { const StoreInst *SI = cast<StoreInst>(I); Type *ValTy = SI->getValueOperand()->getType(); return TTI->getMemoryOpCost(I->getOpcode(), ValTy, - SI->getAlignment(), - SI->getPointerAddressSpace()); + SI->getAlignment(), + SI->getPointerAddressSpace(), I); } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(I); return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), - LI->getAlignment(), - LI->getPointerAddressSpace()); + LI->getAlignment(), + LI->getPointerAddressSpace(), I); } case Instruction::ZExt: case Instruction::SExt: @@ -481,7 +481,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { case 
Instruction::BitCast: case Instruction::AddrSpaceCast: { Type *SrcTy = I->getOperand(0)->getType(); - return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); } case Instruction::ExtractElement: { const ExtractElementInst * EEI = cast<ExtractElementInst>(I); @@ -542,9 +542,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { } case Instruction::Call: if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - SmallVector<Value *, 4> Args; - for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) - Args.push_back(II->getArgOperand(J)); + SmallVector<Value *, 4> Args(II->arg_operands()); FastMathFlags FMF; if (auto *FPMO = dyn_cast<FPMathOperator>(II)) diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 688c1db534c1..151c0b0e6c93 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -110,6 +110,9 @@ void DemandedBits::determineLiveOperandBits( // the output. AB = AOut.byteSwap(); break; + case Intrinsic::bitreverse: + AB = AOut.reverseBits(); + break; case Intrinsic::ctlz: if (OperandNo == 0) { // We need some output bits, so we need all bits of the diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index a332a07ce864..a4672efeedd6 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -385,9 +385,9 @@ void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { Kind = Any; } - +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // For debugging purposes. Dumps the constraint out to OS. -void DependenceInfo::Constraint::dump(raw_ostream &OS) const { +LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { if (isEmpty()) OS << " Empty\n"; else if (isAny()) @@ -403,6 +403,7 @@ void DependenceInfo::Constraint::dump(raw_ostream &OS) const { else llvm_unreachable("unknown constraint type in Constraint::dump"); } +#endif // Updates X with the intersection diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index 15856c3f8b7a..5b6e2d0476e4 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -56,6 +56,16 @@ LLVM_DUMP_METHOD void DominanceFrontierWrapperPass::dump() const { } #endif +/// Handle invalidation explicitly. +bool DominanceFrontier::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. + auto PAC = PA.getChecker<DominanceFrontierAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || + PAC.preservedSet<CFGAnalyses>()); +} + AnalysisKey DominanceFrontierAnalysis::Key; DominanceFrontier DominanceFrontierAnalysis::run(Function &F, diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index a661b0101e6a..fde805a5fde5 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -76,9 +76,8 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, // An add is interesting if exactly one of its operands is interesting. 
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { bool AnyInterestingYet = false; - for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); - OI != OE; ++OI) - if (isInteresting(*OI, I, L, SE, LI)) { + for (const auto *Op : Add->operands()) + if (isInteresting(Op, I, L, SE, LI)) { if (AnyInterestingYet) return false; AnyInterestingYet = true; @@ -118,6 +117,50 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, return true; } +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) + return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast<PHINode>(User); + if (!PN || !Operand) + return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of Operand by the PHI node. If any use corresponds + // to a block that is not dominated by the latch block, give up and use the + // preincremented value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Operand && + !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + + // Okay, all uses of Operand by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. + return true; +} + /// AddUsersImpl - Inspect the specified instruction. If it is a /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. @@ -208,10 +251,26 @@ bool IVUsers::AddUsersImpl(Instruction *I, // The regular return value here is discarded; instead of recording // it, we just recompute it when we need it. const SCEV *OriginalISE = ISE; - ISE = TransformForPostIncUse(NormalizeAutodetect, - ISE, User, I, - NewUse.PostIncLoops, - *SE, *DT); + + auto NormalizePred = [&](const SCEVAddRecExpr *AR) { + // We only allow affine AddRecs to be normalized, otherwise we would not + // be able to correctly denormalize. + // e.g. 
{1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2} + // Normalized form: {-2,+,1,+,2} + // Denormalized form: {1,+,3,+,2} + // + // However, denormalization would use a different step expression than + // normalization (see getPostIncExpr), generating the wrong final + // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} + auto *L = AR->getLoop(); + bool Result = + AR->isAffine() && IVUseShouldUsePostIncValue(User, I, L, DT); + if (Result) + NewUse.PostIncLoops.insert(L); + return Result; + }; + + ISE = normalizeForPostIncUseIf(ISE, NormalizePred, *SE); // PostIncNormalization effectively simplifies the expression under // pre-increment assumptions. Those assumptions (no wrapping) might not @@ -219,8 +278,7 @@ bool IVUsers::AddUsersImpl(Instruction *I, // transformation is invertible. if (OriginalISE != ISE) { const SCEV *DenormalizedISE = - TransformForPostIncUse(Denormalize, ISE, User, I, - NewUse.PostIncLoops, *SE, *DT); + denormalizeForPostIncUse(ISE, NewUse.PostIncLoops, *SE); // If we normalized the expression, but denormalization doesn't give the // original one, discard this user. @@ -338,11 +396,8 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const { /// getExpr - Return the expression for the use. const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const { - return - TransformForPostIncUse(Normalize, getReplacementExpr(IU), - IU.getUser(), IU.getOperandValToReplace(), - const_cast<PostIncLoopSet &>(IU.getPostIncLoops()), - *SE, *DT); + return normalizeForPostIncUse(getReplacementExpr(IU), IU.getPostIncLoops(), + *SE); } static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { @@ -353,9 +408,8 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { } if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) - if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + for (const auto *Op : Add->operands()) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(Op, L)) return AR; return nullptr; } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 4109049ecabc..1f8dec2aed80 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -48,6 +49,11 @@ static cl::opt<int> HintThreshold( "inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint")); +static cl::opt<int> + ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, + cl::init(45), + cl::desc("Threshold for inlining cold callsites")); + // We introduce this threshold to help performance of instrumentation based // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. @@ -72,12 +78,18 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// Getter for the cache of @llvm.assume intrinsics. std::function<AssumptionCache &(Function &)> &GetAssumptionCache; + /// Getter for BlockFrequencyInfo + Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI; + /// Profile summary information. ProfileSummaryInfo *PSI; /// The called function. Function &F; + // Cache the DataLayout since we use it a lot. 
+ const DataLayout &DL; + /// The candidate callsite being analyzed. Please do not use this to do /// analysis in the caller function; we want the inline cost query to be /// easily cacheable. Instead, use the cover function paramHasAttr. @@ -133,9 +145,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { void disableSROA(Value *V); void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, int InstructionCost); - bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool isGEPFree(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); + template <typename Callable> + bool simplifyInstruction(Instruction &I, Callable Evaluate); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); /// Return true if the given argument to the function being considered for @@ -202,9 +216,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { public: CallAnalyzer(const TargetTransformInfo &TTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, const InlineParams &Params) - : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee), + : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), + PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), @@ -286,23 +302,11 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, SROACostSavings += InstructionCost; } -/// \brief Check whether a GEP's indices are all constant. -/// -/// Respects any simplified values known during the analysis of this callsite. -bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { - for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) - if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I)) - return false; - - return true; -} - /// \brief Accumulate a constant GEP offset into an APInt if possible. /// /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - const DataLayout &DL = F.getParent()->getDataLayout(); unsigned IntPtrWidth = DL.getPointerSizeInBits(); assert(IntPtrWidth == Offset.getBitWidth()); @@ -331,13 +335,27 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { return true; } +/// \brief Use TTI to check whether a GEP is free. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { + SmallVector<Value *, 4> Indices; + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (Constant *SimpleOp = SimplifiedValues.lookup(*I)) + Indices.push_back(SimpleOp); + else + Indices.push_back(*I); + return TargetTransformInfo::TCC_Free == + TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(), + Indices); +} + bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Check whether inlining will turn a dynamic alloca into a static // alloca and handle that case. 
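The isGEPFree helper added above rebuilds the index list, substituting any index the analyzer has already proven constant, before asking TTI for the GEP's cost. A rough standalone sketch of that substitute-then-query shape; the types, the map, and the toy cost rule are illustrative stand-ins, not LLVM APIs:

#include <unordered_map>
#include <variant>
#include <vector>

struct Value {};                                    // stand-in for llvm::Value
using Operand = std::variant<const Value *, long>;  // opaque value or known constant
using SimplifiedMap = std::unordered_map<const Value *, long>;

// Toy stand-in for TTI.getGEPCost: the GEP is free only if every index is constant.
static bool costSaysFree(const std::vector<Operand> &Indices) {
  for (const Operand &Op : Indices)
    if (!std::holds_alternative<long>(Op))
      return false;
  return true;
}

// Prefer the simplified constant for each index when the callsite analysis has
// one, otherwise pass the original operand through, then ask the cost model.
static bool isGEPFreeSketch(const std::vector<const Value *> &Indices,
                            const SimplifiedMap &Simplified) {
  std::vector<Operand> Substituted;
  for (const Value *Idx : Indices) {
    auto It = Simplified.find(Idx);
    if (It != Simplified.end())
      Substituted.push_back(It->second);
    else
      Substituted.push_back(Idx);
  }
  return costSaysFree(Substituted);
}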
if (I.isArrayAllocation()) { Constant *Size = SimplifiedValues.lookup(I.getArraySize()); if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) { - const DataLayout &DL = F.getParent()->getDataLayout(); Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize); @@ -347,7 +365,6 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Accumulate the allocated size. if (I.isStaticAlloca()) { - const DataLayout &DL = F.getParent()->getDataLayout(); Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize); } @@ -396,7 +413,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { // Non-constant GEPs aren't folded, and disable SROA. if (SROACandidate) disableSROA(CostIt); - return false; + return isGEPFree(I); } // Add the result as a new mapping to Base + Offset. @@ -411,7 +428,15 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { } } - if (isGEPOffsetConstant(I)) { + // Lambda to check whether a GEP's indices are all constant. + auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I)) + return false; + return true; + }; + + if (IsGEPOffsetConstant(I)) { if (SROACandidate) SROAArgValues[&I] = SROAArg; @@ -422,19 +447,36 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { // Variable GEPs will require math and will disable SROA. if (SROACandidate) disableSROA(CostIt); - return false; + return isGEPFree(I); +} + +/// Simplify \p I if its operands are constants and update SimplifiedValues. +/// \p Evaluate is a callable specific to instruction type that evaluates the +/// instruction when all the operands are constants. +template <typename Callable> +bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) { + SmallVector<Constant *, 2> COps; + for (Value *Op : I.operands()) { + Constant *COp = dyn_cast<Constant>(Op); + if (!COp) + COp = SimplifiedValues.lookup(Op); + if (!COp) + return false; + COps.push_back(COp); + } + auto *C = Evaluate(COps); + if (!C) + return false; + SimplifiedValues[&I] = C; + return true; } bool CallAnalyzer::visitBitCast(BitCastInst &I) { // Propagate constants through bitcasts. - Constant *COp = dyn_cast<Constant>(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + return ConstantExpr::getBitCast(COps[0], I.getType()); + })) + return true; // Track base/offsets through casts std::pair<Value *, APInt> BaseAndOffset = @@ -455,19 +497,14 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) { bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Propagate constants through ptrtoint. - Constant *COp = dyn_cast<Constant>(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + return ConstantExpr::getPtrToInt(COps[0], I.getType()); + })) + return true; // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. 
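The simplifyInstruction template introduced above factors out the boilerplate that visitBitCast, visitPtrToInt, visitCmpInst and friends used to repeat: gather each operand as a constant (directly or via SimplifiedValues), hand the constants to an instruction-specific callable, and record the folded result. A standalone sketch of the same shape, with toy types and names rather than the LLVM classes:

#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

// Toy IR: an instruction's operands are names; an operand is "constant" if it
// appears in Known (a literal) or in Simplified (folded earlier), mirroring
// the SimplifiedValues lookup in the patch.
struct Inst { std::vector<std::string> Ops; };

template <typename Callable>
bool simplifyIfOperandsKnown(const Inst &I,
                             const std::unordered_map<std::string, long> &Known,
                             std::unordered_map<std::string, long> &Simplified,
                             const std::string &ResultName, Callable Evaluate) {
  std::vector<long> COps;
  for (const std::string &Op : I.Ops) {
    auto K = Known.find(Op);
    if (K != Known.end()) { COps.push_back(K->second); continue; }
    auto S = Simplified.find(Op);
    if (S == Simplified.end())
      return false;                              // a non-constant operand: give up
    COps.push_back(S->second);
  }
  std::optional<long> Result = Evaluate(COps);   // the callable may decline
  if (!Result)
    return false;
  Simplified[ResultName] = *Result;              // record the folded value
  return true;
}

// Usage: fold "add a, b" once both operands are known constants.
//   simplifyIfOperandsKnown(I, Known, Simplified, "t0",
//       [](const std::vector<long> &C) { return std::optional<long>(C[0] + C[1]); });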
unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - const DataLayout &DL = F.getParent()->getDataLayout(); if (IntegerSize >= DL.getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); @@ -492,20 +529,15 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // Propagate constants through ptrtoint. - Constant *COp = dyn_cast<Constant>(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + return ConstantExpr::getIntToPtr(COps[0], I.getType()); + })) + return true; // Track base/offset pairs when round-tripped through a pointer without // modifications provided the integer is not too large. Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - const DataLayout &DL = F.getParent()->getDataLayout(); if (IntegerSize <= DL.getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) @@ -523,14 +555,10 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { bool CallAnalyzer::visitCastInst(CastInst &I) { // Propagate constants through ptrtoint. - Constant *COp = dyn_cast<Constant>(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); - if (COp) - if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType()); + })) + return true; // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. disableSROA(I.getOperand(0)); @@ -540,16 +568,10 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { Value *Operand = I.getOperand(0); - Constant *COp = dyn_cast<Constant>(Operand); - if (!COp) - COp = SimplifiedValues.lookup(Operand); - if (COp) { - const DataLayout &DL = F.getParent()->getDataLayout(); - if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) { - SimplifiedValues[&I] = C; - return true; - } - } + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + return ConstantFoldInstOperands(&I, COps[0], DL); + })) + return true; // Disable any SROA on the argument to arbitrary unary operators. disableSROA(Operand); @@ -558,8 +580,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { } bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { - unsigned ArgNo = A->getArgNo(); - return CandidateCS.paramHasAttr(ArgNo + 1, Attr); + return CandidateCS.paramHasAttr(A->getArgNo(), Attr); } bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { @@ -642,16 +663,21 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { if (Callee.hasFnAttribute(Attribute::InlineHint)) Threshold = MaxIfValid(Threshold, Params.HintThreshold); if (PSI) { - uint64_t TotalWeight; - if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) && - PSI->isHotCount(TotalWeight)) { - Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold); + BlockFrequencyInfo *CallerBFI = GetBFI ? 
&((*GetBFI)(*Caller)) : nullptr; + if (PSI->isHotCallSite(CS, CallerBFI)) { + DEBUG(dbgs() << "Hot callsite.\n"); + Threshold = Params.HotCallSiteThreshold.getValue(); } else if (PSI->isFunctionEntryHot(&Callee)) { + DEBUG(dbgs() << "Hot callee.\n"); // If callsite hotness can not be determined, we may still know // that the callee is hot and treat it as a weaker hint for threshold // increase. Threshold = MaxIfValid(Threshold, Params.HintThreshold); + } else if (PSI->isColdCallSite(CS, CallerBFI)) { + DEBUG(dbgs() << "Cold callsite.\n"); + Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); } else if (PSI->isFunctionEntryCold(&Callee)) { + DEBUG(dbgs() << "Cold callee.\n"); Threshold = MinIfValid(Threshold, Params.ColdThreshold); } } @@ -665,20 +691,10 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { bool CallAnalyzer::visitCmpInst(CmpInst &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // First try to handle simplified comparisons. - if (!isa<Constant>(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa<Constant>(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - if (Constant *CLHS = dyn_cast<Constant>(LHS)) { - if (Constant *CRHS = dyn_cast<Constant>(RHS)) - if (Constant *C = - ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - return true; - } - } + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]); + })) + return true; if (I.getOpcode() == Instruction::FCmp) return false; @@ -756,24 +772,18 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) { bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - const DataLayout &DL = F.getParent()->getDataLayout(); - if (!isa<Constant>(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa<Constant>(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - Value *SimpleV = nullptr; - if (auto FI = dyn_cast<FPMathOperator>(&I)) - SimpleV = - SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); - else - SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); + auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) { + Value *SimpleV = nullptr; + if (auto FI = dyn_cast<FPMathOperator>(&I)) + SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1], + FI->getFastMathFlags(), DL); + else + SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL); + return dyn_cast_or_null<Constant>(SimpleV); + }; - if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { - SimplifiedValues[&I] = C; + if (simplifyInstruction(I, Evaluate)) return true; - } // Disable any SROA on arguments to arbitrary, unsimplified binary operators. disableSROA(LHS); @@ -814,13 +824,10 @@ bool CallAnalyzer::visitStore(StoreInst &I) { bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { // Constant folding for extract value is trivial. - Constant *C = dyn_cast<Constant>(I.getAggregateOperand()); - if (!C) - C = SimplifiedValues.lookup(I.getAggregateOperand()); - if (C) { - SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + return ConstantExpr::getExtractValue(COps[0], I.getIndices()); + })) return true; - } // SROA can look through these but give them a cost. 
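updateThreshold now consults the profile in order of decreasing signal strength: a hot callsite (profile summary plus the caller's BFI) takes the hot-callsite threshold outright, a hot callee only raises the threshold, and a cold callsite or cold callee only lowers it, always through the MaxIfValid/MinIfValid guards so unset knobs change nothing. A sketch of that ordering with std::optional standing in for the optional knobs; the numeric defaults are illustrative except for the inline-hint (325) and cold-callsite (45) values, which appear in this patch:

#include <algorithm>
#include <optional>

// Mirrors the MaxIfValid/MinIfValid helpers: an unset knob leaves the
// running threshold untouched.
static int maxIfValid(int Threshold, std::optional<int> Knob) {
  return Knob ? std::max(Threshold, *Knob) : Threshold;
}
static int minIfValid(int Threshold, std::optional<int> Knob) {
  return Knob ? std::min(Threshold, *Knob) : Threshold;
}

struct ParamsSketch {
  std::optional<int> HintThreshold = 325;          // from the patch
  std::optional<int> HotCallSiteThreshold = 3000;  // illustrative
  std::optional<int> ColdCallSiteThreshold = 45;   // from the patch
  std::optional<int> ColdThreshold = 45;           // illustrative
};

// Ordered from strongest to weakest signal, as in updateThreshold: a hot
// callsite overrides, a hot callee can only raise, cold signals can only lower.
static int adjustForProfile(int Threshold, const ParamsSketch &P, bool HotCallSite,
                            bool HotCallee, bool ColdCallSite, bool ColdCallee) {
  if (HotCallSite)
    return P.HotCallSiteThreshold.value_or(Threshold);
  if (HotCallee)
    return maxIfValid(Threshold, P.HintThreshold);
  if (ColdCallSite)
    return minIfValid(Threshold, P.ColdCallSiteThreshold);
  if (ColdCallee)
    return minIfValid(Threshold, P.ColdThreshold);
  return Threshold;
}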
return false; @@ -828,17 +835,12 @@ bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { // Constant folding for insert value is trivial. - Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand()); - if (!AggC) - AggC = SimplifiedValues.lookup(I.getAggregateOperand()); - Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand()); - if (!InsertedC) - InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); - if (AggC && InsertedC) { - SimplifiedValues[&I] = - ConstantExpr::getInsertValue(AggC, InsertedC, I.getIndices()); + if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { + return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0], + /*InsertedValueOperand*/ COps[1], + I.getIndices()); + })) return true; - } // SROA can look through these but give them a cost. return false; @@ -959,7 +961,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // out. Pretend to inline the function, with a custom threshold. auto IndirectCallParams = Params; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams); + CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS, + IndirectCallParams); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. @@ -1006,8 +1009,8 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // does not (yet) fire. SmallPtrSet<BasicBlock *, 8> SuccessorBlocks; SuccessorBlocks.insert(SI.getDefaultDest()); - for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I) - SuccessorBlocks.insert(I.getCaseSuccessor()); + for (auto Case : SI.cases()) + SuccessorBlocks.insert(Case.getCaseSuccessor()); // Add cost corresponding to the number of distinct destinations. The first // we model as free because of fallthrough. Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; @@ -1098,19 +1101,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, // is expensive or the function has the "use-soft-float" attribute, this may // eventually become a library call. Treat the cost as such. if (I->getType()->isFloatingPointTy()) { - bool hasSoftFloatAttr = false; - // If the function has the "use-soft-float" attribute, mark it as // expensive. - if (F.hasFnAttribute("use-soft-float")) { - Attribute Attr = F.getFnAttribute("use-soft-float"); - StringRef Val = Attr.getValueAsString(); - if (Val == "true") - hasSoftFloatAttr = true; - } - if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive || - hasSoftFloatAttr) + (F.getFnAttribute("use-soft-float").getValueAsString() == "true")) Cost += InlineConstants::CallPenalty; } @@ -1155,7 +1149,6 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!V->getType()->isPointerTy()) return nullptr; - const DataLayout &DL = F.getParent()->getDataLayout(); unsigned IntPtrWidth = DL.getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); @@ -1212,7 +1205,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { FiftyPercentVectorBonus = 3 * Threshold / 2; TenPercentVectorBonus = 3 * Threshold / 4; - const DataLayout &DL = F.getParent()->getDataLayout(); // Track whether the post-inlining function would have more than one basic // block. A single basic block is often intended for inlining. 
Balloon the @@ -1371,7 +1363,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); + BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor()); continue; } } @@ -1430,13 +1422,6 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump() { } #endif -/// \brief Test that two functions either have or have not the given attribute -/// at the same time. -template <typename AttrKind> -static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { - return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr); -} - /// \brief Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. static bool functionsHaveCompatibleAttributes(Function *Caller, @@ -1449,15 +1434,17 @@ static bool functionsHaveCompatibleAttributes(Function *Caller, InlineCost llvm::getInlineCost( CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI) { return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, - GetAssumptionCache, PSI); + GetAssumptionCache, GetBFI, PSI); } InlineCost llvm::getInlineCost( CallSite CS, Function *Callee, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI) { // Cannot inline indirect calls. @@ -1492,7 +1479,8 @@ InlineCost llvm::getInlineCost( DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params); + CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS, + Params); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1565,6 +1553,9 @@ InlineParams llvm::getInlineParams(int Threshold) { // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. Params.HotCallSiteThreshold = HotCallSiteThreshold; + // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold. + Params.ColdCallSiteThreshold = ColdCallSiteThreshold; + // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // -inlinehint-threshold commandline option is not explicitly given. If that diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 796e6e444980..e12f640394e6 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" @@ -140,10 +141,9 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { /// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS. /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. 
-static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, - unsigned OpcToExpand, const Query &Q, +static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, + Instruction::BinaryOps OpcodeToExpand, const Query &Q, unsigned MaxRecurse) { - Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; @@ -199,9 +199,9 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Generic simplifications for associative binary operations. /// Returns the simpler value, or null if none was found. -static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { - Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; +static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, + Value *LHS, Value *RHS, const Query &Q, + unsigned MaxRecurse) { assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); // Recursion is always used, so bail out at once if we already hit the limit. @@ -298,8 +298,9 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, /// try to simplify the binop by seeing whether evaluating it on both branches /// of the select results in the same value. Returns the common value if so, /// otherwise returns null. -static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { +static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, + Value *RHS, const Query &Q, + unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; @@ -451,8 +452,9 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, /// try to simplify the binop by seeing whether evaluating it on the incoming /// phi values yields the same result for every value. If so returns the common /// value, otherwise returns null. -static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { +static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, + Value *RHS, const Query &Q, + unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; @@ -527,17 +529,26 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, return CommonValue; } +static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, + Value *&Op0, Value *&Op1, + const Query &Q) { + if (auto *CLHS = dyn_cast<Constant>(Op0)) { + if (auto *CRHS = dyn_cast<Constant>(Op1)) + return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL); + + // Canonicalize the constant to the RHS if this is a commutative operation. + if (Instruction::isCommutative(Opcode)) + std::swap(Op0, Op1); + } + return nullptr; +} + /// Given operands for an Add, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const Query &Q, unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) { - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::Add, CLHS, CRHS, Q.DL); - - // Canonicalize the constant to the RHS. 
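foldOrCommuteConstant, added above, centralizes the prologue that every SimplifyXXXInst used to open with: fold the operation when both operands are constants, and otherwise move a lone constant to the right-hand side of commutative operations so later pattern matching only has to inspect Op1. A self-contained sketch of the same logic over plain integers; the enum and fold table here are illustrative, not LLVM's Instruction::BinaryOps:

#include <optional>
#include <utility>

enum class BinOp { Add, Sub, Mul, And, Or, Xor };

static bool isCommutative(BinOp Op) {
  return Op == BinOp::Add || Op == BinOp::Mul || Op == BinOp::And ||
         Op == BinOp::Or || Op == BinOp::Xor;
}

// Stand-in operand: either a known constant or an opaque value.
struct Operand { std::optional<long> Const; };

static long foldConstants(BinOp Op, long L, long R) {
  switch (Op) {
  case BinOp::Add: return L + R;
  case BinOp::Sub: return L - R;
  case BinOp::Mul: return L * R;
  case BinOp::And: return L & R;
  case BinOp::Or:  return L | R;
  case BinOp::Xor: return L ^ R;
  }
  return 0;
}

// Returns the folded constant when both sides are constant; otherwise
// canonicalizes a lone constant onto the RHS of commutative operations.
static std::optional<long> foldOrCommuteConstantSketch(BinOp Op, Operand &LHS,
                                                       Operand &RHS) {
  if (LHS.Const) {
    if (RHS.Const)
      return foldConstants(Op, *LHS.Const, *RHS.Const);
    if (isCommutative(Op))
      std::swap(LHS, RHS);
  }
  return std::nullopt;
}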
- std::swap(Op0, Op1); - } + if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) + return C; // X + undef -> undef if (match(Op1, m_Undef())) @@ -556,12 +567,20 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return Y; // X + ~X -> -1 since ~X = -X-1 + Type *Ty = Op0->getType(); if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0)))) - return Constant::getAllOnesValue(Op0->getType()); + return Constant::getAllOnesValue(Ty); + + // add nsw/nuw (xor Y, signbit), signbit --> Y + // The no-wrapping add guarantees that the top bit will be set by the add. + // Therefore, the xor must be clearing the already set sign bit of Y. + if ((isNSW || isNUW) && match(Op1, m_SignBit()) && + match(Op0, m_Xor(m_Value(Y), m_SignBit()))) + return Y; /// i1 add -> xor. - if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -665,9 +684,8 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, /// If not, this returns null. static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const Query &Q, unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::Sub, CLHS, CRHS, Q.DL); + if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q)) + return C; // X - undef -> undef // undef - X -> undef @@ -692,7 +710,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (KnownZero == ~APInt::getSignBit(BitWidth)) { + if (KnownZero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. if (isNSW) @@ -779,7 +797,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // i1 sub -> xor. - if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -807,13 +825,8 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// returns null. static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const Query &Q, unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) { - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::FAdd, CLHS, CRHS, Q.DL); - - // Canonicalize the constant to the RHS. - std::swap(Op0, Op1); - } + if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) + return C; // fadd X, -0 ==> X if (match(Op1, m_NegZero())) @@ -846,10 +859,8 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// returns null. 
static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const Query &Q, unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) { - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::FSub, CLHS, CRHS, Q.DL); - } + if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) + return C; // fsub X, 0 ==> X if (match(Op1, m_Zero())) @@ -878,40 +889,28 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, } /// Given the operands for an FMul, see if we can fold the result -static Value *SimplifyFMulInst(Value *Op0, Value *Op1, - FastMathFlags FMF, - const Query &Q, - unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) { - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::FMul, CLHS, CRHS, Q.DL); - - // Canonicalize the constant to the RHS. - std::swap(Op0, Op1); - } +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) + return C; - // fmul X, 1.0 ==> X - if (match(Op1, m_FPOne())) - return Op0; + // fmul X, 1.0 ==> X + if (match(Op1, m_FPOne())) + return Op0; - // fmul nnan nsz X, 0 ==> 0 - if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) - return Op1; + // fmul nnan nsz X, 0 ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) + return Op1; - return nullptr; + return nullptr; } /// Given operands for a Mul, see if we can fold the result. /// If not, this returns null. static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) { - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::Mul, CLHS, CRHS, Q.DL); - - // Canonicalize the constant to the RHS. - std::swap(Op0, Op1); - } + if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q)) + return C; // X * undef -> 0 if (match(Op1, m_Undef())) @@ -932,7 +931,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, return X; // i1 mul -> and. - if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -998,43 +997,68 @@ Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, RecursionLimit); } -/// Given operands for an SDiv or UDiv, see if we can fold the result. -/// If not, this returns null. -static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const Query &Q, unsigned MaxRecurse) { - if (Constant *C0 = dyn_cast<Constant>(Op0)) - if (Constant *C1 = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL); - - bool isSigned = Opcode == Instruction::SDiv; +/// Check for common or similar folds of integer division or integer remainder. +static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { + Type *Ty = Op0->getType(); // X / undef -> undef + // X % undef -> undef if (match(Op1, m_Undef())) return Op1; - // X / 0 -> undef, we don't need to preserve faults! + // X / 0 -> undef + // X % 0 -> undef + // We don't need to preserve faults! if (match(Op1, m_Zero())) - return UndefValue::get(Op1->getType()); + return UndefValue::get(Ty); + + // If any element of a constant divisor vector is zero, the whole op is undef. 
+ auto *Op1C = dyn_cast<Constant>(Op1); + if (Op1C && Ty->isVectorTy()) { + unsigned NumElts = Ty->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = Op1C->getAggregateElement(i); + if (Elt && Elt->isNullValue()) + return UndefValue::get(Ty); + } + } // undef / X -> 0 + // undef % X -> 0 if (match(Op0, m_Undef())) - return Constant::getNullValue(Op0->getType()); + return Constant::getNullValue(Ty); - // 0 / X -> 0, we don't need to preserve faults! + // 0 / X -> 0 + // 0 % X -> 0 if (match(Op0, m_Zero())) return Op0; + // X / X -> 1 + // X % X -> 0 + if (Op0 == Op1) + return IsDiv ? ConstantInt::get(Ty, 1) : Constant::getNullValue(Ty); + // X / 1 -> X - if (match(Op1, m_One())) - return Op0; + // X % 1 -> 0 + // If this is a boolean op (single-bit element type), we can't have + // division-by-zero or remainder-by-zero, so assume the divisor is 1. + if (match(Op1, m_One()) || Ty->getScalarType()->isIntegerTy(1)) + return IsDiv ? Op0 : Constant::getNullValue(Ty); - if (Op0->getType()->isIntegerTy(1)) - // It can't be division by zero, hence it must be division by one. - return Op0; + return nullptr; +} - // X / X -> 1 - if (Op0 == Op1) - return ConstantInt::get(Op0->getType(), 1); +/// Given operands for an SDiv or UDiv, see if we can fold the result. +/// If not, this returns null. +static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const Query &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) + return C; + + if (Value *V = simplifyDivRem(Op0, Op1, true)) + return V; + + bool isSigned = Opcode == Instruction::SDiv; // (X * Y) / Y -> X if the multiplication does not overflow. Value *X = nullptr, *Y = nullptr; @@ -1129,6 +1153,9 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL, static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const Query &Q, unsigned) { + if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) + return C; + // undef / X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -1178,37 +1205,11 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// If not, this returns null. static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { - if (Constant *C0 = dyn_cast<Constant>(Op0)) - if (Constant *C1 = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL); - - // X % undef -> undef - if (match(Op1, m_Undef())) - return Op1; - - // undef % X -> 0 - if (match(Op0, m_Undef())) - return Constant::getNullValue(Op0->getType()); - - // 0 % X -> 0, we don't need to preserve faults! - if (match(Op0, m_Zero())) - return Op0; + if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) + return C; - // X % 0 -> undef, we don't need to preserve faults! - if (match(Op1, m_Zero())) - return UndefValue::get(Op0->getType()); - - // X % 1 -> 0 - if (match(Op1, m_One())) - return Constant::getNullValue(Op0->getType()); - - if (Op0->getType()->isIntegerTy(1)) - // It can't be remainder by zero, hence it must be remainder by one. 
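simplifyDivRem, introduced above, merges the folds that SimplifyDiv and SimplifyRem previously duplicated: X op undef and X op 0 become undef, undef op X and 0 op X become 0, X op X is 1 for division and 0 for remainder, and a divisor of 1 returns X for division and 0 for remainder. A standalone scalar sketch of that decision list; the vector-divisor and single-bit (i1) special cases from the patch are omitted, and the Operand/Folded types are illustrative stand-ins:

#include <optional>

// Stand-in operand: undef, a known constant, or an opaque value with an id
// (so X op X can be detected).
struct Operand {
  enum Kind { Undef, Const, Opaque } K;
  long Val = 0;   // for Const
  int Id = -1;    // for Opaque
};

struct Folded {
  enum Kind { ToUndef, ToConst, ToOp0 } K;
  long Val = 0;
};

// The folds shared by {s,u}div and {s,u}rem, checked in the same order as the patch.
static std::optional<Folded> simplifyDivRemSketch(Operand Op0, Operand Op1, bool IsDiv) {
  auto Same = [](Operand A, Operand B) {
    return A.K == Operand::Opaque && B.K == Operand::Opaque && A.Id == B.Id;
  };
  if (Op1.K == Operand::Undef)                    // X / undef, X % undef
    return Folded{Folded::ToUndef};
  if (Op1.K == Operand::Const && Op1.Val == 0)    // X / 0, X % 0: no need to preserve faults
    return Folded{Folded::ToUndef};
  if (Op0.K == Operand::Undef)                    // undef / X, undef % X
    return Folded{Folded::ToConst, 0};
  if (Op0.K == Operand::Const && Op0.Val == 0)    // 0 / X, 0 % X
    return Folded{Folded::ToConst, 0};
  if (Same(Op0, Op1))                             // X / X -> 1, X % X -> 0
    return Folded{Folded::ToConst, IsDiv ? 1 : 0};
  if (Op1.K == Operand::Const && Op1.Val == 1)    // X / 1 -> X, X % 1 -> 0
    return IsDiv ? Folded{Folded::ToOp0} : Folded{Folded::ToConst, 0};
  return std::nullopt;
}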
- return Constant::getNullValue(Op0->getType()); - - // X % X -> 0 - if (Op0 == Op1) - return Constant::getNullValue(Op0->getType()); + if (Value *V = simplifyDivRem(Op0, Op1, false)) + return V; // (X % Y) % Y -> X % Y if ((Opcode == Instruction::SRem && @@ -1279,7 +1280,10 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL, } static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &, unsigned) { + const Query &Q, unsigned) { + if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) + return C; + // undef % X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -1335,11 +1339,10 @@ static bool isUndefShift(Value *Amount) { /// Given operands for an Shl, LShr or AShr, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, - const Query &Q, unsigned MaxRecurse) { - if (Constant *C0 = dyn_cast<Constant>(Op0)) - if (Constant *C1 = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL); +static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, + Value *Op1, const Query &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) + return C; // 0 shift by X -> 0 if (match(Op0, m_Zero())) @@ -1386,8 +1389,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, /// \brief Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1, - bool isExact, const Query &Q, +static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, + Value *Op1, bool isExact, const Query &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse)) return V; @@ -1636,13 +1639,8 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { /// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) { - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::And, CLHS, CRHS, Q.DL); - - // Canonicalize the constant to the RHS. - std::swap(Op0, Op1); - } + if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q)) + return C; // X & undef -> 0 if (match(Op1, m_Undef())) @@ -1838,13 +1836,8 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { /// If not, this returns null. static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) { - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::Or, CLHS, CRHS, Q.DL); - - // Canonicalize the constant to the RHS. - std::swap(Op0, Op1); - } + if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q)) + return C; // X | undef -> -1 if (match(Op1, m_Undef())) @@ -1971,13 +1964,8 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, /// If not, this returns null. static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { - if (Constant *CLHS = dyn_cast<Constant>(Op0)) { - if (Constant *CRHS = dyn_cast<Constant>(Op1)) - return ConstantFoldBinaryOpOperands(Instruction::Xor, CLHS, CRHS, Q.DL); - - // Canonicalize the constant to the RHS. 
- std::swap(Op0, Op1); - } + if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q)) + return C; // A ^ undef -> undef if (match(Op1, m_Undef())) @@ -2377,6 +2365,163 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, return nullptr; } +/// Many binary operators with a constant operand have an easy-to-compute +/// range of outputs. This can be used to fold a comparison to always true or +/// always false. +static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { + unsigned Width = Lower.getBitWidth(); + const APInt *C; + switch (BO.getOpcode()) { + case Instruction::Add: + if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) { + // FIXME: If we have both nuw and nsw, we should reduce the range further. + if (BO.hasNoUnsignedWrap()) { + // 'add nuw x, C' produces [C, UINT_MAX]. + Lower = *C; + } else if (BO.hasNoSignedWrap()) { + if (C->isNegative()) { + // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. + Lower = APInt::getSignedMinValue(Width); + Upper = APInt::getSignedMaxValue(Width) + *C + 1; + } else { + // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX]. + Lower = APInt::getSignedMinValue(Width) + *C; + Upper = APInt::getSignedMaxValue(Width) + 1; + } + } + } + break; + + case Instruction::And: + if (match(BO.getOperand(1), m_APInt(C))) + // 'and x, C' produces [0, C]. + Upper = *C + 1; + break; + + case Instruction::Or: + if (match(BO.getOperand(1), m_APInt(C))) + // 'or x, C' produces [C, UINT_MAX]. + Lower = *C; + break; + + case Instruction::AShr: + if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { + // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C]. + Lower = APInt::getSignedMinValue(Width).ashr(*C); + Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1; + } else if (match(BO.getOperand(0), m_APInt(C))) { + unsigned ShiftAmount = Width - 1; + if (*C != 0 && BO.isExact()) + ShiftAmount = C->countTrailingZeros(); + if (C->isNegative()) { + // 'ashr C, x' produces [C, C >> (Width-1)] + Lower = *C; + Upper = C->ashr(ShiftAmount) + 1; + } else { + // 'ashr C, x' produces [C >> (Width-1), C] + Lower = C->ashr(ShiftAmount); + Upper = *C + 1; + } + } + break; + + case Instruction::LShr: + if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { + // 'lshr x, C' produces [0, UINT_MAX >> C]. + Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1; + } else if (match(BO.getOperand(0), m_APInt(C))) { + // 'lshr C, x' produces [C >> (Width-1), C]. + unsigned ShiftAmount = Width - 1; + if (*C != 0 && BO.isExact()) + ShiftAmount = C->countTrailingZeros(); + Lower = C->lshr(ShiftAmount); + Upper = *C + 1; + } + break; + + case Instruction::Shl: + if (match(BO.getOperand(0), m_APInt(C))) { + if (BO.hasNoUnsignedWrap()) { + // 'shl nuw C, x' produces [C, C << CLZ(C)] + Lower = *C; + Upper = Lower.shl(Lower.countLeadingZeros()) + 1; + } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw? 
+ if (C->isNegative()) { + // 'shl nsw C, x' produces [C << CLO(C)-1, C] + unsigned ShiftAmount = C->countLeadingOnes() - 1; + Lower = C->shl(ShiftAmount); + Upper = *C + 1; + } else { + // 'shl nsw C, x' produces [C, C << CLZ(C)-1] + unsigned ShiftAmount = C->countLeadingZeros() - 1; + Lower = *C; + Upper = C->shl(ShiftAmount) + 1; + } + } + } + break; + + case Instruction::SDiv: + if (match(BO.getOperand(1), m_APInt(C))) { + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + if (C->isAllOnesValue()) { + // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] + // where C != -1 and C != 0 and C != 1 + Lower = IntMin + 1; + Upper = IntMax + 1; + } else if (C->countLeadingZeros() < Width - 1) { + // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C] + // where C != -1 and C != 0 and C != 1 + Lower = IntMin.sdiv(*C); + Upper = IntMax.sdiv(*C); + if (Lower.sgt(Upper)) + std::swap(Lower, Upper); + Upper = Upper + 1; + assert(Upper != Lower && "Upper part of range has wrapped!"); + } + } else if (match(BO.getOperand(0), m_APInt(C))) { + if (C->isMinSignedValue()) { + // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. + Lower = *C; + Upper = Lower.lshr(1) + 1; + } else { + // 'sdiv C, x' produces [-|C|, |C|]. + Upper = C->abs() + 1; + Lower = (-Upper) + 1; + } + } + break; + + case Instruction::UDiv: + if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) { + // 'udiv x, C' produces [0, UINT_MAX / C]. + Upper = APInt::getMaxValue(Width).udiv(*C) + 1; + } else if (match(BO.getOperand(0), m_APInt(C))) { + // 'udiv C, x' produces [0, C]. + Upper = *C + 1; + } + break; + + case Instruction::SRem: + if (match(BO.getOperand(1), m_APInt(C))) { + // 'srem x, C' produces (-|C|, |C|). + Upper = C->abs(); + Lower = (-Upper) + 1; + } + break; + + case Instruction::URem: + if (match(BO.getOperand(1), m_APInt(C))) + // 'urem x, C' produces [0, C). + Upper = *C; + break; + + default: + break; + } +} + static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, Value *RHS) { const APInt *C; @@ -2390,114 +2535,12 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, if (RHS_CR.isFullSet()) return ConstantInt::getTrue(GetCompareTy(RHS)); - // Many binary operators with constant RHS have easy to compute constant - // range. Use them to check whether the comparison is a tautology. + // Find the range of possible values for binary operators. unsigned Width = C->getBitWidth(); APInt Lower = APInt(Width, 0); APInt Upper = APInt(Width, 0); - const APInt *C2; - if (match(LHS, m_URem(m_Value(), m_APInt(C2)))) { - // 'urem x, C2' produces [0, C2). - Upper = *C2; - } else if (match(LHS, m_SRem(m_Value(), m_APInt(C2)))) { - // 'srem x, C2' produces (-|C2|, |C2|). - Upper = C2->abs(); - Lower = (-Upper) + 1; - } else if (match(LHS, m_UDiv(m_APInt(C2), m_Value()))) { - // 'udiv C2, x' produces [0, C2]. - Upper = *C2 + 1; - } else if (match(LHS, m_UDiv(m_Value(), m_APInt(C2)))) { - // 'udiv x, C2' produces [0, UINT_MAX / C2]. - APInt NegOne = APInt::getAllOnesValue(Width); - if (*C2 != 0) - Upper = NegOne.udiv(*C2) + 1; - } else if (match(LHS, m_SDiv(m_APInt(C2), m_Value()))) { - if (C2->isMinSignedValue()) { - // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. - Lower = *C2; - Upper = Lower.lshr(1) + 1; - } else { - // 'sdiv C2, x' produces [-|C2|, |C2|]. 
- Upper = C2->abs() + 1; - Lower = (-Upper) + 1; - } - } else if (match(LHS, m_SDiv(m_Value(), m_APInt(C2)))) { - APInt IntMin = APInt::getSignedMinValue(Width); - APInt IntMax = APInt::getSignedMaxValue(Width); - if (C2->isAllOnesValue()) { - // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] - // where C2 != -1 and C2 != 0 and C2 != 1 - Lower = IntMin + 1; - Upper = IntMax + 1; - } else if (C2->countLeadingZeros() < Width - 1) { - // 'sdiv x, C2' produces [INT_MIN / C2, INT_MAX / C2] - // where C2 != -1 and C2 != 0 and C2 != 1 - Lower = IntMin.sdiv(*C2); - Upper = IntMax.sdiv(*C2); - if (Lower.sgt(Upper)) - std::swap(Lower, Upper); - Upper = Upper + 1; - assert(Upper != Lower && "Upper part of range has wrapped!"); - } - } else if (match(LHS, m_NUWShl(m_APInt(C2), m_Value()))) { - // 'shl nuw C2, x' produces [C2, C2 << CLZ(C2)] - Lower = *C2; - Upper = Lower.shl(Lower.countLeadingZeros()) + 1; - } else if (match(LHS, m_NSWShl(m_APInt(C2), m_Value()))) { - if (C2->isNegative()) { - // 'shl nsw C2, x' produces [C2 << CLO(C2)-1, C2] - unsigned ShiftAmount = C2->countLeadingOnes() - 1; - Lower = C2->shl(ShiftAmount); - Upper = *C2 + 1; - } else { - // 'shl nsw C2, x' produces [C2, C2 << CLZ(C2)-1] - unsigned ShiftAmount = C2->countLeadingZeros() - 1; - Lower = *C2; - Upper = C2->shl(ShiftAmount) + 1; - } - } else if (match(LHS, m_LShr(m_Value(), m_APInt(C2)))) { - // 'lshr x, C2' produces [0, UINT_MAX >> C2]. - APInt NegOne = APInt::getAllOnesValue(Width); - if (C2->ult(Width)) - Upper = NegOne.lshr(*C2) + 1; - } else if (match(LHS, m_LShr(m_APInt(C2), m_Value()))) { - // 'lshr C2, x' produces [C2 >> (Width-1), C2]. - unsigned ShiftAmount = Width - 1; - if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact()) - ShiftAmount = C2->countTrailingZeros(); - Lower = C2->lshr(ShiftAmount); - Upper = *C2 + 1; - } else if (match(LHS, m_AShr(m_Value(), m_APInt(C2)))) { - // 'ashr x, C2' produces [INT_MIN >> C2, INT_MAX >> C2]. - APInt IntMin = APInt::getSignedMinValue(Width); - APInt IntMax = APInt::getSignedMaxValue(Width); - if (C2->ult(Width)) { - Lower = IntMin.ashr(*C2); - Upper = IntMax.ashr(*C2) + 1; - } - } else if (match(LHS, m_AShr(m_APInt(C2), m_Value()))) { - unsigned ShiftAmount = Width - 1; - if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact()) - ShiftAmount = C2->countTrailingZeros(); - if (C2->isNegative()) { - // 'ashr C2, x' produces [C2, C2 >> (Width-1)] - Lower = *C2; - Upper = C2->ashr(ShiftAmount) + 1; - } else { - // 'ashr C2, x' produces [C2 >> (Width-1), C2] - Lower = C2->ashr(ShiftAmount); - Upper = *C2 + 1; - } - } else if (match(LHS, m_Or(m_Value(), m_APInt(C2)))) { - // 'or x, C2' produces [C2, UINT_MAX]. - Lower = *C2; - } else if (match(LHS, m_And(m_Value(), m_APInt(C2)))) { - // 'and x, C2' produces [0, C2]. - Upper = *C2 + 1; - } else if (match(LHS, m_NUWAdd(m_Value(), m_APInt(C2)))) { - // 'add nuw x, C2' produces [C2, UINT_MAX]. - Lower = *C2; - } + if (auto *BO = dyn_cast<BinaryOperator>(LHS)) + setLimitsForBinOp(*BO, Lower, Upper); ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true); @@ -3064,8 +3107,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If both operands have range metadata, use the metadata // to simplify the comparison. 
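setLimitsForBinOp hands simplifyICmpWithConstant a conservative half-open range for a binary operator with one constant operand, and the compare then folds whenever that range settles the predicate; for example 'urem x, 8' lies in [0, 8), so '(x % 8) <u 8' is always true. A simplified unsigned-only sketch of the idea; the three range constructors mirror cases above, and everything else (wrapping ranges, ConstantRange itself, the other predicates) is deliberately left out:

#include <cstdint>
#include <optional>

// Half-open unsigned range [Lo, Hi); Lo == Hi means "unknown / full range".
struct URange { uint32_t Lo = 0, Hi = 0; bool known() const { return Lo != Hi; } };

// A few of the ranges computed above for a constant operand C:
static URange rangeOfURem(uint32_t C) { return {0, C}; }        // x % C  in [0, C)
static URange rangeOfAnd(uint32_t C)  { return {0, C + 1}; }    // x & C  in [0, C]
static URange rangeOfUDivByC(uint32_t C) {                      // x / C  in [0, UINT_MAX / C], C != 0
  return {0, UINT32_MAX / C + 1};
}

// Fold "LHS <u RHS": always true when every possible value is below RHS,
// always false when no possible value is below RHS, otherwise unknown.
static std::optional<bool> foldULTAgainstConstant(URange LHS, uint32_t RHS) {
  if (!LHS.known())
    return std::nullopt;
  if (LHS.Hi <= RHS)      // max value is Hi - 1 < RHS
    return true;
  if (LHS.Lo >= RHS)      // even the smallest value is not < RHS
    return false;
  return std::nullopt;
}

// Examples: (x % 8) <u 8 folds to true, (x & 3) <u 0x10 folds to true,
// and (x % 8) <u 0 folds to false.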
if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) { - auto RHS_Instr = dyn_cast<Instruction>(RHS); - auto LHS_Instr = dyn_cast<Instruction>(LHS); + auto RHS_Instr = cast<Instruction>(RHS); + auto LHS_Instr = cast<Instruction>(LHS); if (RHS_Instr->getMetadata(LLVMContext::MD_range) && LHS_Instr->getMetadata(LLVMContext::MD_range)) { @@ -4039,6 +4082,62 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, RecursionLimit); } +static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const Query &Q, + unsigned MaxRecurse) { + Type *InVecTy = Op0->getType(); + unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); + unsigned InVecNumElts = InVecTy->getVectorNumElements(); + + auto *Op0Const = dyn_cast<Constant>(Op0); + auto *Op1Const = dyn_cast<Constant>(Op1); + + // If all operands are constant, constant fold the shuffle. + if (Op0Const && Op1Const) + return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); + + // If only one of the operands is constant, constant fold the shuffle if the + // mask does not select elements from the variable operand. + bool MaskSelects0 = false, MaskSelects1 = false; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = ShuffleVectorInst::getMaskValue(Mask, i); + if (Idx == -1) + continue; + if ((unsigned)Idx < InVecNumElts) + MaskSelects0 = true; + else + MaskSelects1 = true; + } + if (!MaskSelects0 && Op1Const) + return ConstantFoldShuffleVectorInstruction(UndefValue::get(InVecTy), + Op1Const, Mask); + if (!MaskSelects1 && Op0Const) + return ConstantFoldShuffleVectorInstruction(Op0Const, + UndefValue::get(InVecTy), Mask); + + // A shuffle of a splat is always the splat itself. Legal if the shuffle's + // value type is same as the input vectors' type. + if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0)) + if (!MaskSelects1 && RetTy == InVecTy && + OpShuf->getMask()->getSplatValue()) + return Op0; + if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op1)) + if (!MaskSelects0 && RetTy == InVecTy && + OpShuf->getMask()->getSplatValue()) + return Op1; + + return nullptr; +} + +/// Given operands for a ShuffleVectorInst, fold the result or return null. +Value *llvm::SimplifyShuffleVectorInst( + Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, + const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, + AssumptionCache *AC, const Instruction *CxtI) { + return ::SimplifyShuffleVectorInst( + Op0, Op1, Mask, RetTy, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); +} + //=== Helper functions for higher up the class hierarchy. /// Given operands for a BinaryOperator, see if we can fold the result. 
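SimplifyShuffleVectorInst above only constant-folds a half-constant shuffle when the mask never selects a lane from the variable operand, so the first step is classifying which input each mask element reads. A standalone sketch of that scan, using plain std::vector<int> masks where -1 marks an undef lane, as in LLVM's mask encoding:

#include <vector>

// Which inputs does a shuffle mask actually read? Mask values in
// [0, NumElts) select from operand 0, values in [NumElts, 2*NumElts)
// select from operand 1, and -1 lanes are undef.
struct MaskUse { bool Selects0 = false, Selects1 = false; };

static MaskUse classifyShuffleMask(const std::vector<int> &Mask,
                                   unsigned InVecNumElts) {
  MaskUse Use;
  for (int Idx : Mask) {
    if (Idx == -1)
      continue;                        // undef lane constrains nothing
    if ((unsigned)Idx < InVecNumElts)
      Use.Selects0 = true;
    else
      Use.Selects1 = true;
  }
  return Use;
}

// Example: with 4-element inputs, mask {4,5,6,7} never touches operand 0, so
// if operand 1 is constant the whole shuffle folds and operand 0 can be
// treated as undef, which is exactly the case handled in the patch above.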
@@ -4047,61 +4146,43 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: - return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - Q, MaxRecurse); + return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse); case Instruction::FAdd: return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); - case Instruction::Sub: - return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - Q, MaxRecurse); + return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse); case Instruction::FSub: return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); - - case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse); + case Instruction::Mul: + return SimplifyMulInst(LHS, RHS, Q, MaxRecurse); case Instruction::FMul: - return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse); - case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); - case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); + return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::SDiv: + return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::UDiv: + return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::FDiv: - return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); - case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); - case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); + return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::SRem: + return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::URem: + return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); case Instruction::FRem: - return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::Shl: - return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - Q, MaxRecurse); + return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse); case Instruction::LShr: - return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); + return SimplifyLShrInst(LHS, RHS, false, Q, MaxRecurse); case Instruction::AShr: - return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); - case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse); - case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse); - case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); + return SimplifyAShrInst(LHS, RHS, false, Q, MaxRecurse); + case Instruction::And: + return SimplifyAndInst(LHS, RHS, Q, MaxRecurse); + case Instruction::Or: + return SimplifyOrInst(LHS, RHS, Q, MaxRecurse); + case Instruction::Xor: + return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); default: - if (Constant *CLHS = dyn_cast<Constant>(LHS)) - if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL); - - // If the operation is associative, try some generic simplifications. - if (Instruction::isAssociative(Opcode)) - if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse)) - return V; - - // If the operation is with the result of a select instruction check whether - // operating on either branch of the select always yields the same value. 
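The generic fallback removed from this switch leaned on helpers such as ThreadBinOpOverSelect: a binary operation whose operand is a select can be folded when evaluating the operation on both arms simplifies to the same value. A toy illustration with a single fold rule; the real helpers recurse through InstSimplify instead of using a hard-coded rule like this:

#include <optional>

// Toy rule: x & 0 -> 0, regardless of x; anything else does not simplify.
static std::optional<long> simplifyAndToy(long LHS, long RHS) {
  if (RHS == 0)
    return 0;
  return std::nullopt;
}

// "op (select %c, T, F), R" folds when both arms simplify to the same value,
// because then the select condition no longer matters.
static std::optional<long> threadAndOverSelect(long TrueArm, long FalseArm,
                                               long RHS) {
  std::optional<long> TV = simplifyAndToy(TrueArm, RHS);
  std::optional<long> FV = simplifyAndToy(FalseArm, RHS);
  if (TV && FV && *TV == *FV)
    return *TV;
  return std::nullopt;
}

// Example: (%c ? %x : %y) & 0 folds to 0 because both arms fold to 0.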
- if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse)) - return V; - - // If the operation is with the result of a phi instruction, check whether - // operating on all incoming values of the phi always yields the same value. - if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse)) - return V; - - return nullptr; + llvm_unreachable("Unexpected opcode"); } } @@ -4267,6 +4348,7 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, case Intrinsic::fabs: { if (SignBitMustBeZero(*ArgBegin, Q.TLI)) return *ArgBegin; + return nullptr; } default: return nullptr; @@ -4396,7 +4478,8 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, /// If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC) { + const DominatorTree *DT, AssumptionCache *AC, + OptimizationRemarkEmitter *ORE) { Value *Result; switch (I->getOpcode()) { @@ -4522,6 +4605,13 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I); break; } + case Instruction::ShuffleVector: { + auto *SVI = cast<ShuffleVectorInst>(I); + Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), + SVI->getMask(), SVI->getType(), DL, TLI, + DT, AC, I); + break; + } case Instruction::PHI: Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I)); break; @@ -4537,6 +4627,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), DL, TLI, DT, AC, I); break; + case Instruction::Alloca: + // No simplifications for Alloca and it can't be constant folded. + Result = nullptr; + break; } // In general, it is possible for computeKnownBits to determine all bits in a @@ -4545,7 +4639,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, unsigned BitWidth = I->getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT); + computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT, ORE); if ((KnownZero | KnownOne).isAllOnesValue()) Result = ConstantInt::get(I->getType(), KnownOne); } diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp index d1374acd963e..2a736ec0379c 100644 --- a/lib/Analysis/IteratedDominanceFrontier.cpp +++ b/lib/Analysis/IteratedDominanceFrontier.cpp @@ -64,10 +64,7 @@ void IDFCalculator<NodeTy>::calculate( BasicBlock *BB = Node->getBlock(); // Succ is the successor in the direction we are calculating IDF, so it is // successor for IDF, and predecessor for Reverse IDF. 
- for (auto SuccIter = GraphTraits<NodeTy>::child_begin(BB), - End = GraphTraits<NodeTy>::child_end(BB); - SuccIter != End; ++SuccIter) { - BasicBlock *Succ = *SuccIter; + for (auto *Succ : children<NodeTy>(BB)) { DomTreeNode *SuccNode = DT.getNode(Succ); // Quickly skip all CFG edges that are also dominator tree edges instead diff --git a/lib/Analysis/LazyBlockFrequencyInfo.cpp b/lib/Analysis/LazyBlockFrequencyInfo.cpp index 596b6fc1afb5..a8178ecc0a24 100644 --- a/lib/Analysis/LazyBlockFrequencyInfo.cpp +++ b/lib/Analysis/LazyBlockFrequencyInfo.cpp @@ -9,7 +9,7 @@ // // This is an alternative analysis pass to BlockFrequencyInfoWrapperPass. The // difference is that with this pass the block frequencies are not computed when -// the analysis pass is executed but rather when the BFI results is explicitly +// the analysis pass is executed but rather when the BFI result is explicitly // requested by the analysis client. // //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index f7cf8c6729f2..eef56815f2e0 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -18,26 +18,50 @@ #include "llvm/IR/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include <utility> using namespace llvm; #define DEBUG_TYPE "lcg" +void LazyCallGraph::EdgeSequence::insertEdgeInternal(Node &TargetN, + Edge::Kind EK) { + EdgeIndexMap.insert({&TargetN, Edges.size()}); + Edges.emplace_back(TargetN, EK); +} + +void LazyCallGraph::EdgeSequence::setEdgeKind(Node &TargetN, Edge::Kind EK) { + Edges[EdgeIndexMap.find(&TargetN)->second].setKind(EK); +} + +bool LazyCallGraph::EdgeSequence::removeEdgeInternal(Node &TargetN) { + auto IndexMapI = EdgeIndexMap.find(&TargetN); + if (IndexMapI == EdgeIndexMap.end()) + return false; + + Edges[IndexMapI->second] = Edge(); + EdgeIndexMap.erase(IndexMapI); + return true; +} + static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges, - DenseMap<Function *, int> &EdgeIndexMap, Function &F, - LazyCallGraph::Edge::Kind EK) { - if (!EdgeIndexMap.insert({&F, Edges.size()}).second) + DenseMap<LazyCallGraph::Node *, int> &EdgeIndexMap, + LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK) { + if (!EdgeIndexMap.insert({&N, Edges.size()}).second) return; - DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n"); - Edges.emplace_back(LazyCallGraph::Edge(F, EK)); + DEBUG(dbgs() << " Added callable function: " << N.getName() << "\n"); + Edges.emplace_back(LazyCallGraph::Edge(N, EK)); } -LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) - : G(&G), F(F), DFSNumber(0), LowLink(0) { - DEBUG(dbgs() << " Adding functions called by '" << F.getName() +LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() { + assert(!Edges && "Must not have already populated the edges for this node!"); + + DEBUG(dbgs() << " Adding functions called by '" << getName() << "' to the graph.\n"); + Edges = EdgeSequence(); + SmallVector<Constant *, 16> Worklist; SmallPtrSet<Function *, 4> Callees; SmallPtrSet<Constant *, 16> Visited; @@ -58,14 +82,15 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) // alias. Then a test of the address of the weak function against the new // strong definition's address would be an effective way to determine the // safety of optimizing a direct call edge. 
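The new EdgeSequence keeps its edges in a vector paired with a map from target node to vector index; insertEdgeInternal and addEdge reject duplicates by attempting the map insertion first, and removeEdgeInternal blanks the vector slot instead of erasing it so existing indices stay valid. A standalone sketch of that storage scheme, with std::string targets standing in for graph nodes:

#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

struct EdgeList {
  std::vector<std::optional<std::string>> Edges;   // empty slot == removed edge
  std::unordered_map<std::string, size_t> IndexOf; // target -> slot index

  bool insert(const std::string &Target) {         // dedup, like addEdge()
    if (!IndexOf.emplace(Target, Edges.size()).second)
      return false;                                // already present
    Edges.emplace_back(Target);
    return true;
  }

  bool remove(const std::string &Target) {         // like removeEdgeInternal()
    auto It = IndexOf.find(Target);
    if (It == IndexOf.end())
      return false;
    Edges[It->second].reset();                     // leave a tombstone, keep indices stable
    IndexOf.erase(It);
    return true;
  }
};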
- for (BasicBlock &BB : F) + for (BasicBlock &BB : *F) for (Instruction &I : BB) { if (auto CS = CallSite(&I)) if (Function *Callee = CS.getCalledFunction()) if (!Callee->isDeclaration()) if (Callees.insert(Callee).second) { Visited.insert(Callee); - addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call); + addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(*Callee), + LazyCallGraph::Edge::Call); } for (Value *Op : I.operand_values()) @@ -78,50 +103,33 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) // function containing) operands to all of the instructions in the function. // Process them (recursively) collecting every function found. visitReferences(Worklist, Visited, [&](Function &F) { - addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref); + addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(F), + LazyCallGraph::Edge::Ref); }); -} - -void LazyCallGraph::Node::insertEdgeInternal(Function &Target, Edge::Kind EK) { - if (Node *N = G->lookup(Target)) - return insertEdgeInternal(*N, EK); - - EdgeIndexMap.insert({&Target, Edges.size()}); - Edges.emplace_back(Target, EK); -} -void LazyCallGraph::Node::insertEdgeInternal(Node &TargetN, Edge::Kind EK) { - EdgeIndexMap.insert({&TargetN.getFunction(), Edges.size()}); - Edges.emplace_back(TargetN, EK); + return *Edges; } -void LazyCallGraph::Node::setEdgeKind(Function &TargetF, Edge::Kind EK) { - Edges[EdgeIndexMap.find(&TargetF)->second].setKind(EK); +void LazyCallGraph::Node::replaceFunction(Function &NewF) { + assert(F != &NewF && "Must not replace a function with itself!"); + F = &NewF; } -void LazyCallGraph::Node::removeEdgeInternal(Function &Target) { - auto IndexMapI = EdgeIndexMap.find(&Target); - assert(IndexMapI != EdgeIndexMap.end() && - "Target not in the edge set for this caller?"); - - Edges[IndexMapI->second] = Edge(); - EdgeIndexMap.erase(IndexMapI); -} - -void LazyCallGraph::Node::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LazyCallGraph::Node::dump() const { dbgs() << *this << '\n'; } +#endif -LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) { +LazyCallGraph::LazyCallGraph(Module &M) { DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() << "\n"); for (Function &F : M) - if (!F.isDeclaration() && !F.hasLocalLinkage()) - if (EntryIndexMap.insert({&F, EntryEdges.size()}).second) { - DEBUG(dbgs() << " Adding '" << F.getName() - << "' to entry set of the graph.\n"); - EntryEdges.emplace_back(F, Edge::Ref); - } + if (!F.isDeclaration() && !F.hasLocalLinkage()) { + DEBUG(dbgs() << " Adding '" << F.getName() + << "' to entry set of the graph.\n"); + addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref); + } // Now add entry nodes for functions reachable via initializers to globals. 
SmallVector<Constant *, 16> Worklist; @@ -134,21 +142,15 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) { DEBUG(dbgs() << " Adding functions referenced by global initializers to the " "entry set.\n"); visitReferences(Worklist, Visited, [&](Function &F) { - addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref); + addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), + LazyCallGraph::Edge::Ref); }); - - for (const Edge &E : EntryEdges) - RefSCCEntryNodes.push_back(&E.getFunction()); } LazyCallGraph::LazyCallGraph(LazyCallGraph &&G) : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)), - EntryEdges(std::move(G.EntryEdges)), - EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)), - SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)), - DFSStack(std::move(G.DFSStack)), - RefSCCEntryNodes(std::move(G.RefSCCEntryNodes)), - NextDFSNumber(G.NextDFSNumber) { + EntryEdges(std::move(G.EntryEdges)), SCCBPA(std::move(G.SCCBPA)), + SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)) { updateGraphPtrs(); } @@ -156,20 +158,18 @@ LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) { BPA = std::move(G.BPA); NodeMap = std::move(G.NodeMap); EntryEdges = std::move(G.EntryEdges); - EntryIndexMap = std::move(G.EntryIndexMap); SCCBPA = std::move(G.SCCBPA); SCCMap = std::move(G.SCCMap); LeafRefSCCs = std::move(G.LeafRefSCCs); - DFSStack = std::move(G.DFSStack); - RefSCCEntryNodes = std::move(G.RefSCCEntryNodes); - NextDFSNumber = G.NextDFSNumber; updateGraphPtrs(); return *this; } -void LazyCallGraph::SCC::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LazyCallGraph::SCC::dump() const { dbgs() << *this << '\n'; } +#endif #ifndef NDEBUG void LazyCallGraph::SCC::verify() { @@ -184,8 +184,8 @@ void LazyCallGraph::SCC::verify() { "Must set DFS numbers to -1 when adding a node to an SCC!"); assert(N->LowLink == -1 && "Must set low link to -1 when adding a node to an SCC!"); - for (Edge &E : *N) - assert(E.getNode() && "Can't have an edge to a raw function!"); + for (Edge &E : **N) + assert(E.getNode() && "Can't have an unpopulated node!"); } } #endif @@ -195,10 +195,9 @@ bool LazyCallGraph::SCC::isParentOf(const SCC &C) const { return false; for (Node &N : *this) - for (Edge &E : N.calls()) - if (Node *CalleeN = E.getNode()) - if (OuterRefSCC->G->lookupSCC(*CalleeN) == &C) - return true; + for (Edge &E : N->calls()) + if (OuterRefSCC->G->lookupSCC(E.getNode()) == &C) + return true; // No edges found. 
return false; @@ -218,11 +217,8 @@ bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const { do { const SCC &C = *Worklist.pop_back_val(); for (Node &N : C) - for (Edge &E : N.calls()) { - Node *CalleeN = E.getNode(); - if (!CalleeN) - continue; - SCC *CalleeC = G.lookupSCC(*CalleeN); + for (Edge &E : N->calls()) { + SCC *CalleeC = G.lookupSCC(E.getNode()); if (!CalleeC) continue; @@ -243,9 +239,11 @@ bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const { LazyCallGraph::RefSCC::RefSCC(LazyCallGraph &G) : G(&G) {} -void LazyCallGraph::RefSCC::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LazyCallGraph::RefSCC::dump() const { dbgs() << *this << '\n'; } +#endif #ifndef NDEBUG void LazyCallGraph::RefSCC::verify() { @@ -279,10 +277,10 @@ void LazyCallGraph::RefSCC::verify() { for (int i = 0, Size = SCCs.size(); i < Size; ++i) { SCC &SourceSCC = *SCCs[i]; for (Node &N : SourceSCC) - for (Edge &E : N) { + for (Edge &E : *N) { if (!E.isCall()) continue; - SCC &TargetSCC = *G->lookupSCC(*E.getNode()); + SCC &TargetSCC = *G->lookupSCC(E.getNode()); if (&TargetSCC.getOuterRefSCC() == this) { assert(SCCIndices.find(&TargetSCC)->second <= i && "Edge between SCCs violates post-order relationship."); @@ -299,8 +297,8 @@ void LazyCallGraph::RefSCC::verify() { auto HasConnectingEdge = [&] { for (SCC &C : *ParentRC) for (Node &N : C) - for (Edge &E : N) - if (G->lookupRefSCC(*E.getNode()) == this) + for (Edge &E : *N) + if (G->lookupRefSCC(E.getNode()) == this) return true; return false; }; @@ -461,7 +459,7 @@ updatePostorderSequenceForEdgeInsertion( SmallVector<LazyCallGraph::SCC *, 1> LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { - assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!"); + assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!"); SmallVector<SCC *, 1> DeletedSCCs; #ifndef NDEBUG @@ -477,7 +475,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // If the two nodes are already part of the same SCC, we're also done as // we've just added more connectivity. if (&SourceSCC == &TargetSCC) { - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); + SourceN->setEdgeKind(TargetN, Edge::Call); return DeletedSCCs; } @@ -490,7 +488,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { int SourceIdx = SCCIndices[&SourceSCC]; int TargetIdx = SCCIndices[&TargetSCC]; if (TargetIdx < SourceIdx) { - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); + SourceN->setEdgeKind(TargetN, Edge::Call); return DeletedSCCs; } @@ -504,11 +502,9 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { ConnectedSet.insert(&SourceSCC); auto IsConnected = [&](SCC &C) { for (Node &N : C) - for (Edge &E : N.calls()) { - assert(E.getNode() && "Must have formed a node within an SCC!"); - if (ConnectedSet.count(G->lookupSCC(*E.getNode()))) + for (Edge &E : N->calls()) + if (ConnectedSet.count(G->lookupSCC(E.getNode()))) return true; - } return false; }; @@ -535,11 +531,10 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { do { SCC &C = *Worklist.pop_back_val(); for (Node &N : C) - for (Edge &E : N) { - assert(E.getNode() && "Must have formed a node within an SCC!"); + for (Edge &E : *N) { if (!E.isCall()) continue; - SCC &EdgeC = *G->lookupSCC(*E.getNode()); + SCC &EdgeC = *G->lookupSCC(E.getNode()); if (&EdgeC.getOuterRefSCC() != this) // Not in this RefSCC... 
continue; @@ -565,7 +560,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // new cycles. We're done. if (MergeRange.begin() == MergeRange.end()) { // Now that the SCC structure is finalized, flip the kind to call. - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); + SourceN->setEdgeKind(TargetN, Edge::Call); return DeletedSCCs; } @@ -600,7 +595,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { SCCIndices[C] -= IndexOffset; // Now that the SCC structure is finalized, flip the kind to call. - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); + SourceN->setEdgeKind(TargetN, Edge::Call); // And we're done! return DeletedSCCs; @@ -608,7 +603,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN, Node &TargetN) { - assert(SourceN[TargetN].isCall() && "Must start with a call edge!"); + assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!"); #ifndef NDEBUG // In a debug build, verify the RefSCC is valid to start with and when this @@ -625,12 +620,12 @@ void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN, "Source and Target must be in separate SCCs for this to be trivial!"); // Set the edge kind. - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref); + SourceN->setEdgeKind(TargetN, Edge::Ref); } iterator_range<LazyCallGraph::RefSCC::iterator> LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { - assert(SourceN[TargetN].isCall() && "Must start with a call edge!"); + assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!"); #ifndef NDEBUG // In a debug build, verify the RefSCC is valid to start with and when this @@ -650,7 +645,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { "full CG update."); // Set the edge kind. - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref); + SourceN->setEdgeKind(TargetN, Edge::Ref); // Otherwise we are removing a call edge from a single SCC. This may break // the cycle. In order to compute the new set of SCCs, we need to do a small @@ -665,7 +660,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { // etc. SCC &OldSCC = TargetSCC; - SmallVector<std::pair<Node *, call_edge_iterator>, 16> DFSStack; + SmallVector<std::pair<Node *, EdgeSequence::call_iterator>, 16> DFSStack; SmallVector<Node *, 16> PendingSCCStack; SmallVector<SCC *, 4> NewSCCs; @@ -706,14 +701,14 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { RootN->DFSNumber = RootN->LowLink = 1; int NextDFSNumber = 2; - DFSStack.push_back({RootN, RootN->call_begin()}); + DFSStack.push_back({RootN, (*RootN)->call_begin()}); do { Node *N; - call_edge_iterator I; + EdgeSequence::call_iterator I; std::tie(N, I) = DFSStack.pop_back_val(); - auto E = N->call_end(); + auto E = (*N)->call_end(); while (I != E) { - Node &ChildN = *I->getNode(); + Node &ChildN = I->getNode(); if (ChildN.DFSNumber == 0) { // We haven't yet visited this child, so descend, pushing the current // node onto the stack. 
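The switch-to-ref rewrite above (and the later buildGenericSCCs refactoring) keeps reusing one traversal shape: an explicit stack of (node, resume-iterator) pairs, so the walk can descend into an unvisited child and later pick the parent back up at the exact edge it left. The standalone sketch below uses plain standard-library types and illustrative names rather than the LLVM classes, and it shows only that traversal shape (DFS numbering and low-link folding), not the full SCC formation.

#include <tuple>
#include <utility>
#include <vector>

struct GraphNode {
  std::vector<int> Children; // indices of successor nodes
  int DFSNumber = 0;         // 0 = not yet visited
  int LowLink = 0;
};

void iterativeDFS(std::vector<GraphNode> &G, int Root) {
  using ChildIt = std::vector<int>::iterator;
  std::vector<std::pair<int, ChildIt>> DFSStack;
  int NextDFSNumber = 1;

  G[Root].DFSNumber = G[Root].LowLink = NextDFSNumber++;
  DFSStack.push_back({Root, G[Root].Children.begin()});

  while (!DFSStack.empty()) {
    int N;
    ChildIt I;
    std::tie(N, I) = DFSStack.back();
    DFSStack.pop_back();
    ChildIt E = G[N].Children.end();

    while (I != E) {
      int Child = *I;
      if (G[Child].DFSNumber == 0) {
        // Unvisited child: remember where to resume in N, then descend.
        DFSStack.push_back({N, I});
        G[Child].DFSNumber = G[Child].LowLink = NextDFSNumber++;
        N = Child;
        I = G[N].Children.begin();
        E = G[N].Children.end();
        continue;
      }
      // Already-numbered child: fold its low-link into ours and advance.
      if (G[Child].LowLink < G[N].LowLink)
        G[N].LowLink = G[Child].LowLink;
      ++I;
    }
    // N and everything reachable below it on this path is finished here; the
    // real code would now compare LowLink against DFSNumber and, on a match,
    // pop the pending stack to form an SCC.
  }
}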
@@ -723,8 +718,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { "Found a node with 0 DFS number but already in an SCC!"); ChildN.DFSNumber = ChildN.LowLink = NextDFSNumber++; N = &ChildN; - I = N->call_begin(); - E = N->call_end(); + I = (*N)->call_begin(); + E = (*N)->call_end(); continue; } @@ -817,17 +812,19 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) { void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN, Node &TargetN) { - assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!"); + assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!"); assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); assert(G->lookupRefSCC(TargetN) != this && "Target must not be in this RefSCC."); +#ifdef EXPENSIVE_CHECKS assert(G->lookupRefSCC(TargetN)->isDescendantOf(*this) && "Target must be a descendant of the Source."); +#endif // Edges between RefSCCs are the same regardless of call or ref, so we can // just flip the edge here. - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call); + SourceN->setEdgeKind(TargetN, Edge::Call); #ifndef NDEBUG // Check that the RefSCC is still valid. @@ -837,17 +834,19 @@ void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN, void LazyCallGraph::RefSCC::switchOutgoingEdgeToRef(Node &SourceN, Node &TargetN) { - assert(SourceN[TargetN].isCall() && "Must start with a call edge!"); + assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!"); assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); assert(G->lookupRefSCC(TargetN) != this && "Target must not be in this RefSCC."); +#ifdef EXPENSIVE_CHECKS assert(G->lookupRefSCC(TargetN)->isDescendantOf(*this) && "Target must be a descendant of the Source."); +#endif // Edges between RefSCCs are the same regardless of call or ref, so we can // just flip the edge here. - SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref); + SourceN->setEdgeKind(TargetN, Edge::Ref); #ifndef NDEBUG // Check that the RefSCC is still valid. @@ -860,7 +859,7 @@ void LazyCallGraph::RefSCC::insertInternalRefEdge(Node &SourceN, assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC."); - SourceN.insertEdgeInternal(TargetN, Edge::Ref); + SourceN->insertEdgeInternal(TargetN, Edge::Ref); #ifndef NDEBUG // Check that the RefSCC is still valid. @@ -871,14 +870,16 @@ void LazyCallGraph::RefSCC::insertInternalRefEdge(Node &SourceN, void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN, Edge::Kind EK) { // First insert it into the caller. - SourceN.insertEdgeInternal(TargetN, EK); + SourceN->insertEdgeInternal(TargetN, EK); assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); RefSCC &TargetC = *G->lookupRefSCC(TargetN); assert(&TargetC != this && "Target must not be in this RefSCC."); +#ifdef EXPENSIVE_CHECKS assert(TargetC.isDescendantOf(*this) && "Target must be a descendant of the Source."); +#endif // The only change required is to add this SCC to the parent set of the // callee. 
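Several assertions in the hunks above move from plain !NDEBUG guards to an additional EXPENSIVE_CHECKS guard, because the isDescendantOf checks walk a large part of the graph. A minimal sketch of that layering, with placeholder names and callables standing in for the cheap and expensive predicates (this is not the LLVM code itself):

#include <cassert>

template <typename CheapFn, typename ExpensiveFn>
void verifyEdgeUpdate(CheapFn &&Cheap, ExpensiveFn &&Expensive) {
#ifndef NDEBUG
  assert(Cheap() && "cheap structural invariant violated");
#ifdef EXPENSIVE_CHECKS
  // Only evaluated when the build explicitly opts into expensive verification;
  // this is where a linear or quadratic walk such as an ancestry check lives.
  assert(Expensive() && "expensive ancestry invariant violated");
#endif
#endif
  (void)Cheap;
  (void)Expensive;
}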
@@ -895,8 +896,10 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC."); RefSCC &SourceC = *G->lookupRefSCC(SourceN); assert(&SourceC != this && "Source must not be in this RefSCC."); +#ifdef EXPENSIVE_CHECKS assert(SourceC.isDescendantOf(*this) && "Source must be a descendant of the Target."); +#endif SmallVector<RefSCC *, 1> DeletedRefSCCs; @@ -951,9 +954,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { RefSCC &RC = *Worklist.pop_back_val(); for (SCC &C : RC) for (Node &N : C) - for (Edge &E : N) { - assert(E.getNode() && "Must have formed a node!"); - RefSCC &EdgeRC = *G->lookupRefSCC(*E.getNode()); + for (Edge &E : *N) { + RefSCC &EdgeRC = *G->lookupRefSCC(E.getNode()); if (G->getRefSCCIndex(EdgeRC) <= SourceIdx) // Not in the postorder sequence between source and target. continue; @@ -1003,10 +1005,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { SCCIndices[&InnerC] = SCCIndex++; for (Node &N : InnerC) { G->SCCMap[&N] = &InnerC; - for (Edge &E : N) { - assert(E.getNode() && - "Cannot have a null node within a visited SCC!"); - RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode()); + for (Edge &E : *N) { + RefSCC &ChildRC = *G->lookupRefSCC(E.getNode()); if (MergeSet.count(&ChildRC)) continue; ChildRC.Parents.erase(RC); @@ -1042,7 +1042,7 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) { // At this point we have a merged RefSCC with a post-order SCCs list, just // connect the nodes to form the new edge. - SourceN.insertEdgeInternal(TargetN, Edge::Ref); + SourceN->insertEdgeInternal(TargetN, Edge::Ref); // We return the list of SCCs which were merged so that callers can // invalidate any data they have associated with those SCCs. Note that these @@ -1069,15 +1069,16 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) { #endif // First remove it from the node. - SourceN.removeEdgeInternal(TargetN.getFunction()); + bool Removed = SourceN->removeEdgeInternal(TargetN); + (void)Removed; + assert(Removed && "Target not in the edge set for this caller?"); bool HasOtherEdgeToChildRC = false; bool HasOtherChildRC = false; for (SCC *InnerC : SCCs) { for (Node &N : *InnerC) { - for (Edge &E : N) { - assert(E.getNode() && "Cannot have a missing node in a visited SCC!"); - RefSCC &OtherChildRC = *G->lookupRefSCC(*E.getNode()); + for (Edge &E : *N) { + RefSCC &OtherChildRC = *G->lookupRefSCC(E.getNode()); if (&OtherChildRC == &TargetRC) { HasOtherEdgeToChildRC = true; break; @@ -1116,7 +1117,7 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) { SmallVector<LazyCallGraph::RefSCC *, 1> LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { - assert(!SourceN[TargetN].isCall() && + assert(!(*SourceN)[TargetN].isCall() && "Cannot remove a call edge, it must first be made a ref edge"); #ifndef NDEBUG @@ -1127,7 +1128,9 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { #endif // First remove the actual edge. - SourceN.removeEdgeInternal(TargetN.getFunction()); + bool Removed = SourceN->removeEdgeInternal(TargetN); + (void)Removed; + assert(Removed && "Target not in the edge set for this caller?"); // We return a list of the resulting *new* RefSCCs in post-order. 
SmallVector<RefSCC *, 1> Result; @@ -1186,7 +1189,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { PostOrderMapping[&N] = Number; }; - SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack; + SmallVector<std::pair<Node *, EdgeSequence::iterator>, 4> DFSStack; SmallVector<Node *, 4> PendingRefSCCStack; do { assert(DFSStack.empty() && @@ -1205,18 +1208,18 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { RootN->DFSNumber = RootN->LowLink = 1; int NextDFSNumber = 2; - DFSStack.push_back({RootN, RootN->begin()}); + DFSStack.push_back({RootN, (*RootN)->begin()}); do { Node *N; - edge_iterator I; + EdgeSequence::iterator I; std::tie(N, I) = DFSStack.pop_back_val(); - auto E = N->end(); + auto E = (*N)->end(); assert(N->DFSNumber != 0 && "We should always assign a DFS number " "before processing a node."); while (I != E) { - Node &ChildN = I->getNode(*G); + Node &ChildN = I->getNode(); if (ChildN.DFSNumber == 0) { // Mark that we should start at this child when next this node is the // top of the stack. We don't start at the next child to ensure this @@ -1226,8 +1229,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { // Continue, resetting to the child node. ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++; N = &ChildN; - I = ChildN.begin(); - E = ChildN.end(); + I = ChildN->begin(); + E = ChildN->end(); continue; } if (ChildN.DFSNumber == -1) { @@ -1382,9 +1385,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { #endif for (SCC *C : SCCs) for (Node &N : *C) { - for (Edge &E : N) { - assert(E.getNode() && "Cannot have a missing node in a visited SCC!"); - RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode()); + for (Edge &E : *N) { + RefSCC &ChildRC = *G->lookupRefSCC(E.getNode()); if (&ChildRC == this) continue; ChildRC.Parents.insert(this); @@ -1408,9 +1410,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) { for (RefSCC *ParentRC : OldParents) for (SCC &ParentC : *ParentRC) for (Node &ParentN : ParentC) - for (Edge &E : ParentN) { - assert(E.getNode() && "Cannot have a missing node in a visited SCC!"); - RefSCC &RC = *G->lookupRefSCC(*E.getNode()); + for (Edge &E : *ParentN) { + RefSCC &RC = *G->lookupRefSCC(E.getNode()); if (&RC != ParentRC) RC.Parents.insert(ParentRC); } @@ -1448,8 +1449,10 @@ void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN, return; } +#ifdef EXPENSIVE_CHECKS assert(TargetRC.isDescendantOf(*this) && "Target must be a descendant of the Source."); +#endif // The only change required is to add this RefSCC to the parent set of the // target. This is a set and so idempotent if the edge already existed. TargetRC.Parents.insert(this); @@ -1461,25 +1464,29 @@ void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN, // Check that the RefSCC is still valid when we finish. auto ExitVerifier = make_scope_exit([this] { verify(); }); - // Check that we aren't breaking some invariants of the SCC graph. +#ifdef EXPENSIVE_CHECKS + // Check that we aren't breaking some invariants of the SCC graph. Note that + // this is quadratic in the number of edges in the call graph! SCC &SourceC = *G->lookupSCC(SourceN); SCC &TargetC = *G->lookupSCC(TargetN); if (&SourceC != &TargetC) assert(SourceC.isAncestorOf(TargetC) && "Call edge is not trivial in the SCC graph!"); -#endif +#endif // EXPENSIVE_CHECKS +#endif // NDEBUG + // First insert it into the source or find the existing edge. 
- auto InsertResult = SourceN.EdgeIndexMap.insert( - {&TargetN.getFunction(), SourceN.Edges.size()}); + auto InsertResult = + SourceN->EdgeIndexMap.insert({&TargetN, SourceN->Edges.size()}); if (!InsertResult.second) { // Already an edge, just update it. - Edge &E = SourceN.Edges[InsertResult.first->second]; + Edge &E = SourceN->Edges[InsertResult.first->second]; if (E.isCall()) return; // Nothing to do! E.setKind(Edge::Call); } else { // Create the new edge. - SourceN.Edges.emplace_back(TargetN, Edge::Call); + SourceN->Edges.emplace_back(TargetN, Edge::Call); } // Now that we have the edge, handle the graph fallout. @@ -1491,39 +1498,75 @@ void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) { // Check that the RefSCC is still valid when we finish. auto ExitVerifier = make_scope_exit([this] { verify(); }); +#ifdef EXPENSIVE_CHECKS // Check that we aren't breaking some invariants of the RefSCC graph. RefSCC &SourceRC = *G->lookupRefSCC(SourceN); RefSCC &TargetRC = *G->lookupRefSCC(TargetN); if (&SourceRC != &TargetRC) assert(SourceRC.isAncestorOf(TargetRC) && "Ref edge is not trivial in the RefSCC graph!"); -#endif +#endif // EXPENSIVE_CHECKS +#endif // NDEBUG + // First insert it into the source or find the existing edge. - auto InsertResult = SourceN.EdgeIndexMap.insert( - {&TargetN.getFunction(), SourceN.Edges.size()}); + auto InsertResult = + SourceN->EdgeIndexMap.insert({&TargetN, SourceN->Edges.size()}); if (!InsertResult.second) // Already an edge, we're done. return; // Create the new edge. - SourceN.Edges.emplace_back(TargetN, Edge::Ref); + SourceN->Edges.emplace_back(TargetN, Edge::Ref); // Now that we have the edge, handle the graph fallout. handleTrivialEdgeInsertion(SourceN, TargetN); } -void LazyCallGraph::insertEdge(Node &SourceN, Function &Target, Edge::Kind EK) { - assert(SCCMap.empty() && DFSStack.empty() && +void LazyCallGraph::RefSCC::replaceNodeFunction(Node &N, Function &NewF) { + Function &OldF = N.getFunction(); + +#ifndef NDEBUG + // Check that the RefSCC is still valid when we finish. + auto ExitVerifier = make_scope_exit([this] { verify(); }); + + assert(G->lookupRefSCC(N) == this && + "Cannot replace the function of a node outside this RefSCC."); + + assert(G->NodeMap.find(&NewF) == G->NodeMap.end() && + "Must not have already walked the new function!'"); + + // It is important that this replacement not introduce graph changes so we + // insist that the caller has already removed every use of the original + // function and that all uses of the new function correspond to existing + // edges in the graph. The common and expected way to use this is when + // replacing the function itself in the IR without changing the call graph + // shape and just updating the analysis based on that. + assert(&OldF != &NewF && "Cannot replace a function with itself!"); + assert(OldF.use_empty() && + "Must have moved all uses from the old function to the new!"); +#endif + + N.replaceFunction(NewF); + + // Update various call graph maps. 
+ G->NodeMap.erase(&OldF); + G->NodeMap[&NewF] = &N; +} + +void LazyCallGraph::insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK) { + assert(SCCMap.empty() && "This method cannot be called after SCCs have been formed!"); - return SourceN.insertEdgeInternal(Target, EK); + return SourceN->insertEdgeInternal(TargetN, EK); } -void LazyCallGraph::removeEdge(Node &SourceN, Function &Target) { - assert(SCCMap.empty() && DFSStack.empty() && +void LazyCallGraph::removeEdge(Node &SourceN, Node &TargetN) { + assert(SCCMap.empty() && "This method cannot be called after SCCs have been formed!"); - return SourceN.removeEdgeInternal(Target); + bool Removed = SourceN->removeEdgeInternal(TargetN); + (void)Removed; + assert(Removed && "Target not in the edge set for this caller?"); } void LazyCallGraph::removeDeadFunction(Function &F) { @@ -1532,19 +1575,6 @@ void LazyCallGraph::removeDeadFunction(Function &F) { assert(F.use_empty() && "This routine should only be called on trivially dead functions!"); - auto EII = EntryIndexMap.find(&F); - if (EII != EntryIndexMap.end()) { - EntryEdges[EII->second] = Edge(); - EntryIndexMap.erase(EII); - } - - // It's safe to just remove un-visited functions from the RefSCC entry list. - // FIXME: This is a linear operation which could become hot and benefit from - // an index map. - auto RENI = find(RefSCCEntryNodes, &F); - if (RENI != RefSCCEntryNodes.end()) - RefSCCEntryNodes.erase(RENI); - auto NI = NodeMap.find(&F); if (NI == NodeMap.end()) // Not in the graph at all! @@ -1553,22 +1583,16 @@ void LazyCallGraph::removeDeadFunction(Function &F) { Node &N = *NI->second; NodeMap.erase(NI); - if (SCCMap.empty() && DFSStack.empty()) { - // No SCC walk has begun, so removing this is fine and there is nothing + // Remove this from the entry edges if present. + EntryEdges.removeEdgeInternal(N); + + if (SCCMap.empty()) { + // No SCCs have been formed, so removing this is fine and there is nothing // else necessary at this point but clearing out the node. N.clear(); return; } - // Check that we aren't going to break the DFS walk. - assert(all_of(DFSStack, - [&N](const std::pair<Node *, edge_iterator> &Element) { - return Element.first != &N; - }) && - "Tried to remove a function currently in the DFS stack!"); - assert(find(PendingRefSCCStack, &N) == PendingRefSCCStack.end() && - "Tried to remove a function currently pending to add to a RefSCC!"); - // Cannot remove a function which has yet to be visited in the DFS walk, so // if we have a node at all then we must have an SCC and RefSCC. auto CI = SCCMap.find(&N); @@ -1583,13 +1607,19 @@ void LazyCallGraph::removeDeadFunction(Function &F) { // Validate these properties first. assert(C.size() == 1 && "Dead functions must be in a singular SCC"); assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC"); - assert(RC.Parents.empty() && "Cannot have parents of a dead RefSCC!"); + + // Clean up any remaining reference edges. Note that we walk an unordered set + // here but are just removing and so the order doesn't matter. + for (RefSCC &ParentRC : RC.parents()) + for (SCC &ParentC : ParentRC) + for (Node &ParentN : ParentC) + if (ParentN) + ParentN->removeEdgeInternal(N); // Now remove this RefSCC from any parents sets and the leaf list. 
- for (Edge &E : N) - if (Node *TargetN = E.getNode()) - if (RefSCC *TargetRC = lookupRefSCC(*TargetN)) - TargetRC->Parents.erase(&RC); + for (Edge &E : *N) + if (RefSCC *TargetRC = lookupRefSCC(E.getNode())) + TargetRC->Parents.erase(&RC); // FIXME: This is a linear operation which could become hot and benefit from // an index map. auto LRI = find(LeafRefSCCs, &RC); @@ -1622,15 +1652,14 @@ void LazyCallGraph::updateGraphPtrs() { { SmallVector<Node *, 16> Worklist; for (Edge &E : EntryEdges) - if (Node *EntryN = E.getNode()) - Worklist.push_back(EntryN); + Worklist.push_back(&E.getNode()); while (!Worklist.empty()) { - Node *N = Worklist.pop_back_val(); - N->G = this; - for (Edge &E : N->Edges) - if (Node *TargetN = E.getNode()) - Worklist.push_back(TargetN); + Node &N = *Worklist.pop_back_val(); + N.G = this; + if (N) + for (Edge &E : *N) + Worklist.push_back(&E.getNode()); } } @@ -1647,34 +1676,18 @@ void LazyCallGraph::updateGraphPtrs() { } } -/// Build the internal SCCs for a RefSCC from a sequence of nodes. -/// -/// Appends the SCCs to the provided vector and updates the map with their -/// indices. Both the vector and map must be empty when passed into this -/// routine. -void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) { - assert(RC.SCCs.empty() && "Already built SCCs!"); - assert(RC.SCCIndices.empty() && "Already mapped SCC indices!"); - - for (Node *N : Nodes) { - assert(N->LowLink >= (*Nodes.begin())->LowLink && - "We cannot have a low link in an SCC lower than its root on the " - "stack!"); +template <typename RootsT, typename GetBeginT, typename GetEndT, + typename GetNodeT, typename FormSCCCallbackT> +void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin, + GetEndT &&GetEnd, GetNodeT &&GetNode, + FormSCCCallbackT &&FormSCC) { + typedef decltype(GetBegin(std::declval<Node &>())) EdgeItT; - // This node will go into the next RefSCC, clear out its DFS and low link - // as we scan. - N->DFSNumber = N->LowLink = 0; - } - - // Each RefSCC contains a DAG of the call SCCs. To build these, we do - // a direct walk of the call edges using Tarjan's algorithm. We reuse the - // internal storage as we won't need it for the outer graph's DFS any longer. - - SmallVector<std::pair<Node *, call_edge_iterator>, 16> DFSStack; + SmallVector<std::pair<Node *, EdgeItT>, 16> DFSStack; SmallVector<Node *, 16> PendingSCCStack; // Scan down the stack and DFS across the call edges. - for (Node *RootN : Nodes) { + for (Node *RootN : Roots) { assert(DFSStack.empty() && "Cannot begin a new root with a non-empty DFS stack!"); assert(PendingSCCStack.empty() && @@ -1690,25 +1703,23 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) { RootN->DFSNumber = RootN->LowLink = 1; int NextDFSNumber = 2; - DFSStack.push_back({RootN, RootN->call_begin()}); + DFSStack.push_back({RootN, GetBegin(*RootN)}); do { Node *N; - call_edge_iterator I; + EdgeItT I; std::tie(N, I) = DFSStack.pop_back_val(); - auto E = N->call_end(); + auto E = GetEnd(*N); while (I != E) { - Node &ChildN = *I->getNode(); + Node &ChildN = GetNode(I); if (ChildN.DFSNumber == 0) { // We haven't yet visited this child, so descend, pushing the current // node onto the stack. 
DFSStack.push_back({N, I}); - assert(!lookupSCC(ChildN) && - "Found a node with 0 DFS number but already in an SCC!"); ChildN.DFSNumber = ChildN.LowLink = NextDFSNumber++; N = &ChildN; - I = N->call_begin(); - E = N->call_end(); + I = GetBegin(*N); + E = GetEnd(*N); continue; } @@ -1750,20 +1761,93 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) { })); // Form a new SCC out of these nodes and then clear them off our pending // stack. - RC.SCCs.push_back(createSCC(RC, SCCNodes)); - for (Node &N : *RC.SCCs.back()) { - N.DFSNumber = N.LowLink = -1; - SCCMap[&N] = RC.SCCs.back(); - } + FormSCC(SCCNodes); PendingSCCStack.erase(SCCNodes.end().base(), PendingSCCStack.end()); } while (!DFSStack.empty()); } +} + +/// Build the internal SCCs for a RefSCC from a sequence of nodes. +/// +/// Appends the SCCs to the provided vector and updates the map with their +/// indices. Both the vector and map must be empty when passed into this +/// routine. +void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) { + assert(RC.SCCs.empty() && "Already built SCCs!"); + assert(RC.SCCIndices.empty() && "Already mapped SCC indices!"); + + for (Node *N : Nodes) { + assert(N->LowLink >= (*Nodes.begin())->LowLink && + "We cannot have a low link in an SCC lower than its root on the " + "stack!"); + + // This node will go into the next RefSCC, clear out its DFS and low link + // as we scan. + N->DFSNumber = N->LowLink = 0; + } + + // Each RefSCC contains a DAG of the call SCCs. To build these, we do + // a direct walk of the call edges using Tarjan's algorithm. We reuse the + // internal storage as we won't need it for the outer graph's DFS any longer. + buildGenericSCCs( + Nodes, [](Node &N) { return N->call_begin(); }, + [](Node &N) { return N->call_end(); }, + [](EdgeSequence::call_iterator I) -> Node & { return I->getNode(); }, + [this, &RC](node_stack_range Nodes) { + RC.SCCs.push_back(createSCC(RC, Nodes)); + for (Node &N : *RC.SCCs.back()) { + N.DFSNumber = N.LowLink = -1; + SCCMap[&N] = RC.SCCs.back(); + } + }); // Wire up the SCC indices. for (int i = 0, Size = RC.SCCs.size(); i < Size; ++i) RC.SCCIndices[RC.SCCs[i]] = i; } +void LazyCallGraph::buildRefSCCs() { + if (EntryEdges.empty() || !PostOrderRefSCCs.empty()) + // RefSCCs are either non-existent or already built! + return; + + assert(RefSCCIndices.empty() && "Already mapped RefSCC indices!"); + + SmallVector<Node *, 16> Roots; + for (Edge &E : *this) + Roots.push_back(&E.getNode()); + + // The roots will be popped of a stack, so use reverse to get a less + // surprising order. This doesn't change any of the semantics anywhere. + std::reverse(Roots.begin(), Roots.end()); + + buildGenericSCCs( + Roots, + [](Node &N) { + // We need to populate each node as we begin to walk its edges. + N.populate(); + return N->begin(); + }, + [](Node &N) { return N->end(); }, + [](EdgeSequence::iterator I) -> Node & { return I->getNode(); }, + [this](node_stack_range Nodes) { + RefSCC *NewRC = createRefSCC(*this); + buildSCCs(*NewRC, Nodes); + connectRefSCC(*NewRC); + + // Push the new node into the postorder list and remember its position + // in the index map. 
+ bool Inserted = + RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second; + (void)Inserted; + assert(Inserted && "Cannot already have this RefSCC in the index map!"); + PostOrderRefSCCs.push_back(NewRC); +#ifndef NDEBUG + NewRC->verify(); +#endif + }); +} + // FIXME: We should move callers of this to embed the parent linking and leaf // tracking into their DFS in order to remove a full walk of all edges. void LazyCallGraph::connectRefSCC(RefSCC &RC) { @@ -1773,10 +1857,8 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) { bool IsLeaf = true; for (SCC &C : RC) for (Node &N : C) - for (Edge &E : N) { - assert(E.getNode() && - "Cannot have a missing node in a visited part of the graph!"); - RefSCC &ChildRC = *lookupRefSCC(*E.getNode()); + for (Edge &E : *N) { + RefSCC &ChildRC = *lookupRefSCC(E.getNode()); if (&ChildRC == &RC) continue; ChildRC.Parents.insert(&RC); @@ -1788,113 +1870,13 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) { LeafRefSCCs.push_back(&RC); } -bool LazyCallGraph::buildNextRefSCCInPostOrder() { - if (DFSStack.empty()) { - Node *N; - do { - // If we've handled all candidate entry nodes to the SCC forest, we're - // done. - if (RefSCCEntryNodes.empty()) - return false; - - N = &get(*RefSCCEntryNodes.pop_back_val()); - } while (N->DFSNumber != 0); - - // Found a new root, begin the DFS here. - N->LowLink = N->DFSNumber = 1; - NextDFSNumber = 2; - DFSStack.push_back({N, N->begin()}); - } - - for (;;) { - Node *N; - edge_iterator I; - std::tie(N, I) = DFSStack.pop_back_val(); - - assert(N->DFSNumber > 0 && "We should always assign a DFS number " - "before placing a node onto the stack."); - - auto E = N->end(); - while (I != E) { - Node &ChildN = I->getNode(*this); - if (ChildN.DFSNumber == 0) { - // We haven't yet visited this child, so descend, pushing the current - // node onto the stack. - DFSStack.push_back({N, N->begin()}); - - assert(!SCCMap.count(&ChildN) && - "Found a node with 0 DFS number but already in an SCC!"); - ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++; - N = &ChildN; - I = N->begin(); - E = N->end(); - continue; - } - - // If the child has already been added to some child component, it - // couldn't impact the low-link of this parent because it isn't - // connected, and thus its low-link isn't relevant so skip it. - if (ChildN.DFSNumber == -1) { - ++I; - continue; - } - - // Track the lowest linked child as the lowest link for this node. - assert(ChildN.LowLink > 0 && "Must have a positive low-link number!"); - if (ChildN.LowLink < N->LowLink) - N->LowLink = ChildN.LowLink; - - // Move to the next edge. - ++I; - } - - // We've finished processing N and its descendents, put it on our pending - // SCC stack to eventually get merged into an SCC of nodes. - PendingRefSCCStack.push_back(N); - - // If this node is linked to some lower entry, continue walking up the - // stack. - if (N->LowLink != N->DFSNumber) { - assert(!DFSStack.empty() && - "We never found a viable root for an SCC to pop off!"); - continue; - } - - // Otherwise, form a new RefSCC from the top of the pending node stack. - int RootDFSNumber = N->DFSNumber; - // Find the range of the node stack by walking down until we pass the - // root DFS number. - auto RefSCCNodes = node_stack_range( - PendingRefSCCStack.rbegin(), - find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) { - return N->DFSNumber < RootDFSNumber; - })); - // Form a new RefSCC out of these nodes and then clear them off our pending - // stack. 
- RefSCC *NewRC = createRefSCC(*this); - buildSCCs(*NewRC, RefSCCNodes); - connectRefSCC(*NewRC); - PendingRefSCCStack.erase(RefSCCNodes.end().base(), - PendingRefSCCStack.end()); - - // Push the new node into the postorder list and return true indicating we - // successfully grew the postorder sequence by one. - bool Inserted = - RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second; - (void)Inserted; - assert(Inserted && "Cannot already have this RefSCC in the index map!"); - PostOrderRefSCCs.push_back(NewRC); - return true; - } -} - AnalysisKey LazyCallGraphAnalysis::Key; LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) { OS << " Edges in function: " << N.getFunction().getName() << "\n"; - for (const LazyCallGraph::Edge &E : N) + for (LazyCallGraph::Edge &E : N.populate()) OS << " " << (E.isCall() ? "call" : "ref ") << " -> " << E.getFunction().getName() << "\n"; @@ -1929,6 +1911,7 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module &M, for (Function &F : M) printNode(OS, G.get(F)); + G.buildRefSCCs(); for (LazyCallGraph::RefSCC &C : G.postorder_ref_sccs()) printRefSCC(OS, C); @@ -1941,7 +1924,7 @@ LazyCallGraphDOTPrinterPass::LazyCallGraphDOTPrinterPass(raw_ostream &OS) static void printNodeDOT(raw_ostream &OS, LazyCallGraph::Node &N) { std::string Name = "\"" + DOT::EscapeString(N.getFunction().getName()) + "\""; - for (const LazyCallGraph::Edge &E : N) { + for (LazyCallGraph::Edge &E : N.populate()) { OS << " " << Name << " -> \"" << DOT::EscapeString(E.getFunction().getName()) << "\""; if (!E.isCall()) // It is a ref edge. diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index d442310476cf..ad01f7f2f215 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/CFG.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -31,6 +32,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" #include <map> #include <stack> @@ -39,6 +41,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "lazy-value-info" +// This is the number of worklist items we will process to try to discover an +// answer for a given value. +static const unsigned MaxProcessedPerValue = 500; + char LazyValueInfoWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(LazyValueInfoWrapperPass, "lazy-value-info", "Lazy Value Information Analysis", false, true) @@ -358,6 +364,7 @@ namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { + friend class LazyValueInfoAnnotatedWriter; /// This is all of the cached block information for exactly one Value*. /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. 
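The surrounding cache hunks are easier to follow with the overall shape in mind: lattice results are cached per (value, block) pair, while values that degrade to "overdefined" are recorded per block so a whole block can be invalidated cheaply. The sketch below uses simplified standard-library stand-ins, not the LLVM types, to show that two-level layout and the erase-behind-yourself loop the diff introduces.

#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>

struct LatticeValue { std::string State = "undefined"; };

struct ValueCacheEntry {
  // Lattice value computed for this value at the end of each block.
  std::unordered_map<const void * /*BasicBlock*/, LatticeValue> BlockVals;
};

class LazyValueCacheSketch {
  std::unordered_map<const void * /*Value*/, std::unique_ptr<ValueCacheEntry>>
      ValueCache;
  std::unordered_map<const void * /*BasicBlock*/, std::set<const void *>>
      OverDefinedCache;

public:
  void insertResult(const void *Val, const void *BB, LatticeValue Result) {
    if (Result.State == "overdefined") {
      // Overdefined results are tracked per block rather than per value.
      OverDefinedCache[BB].insert(Val);
      return;
    }
    auto &Entry = ValueCache[Val];
    if (!Entry)
      Entry = std::make_unique<ValueCacheEntry>();
    Entry->BlockVals[BB] = std::move(Result);
  }

  void eraseValue(const void *Val) {
    // Drop the value from every per-block overdefined set, erasing sets that
    // become empty as we go (advance the iterator before erasing behind it).
    for (auto I = OverDefinedCache.begin(); I != OverDefinedCache.end();) {
      I->second.erase(Val);
      I = I->second.empty() ? OverDefinedCache.erase(I) : std::next(I);
    }
    ValueCache.erase(Val);
  }
};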
@@ -366,22 +373,23 @@ namespace { struct ValueCacheEntryTy { ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {} LVIValueHandle Handle; - SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> BlockVals; + SmallDenseMap<PoisoningVH<BasicBlock>, LVILatticeVal, 4> BlockVals; }; - /// This is all of the cached information for all values, - /// mapped from Value* to key information. - DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache; - /// This tracks, on a per-block basis, the set of values that are /// over-defined at the end of that block. - typedef DenseMap<AssertingVH<BasicBlock>, SmallPtrSet<Value *, 4>> + typedef DenseMap<PoisoningVH<BasicBlock>, SmallPtrSet<Value *, 4>> OverDefinedCacheTy; - OverDefinedCacheTy OverDefinedCache; - /// Keep track of all blocks that we have ever seen, so we /// don't spend time removing unused blocks from our caches. - DenseSet<AssertingVH<BasicBlock> > SeenBlocks; + DenseSet<PoisoningVH<BasicBlock> > SeenBlocks; + + protected: + /// This is all of the cached information for all values, + /// mapped from Value* to key information. + DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache; + OverDefinedCacheTy OverDefinedCache; + public: void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { @@ -435,6 +443,7 @@ namespace { return BBI->second; } + void printCache(Function &F, raw_ostream &OS); /// clear - Empty the cache. void clear() { SeenBlocks.clear(); @@ -458,16 +467,71 @@ namespace { }; } + +namespace { + + /// An assembly annotator class to print LazyValueCache information in + /// comments. + class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { + const LazyValueInfoCache* LVICache; + + public: + LazyValueInfoAnnotatedWriter(const LazyValueInfoCache *L) : LVICache(L) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) { + auto ODI = LVICache->OverDefinedCache.find(const_cast<BasicBlock*>(BB)); + if (ODI == LVICache->OverDefinedCache.end()) + return; + OS << "; OverDefined values for block are: \n"; + for (auto *V : ODI->second) + OS << ";" << *V << "\n"; + + // Find if there are latticevalues defined for arguments of the function. 
+ auto *F = const_cast<Function *>(BB->getParent()); + for (auto &Arg : F->args()) { + auto VI = LVICache->ValueCache.find_as(&Arg); + if (VI == LVICache->ValueCache.end()) + continue; + auto BBI = VI->second->BlockVals.find(const_cast<BasicBlock *>(BB)); + if (BBI != VI->second->BlockVals.end()) + OS << "; CachedLatticeValue for: '" << *VI->first << "' is: '" + << BBI->second << "'\n"; + } + } + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + + auto VI = LVICache->ValueCache.find_as(const_cast<Instruction *>(I)); + if (VI == LVICache->ValueCache.end()) + return; + OS << "; CachedLatticeValues for: '" << *VI->first << "'\n"; + for (auto &BV : VI->second->BlockVals) { + OS << "; at beginning of BasicBlock: '"; + BV.first->printAsOperand(OS, false); + OS << "' LatticeVal: '" << BV.second << "' \n"; + } + } +}; +} + +void LazyValueInfoCache::printCache(Function &F, raw_ostream &OS) { + LazyValueInfoAnnotatedWriter Writer(this); + F.print(OS, &Writer); + +} + void LazyValueInfoCache::eraseValue(Value *V) { - SmallVector<AssertingVH<BasicBlock>, 4> ToErase; - for (auto &I : OverDefinedCache) { - SmallPtrSetImpl<Value *> &ValueSet = I.second; + for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) { + // Copy and increment the iterator immediately so we can erase behind + // ourselves. + auto Iter = I++; + SmallPtrSetImpl<Value *> &ValueSet = Iter->second; ValueSet.erase(V); if (ValueSet.empty()) - ToErase.push_back(I.first); + OverDefinedCache.erase(Iter); } - for (auto &BB : ToErase) - OverDefinedCache.erase(BB); ValueCache.erase(V); } @@ -480,7 +544,7 @@ void LVIValueHandle::deleted() { void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { // Shortcut if we have never seen this block. - DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB); + DenseSet<PoisoningVH<BasicBlock> >::iterator I = SeenBlocks.find(BB); if (I == SeenBlocks.end()) return; SeenBlocks.erase(I); @@ -563,7 +627,7 @@ namespace { /// This stack holds the state of the value solver during a query. /// It basically emulates the callstack of the naive /// recursive value lookup process. - std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack; + SmallVector<std::pair<BasicBlock*, Value*>, 8> BlockValueStack; /// Keeps track of which block-value pairs are in BlockValueStack. DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet; @@ -576,7 +640,7 @@ namespace { DEBUG(dbgs() << "PUSH: " << *BV.second << " in " << BV.first->getName() << "\n"); - BlockValueStack.push(BV); + BlockValueStack.push_back(BV); return true; } @@ -629,6 +693,11 @@ namespace { TheCache.clear(); } + /// Printing the LazyValueInfoCache. + void printCache(Function &F, raw_ostream &OS) { + TheCache.printCache(F, OS); + } + /// This is part of the update interface to inform the cache /// that a block has been deleted. void eraseBlock(BasicBlock *BB) { @@ -646,24 +715,50 @@ namespace { } // end anonymous namespace void LazyValueInfoImpl::solve() { + SmallVector<std::pair<BasicBlock *, Value *>, 8> StartingStack( + BlockValueStack.begin(), BlockValueStack.end()); + + unsigned processedCount = 0; while (!BlockValueStack.empty()) { - std::pair<BasicBlock*, Value*> &e = BlockValueStack.top(); + processedCount++; + // Abort if we have to process too many values to get a result for this one. 
+ // Because of the design of the overdefined cache currently being per-block + // to avoid naming-related issues (IE it wants to try to give different + // results for the same name in different blocks), overdefined results don't + // get cached globally, which in turn means we will often try to rediscover + // the same overdefined result again and again. Once something like + // PredicateInfo is used in LVI or CVP, we should be able to make the + // overdefined cache global, and remove this throttle. + if (processedCount > MaxProcessedPerValue) { + DEBUG(dbgs() << "Giving up on stack because we are getting too deep\n"); + // Fill in the original values + while (!StartingStack.empty()) { + std::pair<BasicBlock *, Value *> &e = StartingStack.back(); + TheCache.insertResult(e.second, e.first, + LVILatticeVal::getOverdefined()); + StartingStack.pop_back(); + } + BlockValueSet.clear(); + BlockValueStack.clear(); + return; + } + std::pair<BasicBlock *, Value *> e = BlockValueStack.back(); assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!"); if (solveBlockValue(e.second, e.first)) { // The work item was completely processed. - assert(BlockValueStack.top() == e && "Nothing should have been pushed!"); + assert(BlockValueStack.back() == e && "Nothing should have been pushed!"); assert(TheCache.hasCachedValueInfo(e.second, e.first) && "Result should be in cache!"); DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName() << " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n"); - BlockValueStack.pop(); + BlockValueStack.pop_back(); BlockValueSet.erase(e); } else { // More work needs to be done before revisiting. - assert(BlockValueStack.top() != e && "Stack should have been pushed!"); + assert(BlockValueStack.back() != e && "Stack should have been pushed!"); } } } @@ -839,13 +934,19 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, } // Loop over all of our predecessors, merging what we know from them into - // result. - bool EdgesMissing = false; + // result. If we encounter an unexplored predecessor, we eagerly explore it + // in a depth first manner. In practice, this has the effect of discovering + // paths we can't analyze eagerly without spending compile times analyzing + // other paths. This heuristic benefits from the fact that predecessors are + // frequently arranged such that dominating ones come first and we quickly + // find a path to function entry. TODO: We should consider explicitly + // canonicalizing to make this true rather than relying on this happy + // accident. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { LVILatticeVal EdgeResult; - EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult); - if (EdgesMissing) - continue; + if (!getEdgeValue(Val, *PI, BB, EdgeResult)) + // Explore that input, then return here + return false; Result.mergeIn(EdgeResult, DL); @@ -866,8 +967,6 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, return true; } } - if (EdgesMissing) - return false; // Return the merged value, which is more precise than 'overdefined'. assert(!Result.isOverdefined()); @@ -880,8 +979,8 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV, LVILatticeVal Result; // Start Undefined. // Loop over all of our predecessors, merging what we know from them into - // result. - bool EdgesMissing = false; + // result. See the comment about the chosen traversal order in + // solveBlockValueNonLocal; the same reasoning applies here. 
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PhiBB = PN->getIncomingBlock(i); Value *PhiVal = PN->getIncomingValue(i); @@ -889,9 +988,9 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV, // Note that we can provide PN as the context value to getEdgeValue, even // though the results will be cached, because PN is the value being used as // the cache key in the caller. - EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN); - if (EdgesMissing) - continue; + if (!getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN)) + // Explore that input, then return here + return false; Result.mergeIn(EdgeResult, DL); @@ -905,8 +1004,6 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV, return true; } } - if (EdgesMissing) - return false; // Return the merged value, which is more precise than 'overdefined'. assert(!Result.isOverdefined() && "Possible PHI in entry block?"); @@ -1333,14 +1430,14 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, unsigned BitWidth = Val->getType()->getIntegerBitWidth(); ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/); - for (SwitchInst::CaseIt i : SI->cases()) { - ConstantRange EdgeVal(i.getCaseValue()->getValue()); + for (auto Case : SI->cases()) { + ConstantRange EdgeVal(Case.getCaseValue()->getValue()); if (DefaultCase) { // It is possible that the default destination is the destination of // some cases. There is no need to perform difference for those cases. - if (i.getCaseSuccessor() != BBTo) + if (Case.getCaseSuccessor() != BBTo) EdgesVals = EdgesVals.difference(EdgeVal); - } else if (i.getCaseSuccessor() == BBTo) + } else if (Case.getCaseSuccessor() == BBTo) EdgesVals = EdgesVals.unionWith(EdgeVal); } Result = LVILatticeVal::getRange(std::move(EdgesVals)); @@ -1352,8 +1449,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, /// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at /// the basic block if the edge does not constrain Val. bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo, LVILatticeVal &Result, - Instruction *CxtI) { + BasicBlock *BBTo, LVILatticeVal &Result, + Instruction *CxtI) { // If already a constant, there is nothing to compute. if (Constant *VC = dyn_cast<Constant>(Val)) { Result = LVILatticeVal::get(VC); @@ -1503,6 +1600,18 @@ void LazyValueInfo::releaseMemory() { } } +bool LazyValueInfo::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // We need to invalidate if we have either failed to preserve this analyses + // result directly or if any of its dependencies have been invalidated. 
+ auto PAC = PA.getChecker<LazyValueAnalysis>(); + if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || + (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA))) + return true; + + return false; +} + void LazyValueInfoWrapperPass::releaseMemory() { Info.releaseMemory(); } LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { @@ -1510,7 +1619,7 @@ LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); - return LazyValueInfo(&AC, &TLI, DT); + return LazyValueInfo(&AC, &F.getParent()->getDataLayout(), &TLI, DT); } /// Returns true if we can statically tell that this value will never be a @@ -1780,3 +1889,40 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) { getImpl(PImpl, AC, &DL, DT).eraseBlock(BB); } } + + +void LazyValueInfo::printCache(Function &F, raw_ostream &OS) { + if (PImpl) { + getImpl(PImpl, AC, DL, DT).printCache(F, OS); + } +} + +namespace { +// Printer class for LazyValueInfo results. +class LazyValueInfoPrinter : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + LazyValueInfoPrinter() : FunctionPass(ID) { + initializeLazyValueInfoPrinterPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<LazyValueInfoWrapperPass>(); + } + + bool runOnFunction(Function &F) override { + dbgs() << "LVI for function '" << F.getName() << "':\n"; + auto &LVI = getAnalysis<LazyValueInfoWrapperPass>().getLVI(); + LVI.printCache(F, dbgs()); + return false; + } +}; +} + +char LazyValueInfoPrinter::ID = 0; +INITIALIZE_PASS_BEGIN(LazyValueInfoPrinter, "print-lazy-value-info", + "Lazy Value Info Printer Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) +INITIALIZE_PASS_END(LazyValueInfoPrinter, "print-lazy-value-info", + "Lazy Value Info Printer Pass", false, false) diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index e46541e6538d..96799a459bfc 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -312,21 +312,26 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA, bool *IsLoadCSE) { - if (MaxInstsToScan == 0) - MaxInstsToScan = ~0U; - - Value *Ptr = Load->getPointerOperand(); - Type *AccessTy = Load->getType(); - - // We can never remove a volatile load - if (Load->isVolatile()) - return nullptr; - - // Anything stronger than unordered is currently unimplemented. + AliasAnalysis *AA, bool *IsLoad, + unsigned *NumScanedInst) { + // Don't CSE load that is volatile or anything stronger than unordered. if (!Load->isUnordered()) return nullptr; + return FindAvailablePtrLoadStore( + Load->getPointerOperand(), Load->getType(), Load->isAtomic(), ScanBB, + ScanFrom, MaxInstsToScan, AA, IsLoad, NumScanedInst); +} + +Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, + bool AtLeastAtomic, BasicBlock *ScanBB, + BasicBlock::iterator &ScanFrom, + unsigned MaxInstsToScan, + AliasAnalysis *AA, bool *IsLoadCSE, + unsigned *NumScanedInst) { + if (MaxInstsToScan == 0) + MaxInstsToScan = ~0U; + const DataLayout &DL = ScanBB->getModule()->getDataLayout(); // Try to get the store size for the type. 
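The scan loop being extended here follows a simple budgeted-walk pattern: walk backwards from the scan point, optionally report how many instructions were inspected, and give up once the MaxInstsToScan budget is spent. A standalone sketch with stand-in types (not the LLVM instruction classes), under the assumption that the block is just a vector of records:

#include <vector>

struct FakeInst {
  bool DefinesPtr; // does this instruction produce the value we are after?
  int Value;
};

const FakeInst *scanBackwardForValue(const std::vector<FakeInst> &Block,
                                     std::size_t ScanFrom,
                                     unsigned MaxInstsToScan,
                                     unsigned *NumScannedInst) {
  if (MaxInstsToScan == 0)
    MaxInstsToScan = ~0U; // zero means "no limit"

  while (ScanFrom > 0) {
    const FakeInst &Inst = Block[--ScanFrom];

    if (NumScannedInst)
      ++(*NumScannedInst); // let callers track how much work was done

    if (MaxInstsToScan-- == 0)
      return nullptr; // budget exhausted: do not scan huge blocks

    if (Inst.DefinesPtr)
      return &Inst; // found an earlier definition that can be forwarded
  }
  return nullptr;
}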
@@ -344,6 +349,9 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, // Restore ScanFrom to expected value in case next test succeeds ScanFrom++; + if (NumScanedInst) + ++(*NumScanedInst); + // Don't scan huge blocks. if (MaxInstsToScan-- == 0) return nullptr; @@ -359,7 +367,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, // We can value forward from an atomic to a non-atomic, but not the // other way around. - if (LI->isAtomic() < Load->isAtomic()) + if (LI->isAtomic() < AtLeastAtomic) return nullptr; if (IsLoadCSE) @@ -378,7 +386,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, // We can value forward from an atomic to a non-atomic, but not the // other way around. - if (SI->isAtomic() < Load->isAtomic()) + if (SI->isAtomic() < AtLeastAtomic) return nullptr; if (IsLoadCSE) diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index bf8007213097..4ba12583ff83 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -135,21 +135,6 @@ bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } -void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message, - const Loop *TheLoop, const char *PassName, - OptimizationRemarkEmitter &ORE) { - DebugLoc DL = TheLoop->getStartLoc(); - const Value *V = TheLoop->getHeader(); - if (const Instruction *I = Message.getInstr()) { - // If there is no debug location attached to the instruction, revert back to - // using the loop's. - if (I->getDebugLoc()) - DL = I->getDebugLoc(); - V = I->getParent(); - } - ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str()); -} - Value *llvm::stripIntegerCast(Value *V) { if (auto *CI = dyn_cast<CastInst>(V)) if (CI->getOperand(0)->getType()->isIntegerTy()) @@ -172,11 +157,6 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, // Strip casts. StrideVal = stripIntegerCast(StrideVal); - // Replace symbolic stride by one. - Value *One = ConstantInt::get(StrideVal->getType(), 1); - ValueToValueMap RewriteMap; - RewriteMap[StrideVal] = One; - ScalarEvolution *SE = PSE.getSE(); const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal)); const auto *CT = @@ -518,7 +498,7 @@ class AccessAnalysis { public: /// \brief Read or write access location. typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; - typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; + typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, @@ -570,7 +550,7 @@ public: DepChecker.clearDependences(); } - MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } + MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } private: typedef SetVector<MemAccessInfo> PtrAccessSet; @@ -584,8 +564,8 @@ private: const DataLayout &DL; - /// Set of accesses that need a further dependence check. - MemAccessInfoSet CheckDeps; + /// List of accesses that need a further dependence check. + MemAccessInfoList CheckDeps; /// Set of pointers that are read only. SmallPtrSet<Value*, 16> ReadOnlyPtr; @@ -842,7 +822,7 @@ void AccessAnalysis::processMemAccesses() { // there is no other write to the ptr - this is an optimization to // catch "a[i] = a[i] + " without having to do a dependence check). 
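The CheckDeps change above swaps a pointer-keyed set for a vector plus a separate visited set: the vector is walked in insertion order, and nothing is erased from the container being iterated. A simplified stand-in version of that pattern (illustrative types only):

#include <set>
#include <utility>
#include <vector>

using MemAccess = std::pair<const void *, bool>; // (pointer, is-write)

void checkDependences(const std::vector<MemAccess> &CheckDeps) {
  std::set<MemAccess> Visited;
  for (const MemAccess &Access : CheckDeps) {
    if (!Visited.insert(Access).second)
      continue; // already handled while walking an earlier equivalence class
    // ... examine every other access in Access's equivalence class here,
    // inserting each one into Visited as it is processed ...
  }
}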
       if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
-          CheckDeps.insert(Access);
+          CheckDeps.push_back(Access);
           IsRTCheckAnalysisNeeded = true;
         }
 
@@ -1205,6 +1185,73 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
   return false;
 }
 
+/// Given a non-constant (unknown) dependence-distance \p Dist between two
+/// memory accesses, that have the same stride whose absolute value is given
+/// in \p Stride, and that have the same type size \p TypeByteSize,
+/// in a loop whose backedge-taken count is \p BackedgeTakenCount, check if it is
+/// possible to prove statically that the dependence distance is larger
+/// than the range that the accesses will travel through the execution of
+/// the loop. If so, return true; false otherwise. This is useful for
+/// example in loops such as the following (PR31098):
+///     for (i = 0; i < D; ++i) {
+///                = out[i];
+///       out[i+D] =
+///     }
+static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
+                                     const SCEV &BackedgeTakenCount,
+                                     const SCEV &Dist, uint64_t Stride,
+                                     uint64_t TypeByteSize) {
+
+  // If we can prove that
+  //      (**) |Dist| > BackedgeTakenCount * Step
+  // where Step is the absolute stride of the memory accesses in bytes,
+  // then there is no dependence.
+  //
+  // Rationale:
+  // We basically want to check if the absolute distance (|Dist/Step|)
+  // is >= the loop iteration count (or > BackedgeTakenCount).
+  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
+  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
+  // that the dependence distance is >= VF; This is checked elsewhere.
+  // But in some cases we can prune unknown dependence distances early, and
+  // even before selecting the VF, and without a runtime test, by comparing
+  // the distance against the loop iteration count. Since the vectorized code
+  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
+  // also guarantees that distance >= VF.
+  //
+  const uint64_t ByteStride = Stride * TypeByteSize;
+  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
+  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
+
+  const SCEV *CastedDist = &Dist;
+  const SCEV *CastedProduct = Product;
+  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
+  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
+
+  // The dependence distance can be positive/negative, so we sign extend Dist;
+  // The multiplication of the absolute stride in bytes and the
+  // backedgeTakenCount is non-negative, so we zero extend Product.
+  if (DistTypeSize > ProductTypeSize)
+    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
+  else
+    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
+
+  // Is  Dist - (BackedgeTakenCount * Step) > 0 ?
+  // (If so, then we have proven (**) because |Dist| >= Dist)
+  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
+  if (SE.isKnownPositive(Minus))
+    return true;
+
+  // Second try: Is  -Dist - (BackedgeTakenCount * Step) > 0 ?
+  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
+  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
+  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
+  if (SE.isKnownPositive(Minus))
+    return true;
+
+  return false;
+}
+
 /// \brief Check the dependence for two accesses with the same stride \p Stride.
 /// \p Distance is the positive distance and \p TypeByteSize is type size in
 /// bytes.
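To make the (**) check above concrete, consider the PR31098-style loop from the comment with i32 (4-byte) elements: the distance between the read of out[i] and the write of out[i+D] is Dist = 4*D bytes, the common stride is 1 element so ByteStride = 4, and the backedge-taken count is D - 1. The first query then asks whether Dist - BackedgeTakenCount * Step = 4*D - 4*(D - 1) = 4 is known positive, which ScalarEvolution can prove even though D (and therefore Dist itself) is unknown at compile time, so isDependent can return NoDep without a runtime check.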
@@ -1292,21 +1339,26 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, return Dependence::Unknown; } + Type *ATy = APtr->getType()->getPointerElementType(); + Type *BTy = BPtr->getType()->getPointerElementType(); + auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); + uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); + uint64_t Stride = std::abs(StrideAPtr); const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist); if (!C) { + if (TypeByteSize == DL.getTypeAllocSize(BTy) && + isSafeDependenceDistance(DL, *(PSE.getSE()), + *(PSE.getBackedgeTakenCount()), *Dist, Stride, + TypeByteSize)) + return Dependence::NoDep; + DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); ShouldRetryWithRuntimeCheck = true; return Dependence::Unknown; } - Type *ATy = APtr->getType()->getPointerElementType(); - Type *BTy = BPtr->getType()->getPointerElementType(); - auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); - uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); - const APInt &Val = C->getAPInt(); int64_t Distance = Val.getSExtValue(); - uint64_t Stride = std::abs(StrideAPtr); // Attempt to prove strided accesses independent. if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && @@ -1427,12 +1479,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, } bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, - MemAccessInfoSet &CheckDeps, + MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides) { MaxSafeDepDistBytes = -1; - while (!CheckDeps.empty()) { - MemAccessInfo CurAccess = *CheckDeps.begin(); + SmallPtrSet<MemAccessInfo, 8> Visited; + for (MemAccessInfo CurAccess : CheckDeps) { + if (Visited.count(CurAccess)) + continue; // Get the relevant memory access set. EquivalenceClasses<MemAccessInfo>::iterator I = @@ -1446,7 +1500,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, // Check every access pair. while (AI != AE) { - CheckDeps.erase(*AI); + Visited.insert(*AI); EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI); while (OI != AE) { // Check every accessing instruction pair in program order. @@ -1885,7 +1939,10 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Value *NewPtr = (Inst && TheLoop->contains(Inst)) ? Exp.expandCodeFor(Sc, PtrArithTy, Loc) : Ptr; - return {NewPtr, NewPtr}; + // We must return a half-open range, which means incrementing Sc. + const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy)); + Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc); + return {NewPtr, NewPtrPlusOne}; } else { Value *Start = nullptr, *End = nullptr; DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp index 5be3ee341c9c..e4a0f90b2f71 100644 --- a/lib/Analysis/LoopAnalysisManager.cpp +++ b/lib/Analysis/LoopAnalysisManager.cpp @@ -31,24 +31,10 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate( FunctionAnalysisManager::Invalidator &Inv) { // First compute the sequence of IR units covered by this proxy. We will want // to visit this in postorder, but because this is a tree structure we can do - // this by building a preorder sequence and walking it in reverse. - SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist; - // Note that we want to walk the roots in reverse order because we will end - // up reversing the preorder sequence. 
However, it happens that the loop nest - // roots are in reverse order within the LoopInfo object. So we just walk - // forward here. - // FIXME: If we change the order of LoopInfo we will want to add a reverse - // here. - for (Loop *RootL : *LI) { - assert(PreOrderWorklist.empty() && - "Must start with an empty preorder walk worklist."); - PreOrderWorklist.push_back(RootL); - do { - Loop *L = PreOrderWorklist.pop_back_val(); - PreOrderWorklist.append(L->begin(), L->end()); - PreOrderLoops.push_back(L); - } while (!PreOrderWorklist.empty()); - } + // this by building a preorder sequence and walking it backwards. We also + // want siblings in forward program order to match the LoopPassManager so we + // get the preorder with siblings reversed. + SmallVector<Loop *, 4> PreOrderLoops = LI->getLoopsInReverseSiblingPreorder(); // If this proxy or the loop info is going to be invalidated, we also need // to clear all the keys coming from that analysis. We also completely blow @@ -145,7 +131,6 @@ LoopAnalysisManagerFunctionProxy::run(Function &F, PreservedAnalyses llvm::getLoopPassPreservedAnalyses() { PreservedAnalyses PA; - PA.preserve<AssumptionAnalysis>(); PA.preserve<DominatorTreeAnalysis>(); PA.preserve<LoopAnalysis>(); PA.preserve<LoopAnalysisManagerFunctionProxy>(); diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index f449ce94d57c..ff68810abb82 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -40,9 +40,9 @@ template class llvm::LoopInfoBase<BasicBlock, Loop>; // Always verify loopinfo if expensive checking is enabled. #ifdef EXPENSIVE_CHECKS -static bool VerifyLoopInfo = true; +bool llvm::VerifyLoopInfo = true; #else -static bool VerifyLoopInfo = false; +bool llvm::VerifyLoopInfo = false; #endif static cl::opt<bool,true> VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), @@ -211,9 +211,11 @@ bool Loop::isSafeToClone() const { MDNode *Loop::getLoopID() const { MDNode *LoopID = nullptr; - if (isLoopSimplifyForm()) { - LoopID = getLoopLatch()->getTerminator()->getMetadata(LLVMContext::MD_loop); + if (BasicBlock *Latch = getLoopLatch()) { + LoopID = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop); } else { + assert(!getLoopLatch() && + "The loop should have no single latch at this point"); // Go through each predecessor of the loop header and check the // terminator for the metadata. BasicBlock *H = getHeader(); @@ -248,11 +250,12 @@ void Loop::setLoopID(MDNode *LoopID) const { assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand"); assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself"); - if (isLoopSimplifyForm()) { - getLoopLatch()->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); + if (BasicBlock *Latch = getLoopLatch()) { + Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); return; } + assert(!getLoopLatch() && "The loop should have no single latch at this point"); BasicBlock *H = getHeader(); for (BasicBlock *BB : this->blocks()) { TerminatorInst *TI = BB->getTerminator(); @@ -610,6 +613,15 @@ LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) { analyze(DomTree); } +bool LoopInfo::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. 
+ auto PAC = PA.getChecker<LoopAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || + PAC.preservedSet<CFGAnalyses>()); +} + void LoopInfo::markAsRemoved(Loop *Unloop) { assert(!Unloop->isInvalid() && "Loop has already been removed"); Unloop->invalidate(); diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 3f4a07942154..0b5f6266e373 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -54,6 +54,8 @@ public: } return false; } + + StringRef getPassName() const override { return "Print Loop IR"; } }; char PrintLoopPassWrapper::ID = 0; diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 2d8274040d39..b8c444904723 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -50,30 +50,30 @@ struct AllocFnsTy { // FIXME: certain users need more information. E.g., SimplifyLibCalls needs to // know which functions are nounwind, noalias, nocapture parameters, etc. -static const std::pair<LibFunc::Func, AllocFnsTy> AllocationFnData[] = { - {LibFunc::malloc, {MallocLike, 1, 0, -1}}, - {LibFunc::valloc, {MallocLike, 1, 0, -1}}, - {LibFunc::Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int) - {LibFunc::ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) - {LibFunc::Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long) - {LibFunc::ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow) - {LibFunc::Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int) - {LibFunc::ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) - {LibFunc::Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long) - {LibFunc::ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow) - {LibFunc::msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned int) - {LibFunc::msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) - {LibFunc::msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long) - {LibFunc::msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned long long, nothrow) - {LibFunc::msvc_new_array_int, {OpNewLike, 1, 0, -1}}, // new[](unsigned int) - {LibFunc::msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) - {LibFunc::msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long) - {LibFunc::msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow) - {LibFunc::calloc, {CallocLike, 2, 0, 1}}, - {LibFunc::realloc, {ReallocLike, 2, 1, -1}}, - {LibFunc::reallocf, {ReallocLike, 2, 1, -1}}, - {LibFunc::strdup, {StrDupLike, 1, -1, -1}}, - {LibFunc::strndup, {StrDupLike, 2, 1, -1}} +static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = { + {LibFunc_malloc, {MallocLike, 1, 0, -1}}, + {LibFunc_valloc, {MallocLike, 1, 0, -1}}, + {LibFunc_Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int) + {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) + {LibFunc_Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long) + {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow) + {LibFunc_Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int) + {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) + {LibFunc_Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long) + {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow) + {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned 
int) + {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow) + {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long) + {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned long long, nothrow) + {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1}}, // new[](unsigned int) + {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) + {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long) + {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow) + {LibFunc_calloc, {CallocLike, 2, 0, 1}}, + {LibFunc_realloc, {ReallocLike, 2, 1, -1}}, + {LibFunc_reallocf, {ReallocLike, 2, 1, -1}}, + {LibFunc_strdup, {StrDupLike, 1, -1, -1}}, + {LibFunc_strndup, {StrDupLike, 2, 1, -1}} // TODO: Handle "int posix_memalign(void **, size_t, size_t)" }; @@ -106,12 +106,12 @@ getAllocationDataForFunction(const Function *Callee, AllocType AllocTy, const TargetLibraryInfo *TLI) { // Make sure that the function is available. StringRef FnName = Callee->getName(); - LibFunc::Func TLIFn; + LibFunc TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return None; const auto *Iter = find_if( - AllocationFnData, [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) { + AllocationFnData, [TLIFn](const std::pair<LibFunc, AllocFnsTy> &P) { return P.first == TLIFn; }); @@ -183,7 +183,7 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V, static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V); - return CS && CS.paramHasAttr(AttributeSet::ReturnIndex, Attribute::NoAlias); + return CS && CS.hasRetAttr(Attribute::NoAlias); } @@ -333,33 +333,33 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { return nullptr; StringRef FnName = Callee->getName(); - LibFunc::Func TLIFn; + LibFunc TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return nullptr; unsigned ExpectedNumParams; - if (TLIFn == LibFunc::free || - TLIFn == LibFunc::ZdlPv || // operator delete(void*) - TLIFn == LibFunc::ZdaPv || // operator delete[](void*) - TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*) - TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*) - TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*) - TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*) + if (TLIFn == LibFunc_free || + TLIFn == LibFunc_ZdlPv || // operator delete(void*) + TLIFn == LibFunc_ZdaPv || // operator delete[](void*) + TLIFn == LibFunc_msvc_delete_ptr32 || // operator delete(void*) + TLIFn == LibFunc_msvc_delete_ptr64 || // operator delete(void*) + TLIFn == LibFunc_msvc_delete_array_ptr32 || // operator delete[](void*) + TLIFn == LibFunc_msvc_delete_array_ptr64) // operator delete[](void*) ExpectedNumParams = 1; - else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint) - TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong) - TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) - TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint) - TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong) - TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) - TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint) - TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong) - TLIFn == LibFunc::msvc_delete_ptr32_nothrow || 
// delete(void*, nothrow) - TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow) - TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint) - TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) - TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) - TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) + else if (TLIFn == LibFunc_ZdlPvj || // delete(void*, uint) + TLIFn == LibFunc_ZdlPvm || // delete(void*, ulong) + TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) + TLIFn == LibFunc_ZdaPvj || // delete[](void*, uint) + TLIFn == LibFunc_ZdaPvm || // delete[](void*, ulong) + TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) + TLIFn == LibFunc_msvc_delete_ptr32_int || // delete(void*, uint) + TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong) + TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc_msvc_delete_ptr64_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc_msvc_delete_array_ptr32_int || // delete[](void*, uint) + TLIFn == LibFunc_msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) + TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) + TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) ExpectedNumParams = 2; else return nullptr; @@ -394,10 +394,8 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) { /// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, /// byval arguments, and global variables. bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, - const TargetLibraryInfo *TLI, bool RoundToAlign, - llvm::ObjSizeMode Mode) { - ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), - RoundToAlign, Mode); + const TargetLibraryInfo *TLI, ObjectSizeOpts Opts) { + ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), Opts); SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); if (!Visitor.bothKnown(Data)) return false; @@ -414,19 +412,23 @@ ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, "ObjectSize must be a call to llvm.objectsize!"); bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero(); - ObjSizeMode Mode; + ObjectSizeOpts EvalOptions; // Unless we have to fold this to something, try to be as accurate as // possible. if (MustSucceed) - Mode = MaxVal ? ObjSizeMode::Max : ObjSizeMode::Min; + EvalOptions.EvalMode = + MaxVal ? ObjectSizeOpts::Mode::Max : ObjectSizeOpts::Mode::Min; else - Mode = ObjSizeMode::Exact; + EvalOptions.EvalMode = ObjectSizeOpts::Mode::Exact; + + EvalOptions.NullIsUnknownSize = + cast<ConstantInt>(ObjectSize->getArgOperand(2))->isOne(); // FIXME: Does it make sense to just return a failure value if the size won't // fit in the output and `!MustSucceed`? 
uint64_t Size; auto *ResultType = cast<IntegerType>(ObjectSize->getType()); - if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, false, Mode) && + if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) && isUIntN(ResultType->getBitWidth(), Size)) return ConstantInt::get(ResultType, Size); @@ -443,7 +445,7 @@ STATISTIC(ObjectVisitorLoad, APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { - if (RoundToAlign && Align) + if (Options.RoundToAlign && Align) return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align)); return Size; } @@ -451,9 +453,8 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context, - bool RoundToAlign, - ObjSizeMode Mode) - : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign), Mode(Mode) { + ObjectSizeOpts Options) + : DL(DL), TLI(TLI), Options(Options) { // Pointer size must be rechecked for each object visited since it could have // a different address space. } @@ -596,7 +597,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { } SizeOffsetType -ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull&) { +ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull& CPN) { + if (Options.NullIsUnknownSize && CPN.getType()->getAddressSpace() == 0) + return unknown(); return std::make_pair(Zero, Zero); } @@ -663,12 +666,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { if (TrueResult == FalseResult) { return TrueSide; } - if (Mode == ObjSizeMode::Min) { + if (Options.EvalMode == ObjectSizeOpts::Mode::Min) { if (TrueResult.slt(FalseResult)) return TrueSide; return FalseSide; } - if (Mode == ObjSizeMode::Max) { + if (Options.EvalMode == ObjectSizeOpts::Mode::Max) { if (TrueResult.sgt(FalseResult)) return TrueSide; return FalseSide; @@ -719,7 +722,10 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { - ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign); + ObjectSizeOpts ObjSizeOptions; + ObjSizeOptions.RoundToAlign = RoundToAlign; + + ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, ObjSizeOptions); SizeOffsetType Const = Visitor.compute(V); if (Visitor.bothKnown(Const)) return std::make_pair(ConstantInt::get(Context, Const.first), diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp index a0ae72f1415f..9db6c499129a 100644 --- a/lib/Analysis/MemoryLocation.cpp +++ b/lib/Analysis/MemoryLocation.cpp @@ -142,9 +142,9 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS, // for memcpy/memset. This is particularly important because the // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 // whenever possible. 
- LibFunc::Func F; + LibFunc F; if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) && - F == LibFunc::memset_pattern16 && TLI.has(F)) { + F == LibFunc_memset_pattern16 && TLI.has(F)) { assert((ArgIdx == 0 || ArgIdx == 1) && "Invalid argument index for memset_pattern16"); if (ArgIdx == 1) diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp new file mode 100644 index 000000000000..910170561abf --- /dev/null +++ b/lib/Analysis/MemorySSA.cpp @@ -0,0 +1,2059 @@ +//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------===// +// +// This file implements the MemorySSA class. +// +//===----------------------------------------------------------------===// +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Transforms/Scalar.h" +#include <algorithm> + +#define DEBUG_TYPE "memoryssa" +using namespace llvm; +INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, + true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, + true) + +INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa", + "Memory SSA Printer", false, false) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa", + "Memory SSA Printer", false, false) + +static cl::opt<unsigned> MaxCheckLimit( + "memssa-check-limit", cl::Hidden, cl::init(100), + cl::desc("The maximum number of stores/phis MemorySSA" + "will consider trying to walk past (default = 100)")); + +static cl::opt<bool> + VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden, + cl::desc("Verify MemorySSA in legacy printer pass.")); + +namespace llvm { +/// \brief An assembly annotator class to print Memory SSA information in +/// comments. 
+class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { + friend class MemorySSA; + const MemorySSA *MSSA; + +public: + MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) + OS << "; " << *MA << "\n"; + } + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + OS << "; " << *MA << "\n"; + } +}; +} + +namespace { +/// Our current alias analysis API differentiates heavily between calls and +/// non-calls, and functions called on one usually assert on the other. +/// This class encapsulates the distinction to simplify other code that wants +/// "Memory affecting instructions and related data" to use as a key. +/// For example, this class is used as a densemap key in the use optimizer. +class MemoryLocOrCall { +public: + MemoryLocOrCall() : IsCall(false) {} + MemoryLocOrCall(MemoryUseOrDef *MUD) + : MemoryLocOrCall(MUD->getMemoryInst()) {} + MemoryLocOrCall(const MemoryUseOrDef *MUD) + : MemoryLocOrCall(MUD->getMemoryInst()) {} + + MemoryLocOrCall(Instruction *Inst) { + if (ImmutableCallSite(Inst)) { + IsCall = true; + CS = ImmutableCallSite(Inst); + } else { + IsCall = false; + // There is no such thing as a memorylocation for a fence inst, and it is + // unique in that regard. + if (!isa<FenceInst>(Inst)) + Loc = MemoryLocation::get(Inst); + } + } + + explicit MemoryLocOrCall(const MemoryLocation &Loc) + : IsCall(false), Loc(Loc) {} + + bool IsCall; + ImmutableCallSite getCS() const { + assert(IsCall); + return CS; + } + MemoryLocation getLoc() const { + assert(!IsCall); + return Loc; + } + + bool operator==(const MemoryLocOrCall &Other) const { + if (IsCall != Other.IsCall) + return false; + + if (IsCall) + return CS.getCalledValue() == Other.CS.getCalledValue(); + return Loc == Other.Loc; + } + +private: + union { + ImmutableCallSite CS; + MemoryLocation Loc; + }; +}; +} + +namespace llvm { +template <> struct DenseMapInfo<MemoryLocOrCall> { + static inline MemoryLocOrCall getEmptyKey() { + return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); + } + static inline MemoryLocOrCall getTombstoneKey() { + return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); + } + static unsigned getHashValue(const MemoryLocOrCall &MLOC) { + if (MLOC.IsCall) + return hash_combine(MLOC.IsCall, + DenseMapInfo<const Value *>::getHashValue( + MLOC.getCS().getCalledValue())); + return hash_combine( + MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); + } + static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { + return LHS == RHS; + } +}; + +enum class Reorderability { Always, IfNoAlias, Never }; + +/// This does one-way checks to see if Use could theoretically be hoisted above +/// MayClobber. This will not check the other way around. +/// +/// This assumes that, for the purposes of MemorySSA, Use comes directly after +/// MayClobber, with no potentially clobbering operations in between them. +/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) +static Reorderability getLoadReorderability(const LoadInst *Use, + const LoadInst *MayClobber) { + bool VolatileUse = Use->isVolatile(); + bool VolatileClobber = MayClobber->isVolatile(); + // Volatile operations may never be reordered with other volatile operations. 
+ if (VolatileUse && VolatileClobber) + return Reorderability::Never; + + // The lang ref allows reordering of volatile and non-volatile operations. + // Whether an aliasing nonvolatile load and volatile load can be reordered, + // though, is ambiguous. Because it may not be best to exploit this ambiguity, + // we only allow volatile/non-volatile reordering if the volatile and + // non-volatile operations don't alias. + Reorderability Result = VolatileUse || VolatileClobber + ? Reorderability::IfNoAlias + : Reorderability::Always; + + // If a load is seq_cst, it cannot be moved above other loads. If its ordering + // is weaker, it can be moved above other loads. We just need to be sure that + // MayClobber isn't an acquire load, because loads can't be moved above + // acquire loads. + // + // Note that this explicitly *does* allow the free reordering of monotonic (or + // weaker) loads of the same address. + bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; + bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), + AtomicOrdering::Acquire); + if (SeqCstUse || MayClobberIsAcquire) + return Reorderability::Never; + return Result; +} + +static bool instructionClobbersQuery(MemoryDef *MD, + const MemoryLocation &UseLoc, + const Instruction *UseInst, + AliasAnalysis &AA) { + Instruction *DefInst = MD->getMemoryInst(); + assert(DefInst && "Defining instruction not actually an instruction"); + ImmutableCallSite UseCS(UseInst); + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { + // These intrinsics will show up as affecting memory, but they are just + // markers. + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + if (UseCS) + return false; + return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), UseLoc); + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::assume: + return false; + default: + break; + } + } + + if (UseCS) { + ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); + return I != MRI_NoModRef; + } + + if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) { + if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) { + switch (getLoadReorderability(UseLoad, DefLoad)) { + case Reorderability::Always: + return false; + case Reorderability::Never: + return true; + case Reorderability::IfNoAlias: + return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad)); + } + } + } + + return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod; +} + +static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, + const MemoryLocOrCall &UseMLOC, + AliasAnalysis &AA) { + // FIXME: This is a temporary hack to allow a single instructionClobbersQuery + // to exist while MemoryLocOrCall is pushed through places. + if (UseMLOC.IsCall) + return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(), + AA); + return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(), + AA); +} + +// Return true when MD may alias MU, return false otherwise. +bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, + AliasAnalysis &AA) { + return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA); +} +} + +namespace { +struct UpwardsMemoryQuery { + // True if our original query started off as a call + bool IsCall; + // The pointer location we started the query with. This will be empty if + // IsCall is true. + MemoryLocation StartingLoc; + // This is the instruction we were querying about. 
+  const Instruction *Inst;
+  // The MemoryAccess we actually got called with, used to test local domination
+  const MemoryAccess *OriginalAccess;
+
+  UpwardsMemoryQuery()
+      : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {}
+
+  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
+      : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
+    if (!IsCall)
+      StartingLoc = MemoryLocation::get(Inst);
+  }
+};
+
+static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
+                           AliasAnalysis &AA) {
+  Instruction *Inst = MD->getMemoryInst();
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::lifetime_end:
+      return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
+    default:
+      return false;
+    }
+  }
+  return false;
+}
+
+static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
+                                                   const Instruction *I) {
+  // If the memory can't be changed, then loads of the memory can't be
+  // clobbered.
+  //
+  // FIXME: We should handle invariant groups, as well. It's a bit harder,
+  // because we need to pay close attention to invariant group barriers.
+  return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
+                              AA.pointsToConstantMemory(cast<LoadInst>(I)->
+                                                          getPointerOperand()));
+}
+
+/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
+/// in between `Start` and `ClobberAt` can clobber `Start`.
+///
+/// This is meant to be as simple and self-contained as possible. Because it
+/// uses no cache, etc., it can be relatively expensive.
+///
+/// \param Start The MemoryAccess that we want to walk from.
+/// \param ClobberAt A clobber for Start.
+/// \param StartLoc The MemoryLocation for Start.
+/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
+/// \param Query The UpwardsMemoryQuery we used for our search.
+/// \param AA The AliasAnalysis we used for our search.
+static void LLVM_ATTRIBUTE_UNUSED
+checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
+                   const MemoryLocation &StartLoc, const MemorySSA &MSSA,
+                   const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
+  assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
+
+  if (MSSA.isLiveOnEntryDef(Start)) {
+    assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
+           "liveOnEntry must clobber itself");
+    return;
+  }
+
+  bool FoundClobber = false;
+  DenseSet<MemoryAccessPair> VisitedPhis;
+  SmallVector<MemoryAccessPair, 8> Worklist;
+  Worklist.emplace_back(Start, StartLoc);
+  // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
+  // is found, complain.
+  while (!Worklist.empty()) {
+    MemoryAccessPair MAP = Worklist.pop_back_val();
+    // All we care about is that nothing from Start to ClobberAt clobbers Start.
+    // We learn nothing from revisiting nodes.
+    if (!VisitedPhis.insert(MAP).second)
+      continue;
+
+    for (MemoryAccess *MA : def_chain(MAP.first)) {
+      if (MA == ClobberAt) {
+        if (auto *MD = dyn_cast<MemoryDef>(MA)) {
+          // instructionClobbersQuery isn't essentially free, so don't use `|=`,
+          // since it won't let us short-circuit.
+          //
+          // Also, note that this can't be hoisted out of the `Worklist` loop,
+          // since MD may only act as a clobber for 1 of N MemoryLocations.
+          FoundClobber =
+              FoundClobber || MSSA.isLiveOnEntryDef(MD) ||
+              instructionClobbersQuery(MD, MAP.second, Query.Inst, AA);
+        }
+        break;
+      }
+
+      // We should never hit liveOnEntry, unless it's the clobber.
+ assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); + + if (auto *MD = dyn_cast<MemoryDef>(MA)) { + (void)MD; + assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) && + "Found clobber before reaching ClobberAt!"); + continue; + } + + assert(isa<MemoryPhi>(MA)); + Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end()); + } + } + + // If ClobberAt is a MemoryPhi, we can assume something above it acted as a + // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. + assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && + "ClobberAt never acted as a clobber"); +} + +/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up +/// in one class. +class ClobberWalker { + /// Save a few bytes by using unsigned instead of size_t. + using ListIndex = unsigned; + + /// Represents a span of contiguous MemoryDefs, potentially ending in a + /// MemoryPhi. + struct DefPath { + MemoryLocation Loc; + // Note that, because we always walk in reverse, Last will always dominate + // First. Also note that First and Last are inclusive. + MemoryAccess *First; + MemoryAccess *Last; + Optional<ListIndex> Previous; + + DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, + Optional<ListIndex> Previous) + : Loc(Loc), First(First), Last(Last), Previous(Previous) {} + + DefPath(const MemoryLocation &Loc, MemoryAccess *Init, + Optional<ListIndex> Previous) + : DefPath(Loc, Init, Init, Previous) {} + }; + + const MemorySSA &MSSA; + AliasAnalysis &AA; + DominatorTree &DT; + UpwardsMemoryQuery *Query; + + // Phi optimization bookkeeping + SmallVector<DefPath, 32> Paths; + DenseSet<ConstMemoryAccessPair> VisitedPhis; + + /// Find the nearest def or phi that `From` can legally be optimized to. + const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { + assert(From->getNumOperands() && "Phi with no operands?"); + + BasicBlock *BB = From->getBlock(); + MemoryAccess *Result = MSSA.getLiveOnEntryDef(); + DomTreeNode *Node = DT.getNode(BB); + while ((Node = Node->getIDom())) { + auto *Defs = MSSA.getBlockDefs(Node->getBlock()); + if (Defs) + return &*Defs->rbegin(); + } + return Result; + } + + /// Result of calling walkToPhiOrClobber. + struct UpwardsWalkResult { + /// The "Result" of the walk. Either a clobber, the last thing we walked, or + /// both. + MemoryAccess *Result; + bool IsKnownClobber; + }; + + /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. + /// This will update Desc.Last as it walks. It will (optionally) also stop at + /// StopAt. 
+ /// + /// This does not test for whether StopAt is a clobber + UpwardsWalkResult + walkToPhiOrClobber(DefPath &Desc, + const MemoryAccess *StopAt = nullptr) const { + assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); + + for (MemoryAccess *Current : def_chain(Desc.Last)) { + Desc.Last = Current; + if (Current == StopAt) + return {Current, false}; + + if (auto *MD = dyn_cast<MemoryDef>(Current)) + if (MSSA.isLiveOnEntryDef(MD) || + instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA)) + return {MD, true}; + } + + assert(isa<MemoryPhi>(Desc.Last) && + "Ended at a non-clobber that's not a phi?"); + return {Desc.Last, false}; + } + + void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, + ListIndex PriorNode) { + auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), + upward_defs_end()); + for (const MemoryAccessPair &P : UpwardDefs) { + PausedSearches.push_back(Paths.size()); + Paths.emplace_back(P.second, P.first, PriorNode); + } + } + + /// Represents a search that terminated after finding a clobber. This clobber + /// may or may not be present in the path of defs from LastNode..SearchStart, + /// since it may have been retrieved from cache. + struct TerminatedPath { + MemoryAccess *Clobber; + ListIndex LastNode; + }; + + /// Get an access that keeps us from optimizing to the given phi. + /// + /// PausedSearches is an array of indices into the Paths array. Its incoming + /// value is the indices of searches that stopped at the last phi optimization + /// target. It's left in an unspecified state. + /// + /// If this returns None, NewPaused is a vector of searches that terminated + /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. + Optional<TerminatedPath> + getBlockingAccess(const MemoryAccess *StopWhere, + SmallVectorImpl<ListIndex> &PausedSearches, + SmallVectorImpl<ListIndex> &NewPaused, + SmallVectorImpl<TerminatedPath> &Terminated) { + assert(!PausedSearches.empty() && "No searches to continue?"); + + // BFS vs DFS really doesn't make a difference here, so just do a DFS with + // PausedSearches as our stack. + while (!PausedSearches.empty()) { + ListIndex PathIndex = PausedSearches.pop_back_val(); + DefPath &Node = Paths[PathIndex]; + + // If we've already visited this path with this MemoryLocation, we don't + // need to do so again. + // + // NOTE: That we just drop these paths on the ground makes caching + // behavior sporadic. e.g. given a diamond: + // A + // B C + // D + // + // ...If we walk D, B, A, C, we'll only cache the result of phi + // optimization for A, B, and D; C will be skipped because it dies here. + // This arguably isn't the worst thing ever, since: + // - We generally query things in a top-down order, so if we got below D + // without needing cache entries for {C, MemLoc}, then chances are + // that those cache entries would end up ultimately unused. + // - We still cache things for A, so C only needs to walk up a bit. + // If this behavior becomes problematic, we can fix without a ton of extra + // work. + if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) + continue; + + UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); + if (Res.IsKnownClobber) { + assert(Res.Result != StopWhere); + // If this wasn't a cache hit, we hit a clobber when walking. That's a + // failure. + TerminatedPath Term{Res.Result, PathIndex}; + if (!MSSA.dominates(Res.Result, StopWhere)) + return Term; + + // Otherwise, it's a valid thing to potentially optimize to. 
+ Terminated.push_back(Term); + continue; + } + + if (Res.Result == StopWhere) { + // We've hit our target. Save this path off for if we want to continue + // walking. + NewPaused.push_back(PathIndex); + continue; + } + + assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); + addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); + } + + return None; + } + + template <typename T, typename Walker> + struct generic_def_path_iterator + : public iterator_facade_base<generic_def_path_iterator<T, Walker>, + std::forward_iterator_tag, T *> { + generic_def_path_iterator() : W(nullptr), N(None) {} + generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} + + T &operator*() const { return curNode(); } + + generic_def_path_iterator &operator++() { + N = curNode().Previous; + return *this; + } + + bool operator==(const generic_def_path_iterator &O) const { + if (N.hasValue() != O.N.hasValue()) + return false; + return !N.hasValue() || *N == *O.N; + } + + private: + T &curNode() const { return W->Paths[*N]; } + + Walker *W; + Optional<ListIndex> N; + }; + + using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; + using const_def_path_iterator = + generic_def_path_iterator<const DefPath, const ClobberWalker>; + + iterator_range<def_path_iterator> def_path(ListIndex From) { + return make_range(def_path_iterator(this, From), def_path_iterator()); + } + + iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { + return make_range(const_def_path_iterator(this, From), + const_def_path_iterator()); + } + + struct OptznResult { + /// The path that contains our result. + TerminatedPath PrimaryClobber; + /// The paths that we can legally cache back from, but that aren't + /// necessarily the result of the Phi optimization. + SmallVector<TerminatedPath, 4> OtherClobbers; + }; + + ListIndex defPathIndex(const DefPath &N) const { + // The assert looks nicer if we don't need to do &N + const DefPath *NP = &N; + assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && + "Out of bounds DefPath!"); + return NP - &Paths.front(); + } + + /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths + /// that act as legal clobbers. Note that this won't return *all* clobbers. + /// + /// Phi optimization algorithm tl;dr: + /// - Find the earliest def/phi, A, we can optimize to + /// - Find if all paths from the starting memory access ultimately reach A + /// - If not, optimization isn't possible. + /// - Otherwise, walk from A to another clobber or phi, A'. + /// - If A' is a def, we're done. + /// - If A' is a phi, try to optimize it. + /// + /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path + /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. + OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, + const MemoryLocation &Loc) { + assert(Paths.empty() && VisitedPhis.empty() && + "Reset the optimization state."); + + Paths.emplace_back(Loc, Start, Phi, None); + // Stores how many "valid" optimization nodes we had prior to calling + // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. + auto PriorPathsSize = Paths.size(); + + SmallVector<ListIndex, 16> PausedSearches; + SmallVector<ListIndex, 8> NewPaused; + SmallVector<TerminatedPath, 4> TerminatedPaths; + + addSearches(Phi, PausedSearches, 0); + + // Moves the TerminatedPath with the "most dominated" Clobber to the end of + // Paths. 
+ auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { + assert(!Paths.empty() && "Need a path to move"); + auto Dom = Paths.begin(); + for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) + if (!MSSA.dominates(I->Clobber, Dom->Clobber)) + Dom = I; + auto Last = Paths.end() - 1; + if (Last != Dom) + std::iter_swap(Last, Dom); + }; + + MemoryPhi *Current = Phi; + while (1) { + assert(!MSSA.isLiveOnEntryDef(Current) && + "liveOnEntry wasn't treated as a clobber?"); + + const auto *Target = getWalkTarget(Current); + // If a TerminatedPath doesn't dominate Target, then it wasn't a legal + // optimization for the prior phi. + assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { + return MSSA.dominates(P.Clobber, Target); + })); + + // FIXME: This is broken, because the Blocker may be reported to be + // liveOnEntry, and we'll happily wait for that to disappear (read: never) + // For the moment, this is fine, since we do nothing with blocker info. + if (Optional<TerminatedPath> Blocker = getBlockingAccess( + Target, PausedSearches, NewPaused, TerminatedPaths)) { + + // Find the node we started at. We can't search based on N->Last, since + // we may have gone around a loop with a different MemoryLocation. + auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { + return defPathIndex(N) < PriorPathsSize; + }); + assert(Iter != def_path_iterator()); + + DefPath &CurNode = *Iter; + assert(CurNode.Last == Current); + + // Two things: + // A. We can't reliably cache all of NewPaused back. Consider a case + // where we have two paths in NewPaused; one of which can't optimize + // above this phi, whereas the other can. If we cache the second path + // back, we'll end up with suboptimal cache entries. We can handle + // cases like this a bit better when we either try to find all + // clobbers that block phi optimization, or when our cache starts + // supporting unfinished searches. + // B. We can't reliably cache TerminatedPaths back here without doing + // extra checks; consider a case like: + // T + // / \ + // D C + // \ / + // S + // Where T is our target, C is a node with a clobber on it, D is a + // diamond (with a clobber *only* on the left or right node, N), and + // S is our start. Say we walk to D, through the node opposite N + // (read: ignoring the clobber), and see a cache entry in the top + // node of D. That cache entry gets put into TerminatedPaths. We then + // walk up to C (N is later in our worklist), find the clobber, and + // quit. If we append TerminatedPaths to OtherClobbers, we'll cache + // the bottom part of D to the cached clobber, ignoring the clobber + // in N. Again, this problem goes away if we start tracking all + // blockers for a given phi optimization. + TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; + return {Result, {}}; + } + + // If there's nothing left to search, then all paths led to valid clobbers + // that we got from our cache; pick the nearest to the start, and allow + // the rest to be cached back. + if (NewPaused.empty()) { + MoveDominatedPathToEnd(TerminatedPaths); + TerminatedPath Result = TerminatedPaths.pop_back_val(); + return {Result, std::move(TerminatedPaths)}; + } + + MemoryAccess *DefChainEnd = nullptr; + SmallVector<TerminatedPath, 4> Clobbers; + for (ListIndex Paused : NewPaused) { + UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); + if (WR.IsKnownClobber) + Clobbers.push_back({WR.Result, Paused}); + else + // Micro-opt: If we hit the end of the chain, save it. 
+ DefChainEnd = WR.Result; + } + + if (!TerminatedPaths.empty()) { + // If we couldn't find the dominating phi/liveOnEntry in the above loop, + // do it now. + if (!DefChainEnd) + for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) + DefChainEnd = MA; + + // If any of the terminated paths don't dominate the phi we'll try to + // optimize, we need to figure out what they are and quit. + const BasicBlock *ChainBB = DefChainEnd->getBlock(); + for (const TerminatedPath &TP : TerminatedPaths) { + // Because we know that DefChainEnd is as "high" as we can go, we + // don't need local dominance checks; BB dominance is sufficient. + if (DT.dominates(ChainBB, TP.Clobber->getBlock())) + Clobbers.push_back(TP); + } + } + + // If we have clobbers in the def chain, find the one closest to Current + // and quit. + if (!Clobbers.empty()) { + MoveDominatedPathToEnd(Clobbers); + TerminatedPath Result = Clobbers.pop_back_val(); + return {Result, std::move(Clobbers)}; + } + + assert(all_of(NewPaused, + [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); + + // Because liveOnEntry is a clobber, this must be a phi. + auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); + + PriorPathsSize = Paths.size(); + PausedSearches.clear(); + for (ListIndex I : NewPaused) + addSearches(DefChainPhi, PausedSearches, I); + NewPaused.clear(); + + Current = DefChainPhi; + } + } + + void verifyOptResult(const OptznResult &R) const { + assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { + return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); + })); + } + + void resetPhiOptznState() { + Paths.clear(); + VisitedPhis.clear(); + } + +public: + ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) + : MSSA(MSSA), AA(AA), DT(DT) {} + + void reset() {} + + /// Finds the nearest clobber for the given query, optimizing phis if + /// possible. + MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { + Query = &Q; + + MemoryAccess *Current = Start; + // This walker pretends uses don't exist. If we're handed one, silently grab + // its def. (This has the nice side-effect of ensuring we never cache uses) + if (auto *MU = dyn_cast<MemoryUse>(Start)) + Current = MU->getDefiningAccess(); + + DefPath FirstDesc(Q.StartingLoc, Current, Current, None); + // Fast path for the overly-common case (no crazy phi optimization + // necessary) + UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); + MemoryAccess *Result; + if (WalkResult.IsKnownClobber) { + Result = WalkResult.Result; + } else { + OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), + Current, Q.StartingLoc); + verifyOptResult(OptRes); + resetPhiOptznState(); + Result = OptRes.PrimaryClobber.Clobber; + } + +#ifdef EXPENSIVE_CHECKS + checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); +#endif + return Result; + } + + void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } +}; + +struct RenamePassData { + DomTreeNode *DTN; + DomTreeNode::const_iterator ChildIt; + MemoryAccess *IncomingVal; + + RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, + MemoryAccess *M) + : DTN(D), ChildIt(It), IncomingVal(M) {} + void swap(RenamePassData &RHS) { + std::swap(DTN, RHS.DTN); + std::swap(ChildIt, RHS.ChildIt); + std::swap(IncomingVal, RHS.IncomingVal); + } +}; +} // anonymous namespace + +namespace llvm { +/// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no +/// longer does caching on its own, +/// but the name has been retained for the moment. 
+class MemorySSA::CachingWalker final : public MemorySSAWalker { + ClobberWalker Walker; + bool AutoResetWalker; + + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); + void verifyRemoved(MemoryAccess *); + +public: + CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); + ~CachingWalker() override; + + using MemorySSAWalker::getClobberingMemoryAccess; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + const MemoryLocation &) override; + void invalidateInfo(MemoryAccess *) override; + + /// Whether we call resetClobberWalker() after each time we *actually* walk to + /// answer a clobber query. + void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; } + + /// Drop the walker's persistent data structures. + void resetClobberWalker() { Walker.reset(); } + + void verify(const MemorySSA *MSSA) override { + MemorySSAWalker::verify(MSSA); + Walker.verify(MSSA); + } +}; + +void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, + bool RenameAllUses) { + // Pass through values to our successors + for (const BasicBlock *S : successors(BB)) { + auto It = PerBlockAccesses.find(S); + // Rename the phi nodes in our successor block + if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) + continue; + AccessList *Accesses = It->second.get(); + auto *Phi = cast<MemoryPhi>(&Accesses->front()); + if (RenameAllUses) { + int PhiIndex = Phi->getBasicBlockIndex(BB); + assert(PhiIndex != -1 && "Incomplete phi during partial rename"); + Phi->setIncomingValue(PhiIndex, IncomingVal); + } else + Phi->addIncoming(IncomingVal, BB); + } +} + +/// \brief Rename a single basic block into MemorySSA form. +/// Uses the standard SSA renaming algorithm. +/// \returns The new incoming value. +MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, + bool RenameAllUses) { + auto It = PerBlockAccesses.find(BB); + // Skip most processing if the list is empty. + if (It != PerBlockAccesses.end()) { + AccessList *Accesses = It->second.get(); + for (MemoryAccess &L : *Accesses) { + if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { + if (MUD->getDefiningAccess() == nullptr || RenameAllUses) + MUD->setDefiningAccess(IncomingVal); + if (isa<MemoryDef>(&L)) + IncomingVal = &L; + } else { + IncomingVal = &L; + } + } + } + return IncomingVal; +} + +/// \brief This is the standard SSA renaming algorithm. +/// +/// We walk the dominator tree in preorder, renaming accesses, and then filling +/// in phi nodes in our successors. +void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, + SmallPtrSetImpl<BasicBlock *> &Visited, + bool SkipVisited, bool RenameAllUses) { + SmallVector<RenamePassData, 32> WorkStack; + // Skip everything if we already renamed this block and we are skipping. + // Note: You can't sink this into the if, because we need it to occur + // regardless of whether we skip blocks or not. 
+ bool AlreadyVisited = !Visited.insert(Root->getBlock()).second; + if (SkipVisited && AlreadyVisited) + return; + + IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses); + renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses); + WorkStack.push_back({Root, Root->begin(), IncomingVal}); + + while (!WorkStack.empty()) { + DomTreeNode *Node = WorkStack.back().DTN; + DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; + IncomingVal = WorkStack.back().IncomingVal; + + if (ChildIt == Node->end()) { + WorkStack.pop_back(); + } else { + DomTreeNode *Child = *ChildIt; + ++WorkStack.back().ChildIt; + BasicBlock *BB = Child->getBlock(); + // Note: You can't sink this into the if, because we need it to occur + // regardless of whether we skip blocks or not. + AlreadyVisited = !Visited.insert(BB).second; + if (SkipVisited && AlreadyVisited) { + // We already visited this during our renaming, which can happen when + // being asked to rename multiple blocks. Figure out the incoming val, + // which is the last def. + // Incoming value can only change if there is a block def, and in that + // case, it's the last block def in the list. + if (auto *BlockDefs = getWritableBlockDefs(BB)) + IncomingVal = &*BlockDefs->rbegin(); + } else + IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses); + renameSuccessorPhis(BB, IncomingVal, RenameAllUses); + WorkStack.push_back({Child, Child->begin(), IncomingVal}); + } + } +} + +/// \brief This handles unreachable block accesses by deleting phi nodes in +/// unreachable blocks, and marking all other unreachable MemoryAccess's as +/// being uses of the live on entry definition. +void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { + assert(!DT->isReachableFromEntry(BB) && + "Reachable block found while handling unreachable blocks"); + + // Make sure phi nodes in our reachable successors end up with a + // LiveOnEntryDef for our incoming edge, even though our block is forward + // unreachable. We could just disconnect these blocks from the CFG fully, + // but we do not right now. + for (const BasicBlock *S : successors(BB)) { + if (!DT->isReachableFromEntry(S)) + continue; + auto It = PerBlockAccesses.find(S); + // Rename the phi nodes in our successor block + if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) + continue; + AccessList *Accesses = It->second.get(); + auto *Phi = cast<MemoryPhi>(&Accesses->front()); + Phi->addIncoming(LiveOnEntryDef.get(), BB); + } + + auto It = PerBlockAccesses.find(BB); + if (It == PerBlockAccesses.end()) + return; + + auto &Accesses = It->second; + for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { + auto Next = std::next(AI); + // If we have a phi, just remove it. We are going to replace all + // users with live on entry. 
+ if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) + UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); + else + Accesses->erase(AI); + AI = Next; + } +} + +MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) + : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), + NextID(INVALID_MEMORYACCESS_ID) { + buildMemorySSA(); +} + +MemorySSA::~MemorySSA() { + // Drop all our references + for (const auto &Pair : PerBlockAccesses) + for (MemoryAccess &MA : *Pair.second) + MA.dropAllReferences(); +} + +MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { + auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); + + if (Res.second) + Res.first->second = make_unique<AccessList>(); + return Res.first->second.get(); +} +MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { + auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); + + if (Res.second) + Res.first->second = make_unique<DefsList>(); + return Res.first->second.get(); +} + +/// This class is a batch walker of all MemoryUse's in the program, and points +/// their defining access at the thing that actually clobbers them. Because it +/// is a batch walker that touches everything, it does not operate like the +/// other walkers. This walker is basically performing a top-down SSA renaming +/// pass, where the version stack is used as the cache. This enables it to be +/// significantly more time and memory efficient than using the regular walker, +/// which is walking bottom-up. +class MemorySSA::OptimizeUses { +public: + OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA, + DominatorTree *DT) + : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) { + Walker = MSSA->getWalker(); + } + + void optimizeUses(); + +private: + /// This represents where a given memorylocation is in the stack. + struct MemlocStackInfo { + // This essentially is keeping track of versions of the stack. Whenever + // the stack changes due to pushes or pops, these versions increase. + unsigned long StackEpoch; + unsigned long PopEpoch; + // This is the lower bound of places on the stack to check. It is equal to + // the place the last stack walk ended. + // Note: Correctness depends on this being initialized to 0, which densemap + // does + unsigned long LowerBound; + const BasicBlock *LowerBoundBlock; + // This is where the last walk for this memory location ended. + unsigned long LastKill; + bool LastKillValid; + }; + void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &, + SmallVectorImpl<MemoryAccess *> &, + DenseMap<MemoryLocOrCall, MemlocStackInfo> &); + MemorySSA *MSSA; + MemorySSAWalker *Walker; + AliasAnalysis *AA; + DominatorTree *DT; +}; + +/// Optimize the uses in a given block This is basically the SSA renaming +/// algorithm, with one caveat: We are able to use a single stack for all +/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is +/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just +/// going to be some position in that stack of possible ones. +/// +/// We track the stack positions that each MemoryLocation needs +/// to check, and last ended at. This is because we only want to check the +/// things that changed since last time. 
The same MemoryLocation should +/// get clobbered by the same store (getModRefInfo does not use invariantness or +/// things like this, and if they start, we can modify MemoryLocOrCall to +/// include relevant data) +void MemorySSA::OptimizeUses::optimizeUsesInBlock( + const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch, + SmallVectorImpl<MemoryAccess *> &VersionStack, + DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) { + + /// If no accesses, nothing to do. + MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB); + if (Accesses == nullptr) + return; + + // Pop everything that doesn't dominate the current block off the stack, + // increment the PopEpoch to account for this. + while (true) { + assert( + !VersionStack.empty() && + "Version stack should have liveOnEntry sentinel dominating everything"); + BasicBlock *BackBlock = VersionStack.back()->getBlock(); + if (DT->dominates(BackBlock, BB)) + break; + while (VersionStack.back()->getBlock() == BackBlock) + VersionStack.pop_back(); + ++PopEpoch; + } + + for (MemoryAccess &MA : *Accesses) { + auto *MU = dyn_cast<MemoryUse>(&MA); + if (!MU) { + VersionStack.push_back(&MA); + ++StackEpoch; + continue; + } + + if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) { + MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true); + continue; + } + + MemoryLocOrCall UseMLOC(MU); + auto &LocInfo = LocStackInfo[UseMLOC]; + // If the pop epoch changed, it means we've removed stuff from top of + // stack due to changing blocks. We may have to reset the lower bound or + // last kill info. + if (LocInfo.PopEpoch != PopEpoch) { + LocInfo.PopEpoch = PopEpoch; + LocInfo.StackEpoch = StackEpoch; + // If the lower bound was in something that no longer dominates us, we + // have to reset it. + // We can't simply track stack size, because the stack may have had + // pushes/pops in the meantime. + // XXX: This is non-optimal, but only is slower cases with heavily + // branching dominator trees. To get the optimal number of queries would + // be to make lowerbound and lastkill a per-loc stack, and pop it until + // the top of that stack dominates us. This does not seem worth it ATM. + // A much cheaper optimization would be to always explore the deepest + // branch of the dominator tree first. This will guarantee this resets on + // the smallest set of blocks. + if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB && + !DT->dominates(LocInfo.LowerBoundBlock, BB)) { + // Reset the lower bound of things to check. + // TODO: Some day we should be able to reset to last kill, rather than + // 0. + LocInfo.LowerBound = 0; + LocInfo.LowerBoundBlock = VersionStack[0]->getBlock(); + LocInfo.LastKillValid = false; + } + } else if (LocInfo.StackEpoch != StackEpoch) { + // If all that has changed is the StackEpoch, we only have to check the + // new things on the stack, because we've checked everything before. In + // this case, the lower bound of things to check remains the same. + LocInfo.PopEpoch = PopEpoch; + LocInfo.StackEpoch = StackEpoch; + } + if (!LocInfo.LastKillValid) { + LocInfo.LastKill = VersionStack.size() - 1; + LocInfo.LastKillValid = true; + } + + // At this point, we should have corrected last kill and LowerBound to be + // in bounds. + assert(LocInfo.LowerBound < VersionStack.size() && + "Lower bound out of range"); + assert(LocInfo.LastKill < VersionStack.size() && + "Last kill info out of range"); + // In any case, the new upper bound is the top of the stack. 
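+ // (The top of the stack is the most recent dominating MemoryDef or MemoryPhi
+ // pushed so far, so it bounds the range of candidates for this use.)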
+ unsigned long UpperBound = VersionStack.size() - 1; + + if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { + DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" + << *(MU->getMemoryInst()) << ")" + << " because there are " << UpperBound - LocInfo.LowerBound + << " stores to disambiguate\n"); + // Because we did not walk, LastKill is no longer valid, as this may + // have been a kill. + LocInfo.LastKillValid = false; + continue; + } + bool FoundClobberResult = false; + while (UpperBound > LocInfo.LowerBound) { + if (isa<MemoryPhi>(VersionStack[UpperBound])) { + // For phis, use the walker, see where we ended up, go there + Instruction *UseInst = MU->getMemoryInst(); + MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); + // We are guaranteed to find it or something is wrong + while (VersionStack[UpperBound] != Result) { + assert(UpperBound != 0); + --UpperBound; + } + FoundClobberResult = true; + break; + } + + MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); + // If the lifetime of the pointer ends at this instruction, it's live on + // entry. + if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { + // Reset UpperBound to liveOnEntryDef's place in the stack + UpperBound = 0; + FoundClobberResult = true; + break; + } + if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) { + FoundClobberResult = true; + break; + } + --UpperBound; + } + // At the end of this loop, UpperBound is either a clobber, or lower bound + // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. + if (FoundClobberResult || UpperBound < LocInfo.LastKill) { + MU->setDefiningAccess(VersionStack[UpperBound], true); + // We were last killed now by where we got to + LocInfo.LastKill = UpperBound; + } else { + // Otherwise, we checked all the new ones, and now we know we can get to + // LastKill. + MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true); + } + LocInfo.LowerBound = VersionStack.size() - 1; + LocInfo.LowerBoundBlock = BB; + } +} + +/// Optimize uses to point to their actual clobbering definitions. +void MemorySSA::OptimizeUses::optimizeUses() { + SmallVector<MemoryAccess *, 16> VersionStack; + DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; + VersionStack.push_back(MSSA->getLiveOnEntryDef()); + + unsigned long StackEpoch = 1; + unsigned long PopEpoch = 1; + // We perform a non-recursive top-down dominator tree walk. + for (const auto *DomNode : depth_first(DT->getRootNode())) + optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, + LocStackInfo); +} + +void MemorySSA::placePHINodes( + const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks, + const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) { + // Determine where our MemoryPhi's should go + ForwardIDFCalculator IDFs(*DT); + IDFs.setDefiningBlocks(DefiningBlocks); + SmallVector<BasicBlock *, 32> IDFBlocks; + IDFs.calculate(IDFBlocks); + + std::sort(IDFBlocks.begin(), IDFBlocks.end(), + [&BBNumbers](const BasicBlock *A, const BasicBlock *B) { + return BBNumbers.lookup(A) < BBNumbers.lookup(B); + }); + + // Now place MemoryPhi nodes. + for (auto &BB : IDFBlocks) + createMemoryPhi(BB); +} + +void MemorySSA::buildMemorySSA() { + // We create an access to represent "live on entry", for things like + // arguments or users of globals, where the memory they use is defined before + // the beginning of the function. We do not actually insert it into the IR. 
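+ // (In printed MemorySSA this definition shows up under the special name
+ // "liveOnEntry".)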
+ // We do not define a live on exit for the immediate uses, and thus our + // semantics do *not* imply that something with no immediate uses can simply + // be removed. + BasicBlock &StartingPoint = F.getEntryBlock(); + LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr, + &StartingPoint, NextID++); + DenseMap<const BasicBlock *, unsigned int> BBNumbers; + unsigned NextBBNum = 0; + + // We maintain lists of memory accesses per-block, trading memory for time. We + // could just look up the memory access for every possible instruction in the + // stream. + SmallPtrSet<BasicBlock *, 32> DefiningBlocks; + SmallPtrSet<BasicBlock *, 32> DefUseBlocks; + // Go through each block, figure out where defs occur, and chain together all + // the accesses. + for (BasicBlock &B : F) { + BBNumbers[&B] = NextBBNum++; + bool InsertIntoDef = false; + AccessList *Accesses = nullptr; + DefsList *Defs = nullptr; + for (Instruction &I : B) { + MemoryUseOrDef *MUD = createNewAccess(&I); + if (!MUD) + continue; + + if (!Accesses) + Accesses = getOrCreateAccessList(&B); + Accesses->push_back(MUD); + if (isa<MemoryDef>(MUD)) { + InsertIntoDef = true; + if (!Defs) + Defs = getOrCreateDefsList(&B); + Defs->push_back(*MUD); + } + } + if (InsertIntoDef) + DefiningBlocks.insert(&B); + if (Accesses) + DefUseBlocks.insert(&B); + } + placePHINodes(DefiningBlocks, BBNumbers); + + // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get + // filled in with all blocks. + SmallPtrSet<BasicBlock *, 16> Visited; + renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); + + CachingWalker *Walker = getWalkerImpl(); + + // We're doing a batch of updates; don't drop useful caches between them. + Walker->setAutoResetWalker(false); + OptimizeUses(this, Walker, AA, DT).optimizeUses(); + Walker->setAutoResetWalker(true); + Walker->resetClobberWalker(); + + // Mark the uses in unreachable blocks as live on entry, so that they go + // somewhere. + for (auto &BB : F) + if (!Visited.count(&BB)) + markUnreachableAsLiveOnEntry(&BB); +} + +MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } + +MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { + if (Walker) + return Walker.get(); + + Walker = make_unique<CachingWalker>(this, AA, DT); + return Walker.get(); +} + +// This is a helper function used by the creation routines. It places NewAccess +// into the access and defs lists for a given basic block, at the given +// insertion point. +void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess, + const BasicBlock *BB, + InsertionPlace Point) { + auto *Accesses = getOrCreateAccessList(BB); + if (Point == Beginning) { + // If it's a phi node, it goes first, otherwise, it goes after any phi + // nodes. 
+ if (isa<MemoryPhi>(NewAccess)) {
+ Accesses->push_front(NewAccess);
+ auto *Defs = getOrCreateDefsList(BB);
+ Defs->push_front(*NewAccess);
+ } else {
+ auto AI = find_if_not(
+ *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
+ Accesses->insert(AI, NewAccess);
+ if (!isa<MemoryUse>(NewAccess)) {
+ auto *Defs = getOrCreateDefsList(BB);
+ auto DI = find_if_not(
+ *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
+ Defs->insert(DI, *NewAccess);
+ }
+ }
+ } else {
+ Accesses->push_back(NewAccess);
+ if (!isa<MemoryUse>(NewAccess)) {
+ auto *Defs = getOrCreateDefsList(BB);
+ Defs->push_back(*NewAccess);
+ }
+ }
+ BlockNumberingValid.erase(BB);
+}
+
+void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
+ AccessList::iterator InsertPt) {
+ auto *Accesses = getWritableBlockAccesses(BB);
+ bool WasEnd = InsertPt == Accesses->end();
+ Accesses->insert(AccessList::iterator(InsertPt), What);
+ if (!isa<MemoryUse>(What)) {
+ auto *Defs = getOrCreateDefsList(BB);
+ // If we got asked to insert at the end, we have an easy job, just shove it
+ // at the end. If we got asked to insert before an existing def, we also get
+ // an iterator. If we got asked to insert before a use, we have to hunt for
+ // the next def.
+ if (WasEnd) {
+ Defs->push_back(*What);
+ } else if (isa<MemoryDef>(InsertPt)) {
+ Defs->insert(InsertPt->getDefsIterator(), *What);
+ } else {
+ while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt))
+ ++InsertPt;
+ // Either we found a def, or we are inserting at the end
+ if (InsertPt == Accesses->end())
+ Defs->push_back(*What);
+ else
+ Defs->insert(InsertPt->getDefsIterator(), *What);
+ }
+ }
+ BlockNumberingValid.erase(BB);
+}
+
+// Move What before Where in the IR. The end result is that What will belong to
+// the right lists and have the right Block set, but will not otherwise be
+// correct. It will not have the right defining access, and if it is a def,
+// things below it will not properly be updated.
+void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
+ AccessList::iterator Where) {
+ // Keep it in the lookup tables, remove from the lists
+ removeFromLists(What, false);
+ What->setBlock(BB);
+ insertIntoListsBefore(What, BB, Where);
+}
+
+void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
+ InsertionPlace Point) {
+ removeFromLists(What, false);
+ What->setBlock(BB);
+ insertIntoListsForBlock(What, BB, Point);
+}
+
+MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
+ assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
+ MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
+ // Phis are always placed at the front of the block.
+ insertIntoListsForBlock(Phi, BB, Beginning);
+ ValueToMemoryAccess[BB] = Phi;
+ return Phi;
+}
+
+MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
+ MemoryAccess *Definition) {
+ assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
+ MemoryUseOrDef *NewAccess = createNewAccess(I);
+ assert(
+ NewAccess != nullptr &&
+ "Tried to create a memory access for a non-memory touching instruction");
+ NewAccess->setDefiningAccess(Definition);
+ return NewAccess;
+}
+
+// Return true if the instruction has ordering constraints.
+// Note specifically that this only considers stores and loads
+// because others are still considered ModRef by getModRefInfo.
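+// For example, a volatile load is not "unordered", so isOrdered() returns true
+// for it and createNewAccess() below models it as a MemoryDef even though it
+// only reads memory.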
+static inline bool isOrdered(const Instruction *I) { + if (auto *SI = dyn_cast<StoreInst>(I)) { + if (!SI->isUnordered()) + return true; + } else if (auto *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isUnordered()) + return true; + } + return false; +} +/// \brief Helper function to create new memory accesses +MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { + // The assume intrinsic has a control dependency which we model by claiming + // that it writes arbitrarily. Ignore that fake memory dependency here. + // FIXME: Replace this special casing with a more accurate modelling of + // assume's control dependency. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + if (II->getIntrinsicID() == Intrinsic::assume) + return nullptr; + + // Find out what affect this instruction has on memory. + ModRefInfo ModRef = AA->getModRefInfo(I); + // The isOrdered check is used to ensure that volatiles end up as defs + // (atomics end up as ModRef right now anyway). Until we separate the + // ordering chain from the memory chain, this enables people to see at least + // some relative ordering to volatiles. Note that getClobberingMemoryAccess + // will still give an answer that bypasses other volatile loads. TODO: + // Separate memory aliasing and ordering into two different chains so that we + // can precisely represent both "what memory will this read/write/is clobbered + // by" and "what instructions can I move this past". + bool Def = bool(ModRef & MRI_Mod) || isOrdered(I); + bool Use = bool(ModRef & MRI_Ref); + + // It's possible for an instruction to not modify memory at all. During + // construction, we ignore them. + if (!Def && !Use) + return nullptr; + + assert((Def || Use) && + "Trying to create a memory access with a non-memory instruction"); + + MemoryUseOrDef *MUD; + if (Def) + MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++); + else + MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent()); + ValueToMemoryAccess[I] = MUD; + return MUD; +} + +/// \brief Returns true if \p Replacer dominates \p Replacee . +bool MemorySSA::dominatesUse(const MemoryAccess *Replacer, + const MemoryAccess *Replacee) const { + if (isa<MemoryUseOrDef>(Replacee)) + return DT->dominates(Replacer->getBlock(), Replacee->getBlock()); + const auto *MP = cast<MemoryPhi>(Replacee); + // For a phi node, the use occurs in the predecessor block of the phi node. + // Since we may occur multiple times in the phi node, we have to check each + // operand to ensure Replacer dominates each operand where Replacee occurs. + for (const Use &Arg : MP->operands()) { + if (Arg.get() != Replacee && + !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg))) + return false; + } + return true; +} + +/// \brief Properly remove \p MA from all of MemorySSA's lookup tables. 
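+/// This clears the defining access, drops \p MA from the block numbering and
+/// ValueToMemoryAccess maps, and (for defs and phis) invalidates the walker's
+/// cached information.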
+void MemorySSA::removeFromLookups(MemoryAccess *MA) { + assert(MA->use_empty() && + "Trying to remove memory access that still has uses"); + BlockNumbering.erase(MA); + if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) + MUD->setDefiningAccess(nullptr); + // Invalidate our walker's cache if necessary + if (!isa<MemoryUse>(MA)) + Walker->invalidateInfo(MA); + // The call below to erase will destroy MA, so we can't change the order we + // are doing things here + Value *MemoryInst; + if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) { + MemoryInst = MUD->getMemoryInst(); + } else { + MemoryInst = MA->getBlock(); + } + auto VMA = ValueToMemoryAccess.find(MemoryInst); + if (VMA->second == MA) + ValueToMemoryAccess.erase(VMA); +} + +/// \brief Properly remove \p MA from all of MemorySSA's lists. +/// +/// Because of the way the intrusive list and use lists work, it is important to +/// do removal in the right order. +/// ShouldDelete defaults to true, and will cause the memory access to also be +/// deleted, not just removed. +void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) { + // The access list owns the reference, so we erase it from the non-owning list + // first. + if (!isa<MemoryUse>(MA)) { + auto DefsIt = PerBlockDefs.find(MA->getBlock()); + std::unique_ptr<DefsList> &Defs = DefsIt->second; + Defs->remove(*MA); + if (Defs->empty()) + PerBlockDefs.erase(DefsIt); + } + + // The erase call here will delete it. If we don't want it deleted, we call + // remove instead. + auto AccessIt = PerBlockAccesses.find(MA->getBlock()); + std::unique_ptr<AccessList> &Accesses = AccessIt->second; + if (ShouldDelete) + Accesses->erase(MA); + else + Accesses->remove(MA); + + if (Accesses->empty()) + PerBlockAccesses.erase(AccessIt); +} + +void MemorySSA::print(raw_ostream &OS) const { + MemorySSAAnnotatedWriter Writer(this); + F.print(OS, &Writer); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); } +#endif + +void MemorySSA::verifyMemorySSA() const { + verifyDefUses(F); + verifyDomination(F); + verifyOrdering(F); + Walker->verify(this); +} + +/// \brief Verify that the order and existence of MemoryAccesses matches the +/// order and existence of memory affecting instructions. +void MemorySSA::verifyOrdering(Function &F) const { + // Walk all the blocks, comparing what the lookups think and what the access + // lists think, as well as the order in the blocks vs the order in the access + // lists. + SmallVector<MemoryAccess *, 32> ActualAccesses; + SmallVector<MemoryAccess *, 32> ActualDefs; + for (BasicBlock &B : F) { + const AccessList *AL = getBlockAccesses(&B); + const auto *DL = getBlockDefs(&B); + MemoryAccess *Phi = getMemoryAccess(&B); + if (Phi) { + ActualAccesses.push_back(Phi); + ActualDefs.push_back(Phi); + } + + for (Instruction &I : B) { + MemoryAccess *MA = getMemoryAccess(&I); + assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) && + "We have memory affecting instructions " + "in this block but they are not in the " + "access list or defs list"); + if (MA) { + ActualAccesses.push_back(MA); + if (isa<MemoryDef>(MA)) + ActualDefs.push_back(MA); + } + } + // Either we hit the assert, really have no accesses, or we have both + // accesses and an access list. + // Same with defs. 
+ if (!AL && !DL) + continue; + assert(AL->size() == ActualAccesses.size() && + "We don't have the same number of accesses in the block as on the " + "access list"); + assert((DL || ActualDefs.size() == 0) && + "Either we should have a defs list, or we should have no defs"); + assert((!DL || DL->size() == ActualDefs.size()) && + "We don't have the same number of defs in the block as on the " + "def list"); + auto ALI = AL->begin(); + auto AAI = ActualAccesses.begin(); + while (ALI != AL->end() && AAI != ActualAccesses.end()) { + assert(&*ALI == *AAI && "Not the same accesses in the same order"); + ++ALI; + ++AAI; + } + ActualAccesses.clear(); + if (DL) { + auto DLI = DL->begin(); + auto ADI = ActualDefs.begin(); + while (DLI != DL->end() && ADI != ActualDefs.end()) { + assert(&*DLI == *ADI && "Not the same defs in the same order"); + ++DLI; + ++ADI; + } + } + ActualDefs.clear(); + } +} + +/// \brief Verify the domination properties of MemorySSA by checking that each +/// definition dominates all of its uses. +void MemorySSA::verifyDomination(Function &F) const { +#ifndef NDEBUG + for (BasicBlock &B : F) { + // Phi nodes are attached to basic blocks + if (MemoryPhi *MP = getMemoryAccess(&B)) + for (const Use &U : MP->uses()) + assert(dominates(MP, U) && "Memory PHI does not dominate it's uses"); + + for (Instruction &I : B) { + MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I)); + if (!MD) + continue; + + for (const Use &U : MD->uses()) + assert(dominates(MD, U) && "Memory Def does not dominate it's uses"); + } + } +#endif +} + +/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use +/// appears in the use list of \p Def. + +void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { +#ifndef NDEBUG + // The live on entry use may cause us to get a NULL def here + if (!Def) + assert(isLiveOnEntryDef(Use) && + "Null def but use not point to live on entry def"); + else + assert(is_contained(Def->users(), Use) && + "Did not find use in def's use list"); +#endif +} + +/// \brief Verify the immediate use information, by walking all the memory +/// accesses and verifying that, for each use, it appears in the +/// appropriate def's use list +void MemorySSA::verifyDefUses(Function &F) const { + for (BasicBlock &B : F) { + // Phi nodes are attached to basic blocks + if (MemoryPhi *Phi = getMemoryAccess(&B)) { + assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance( + pred_begin(&B), pred_end(&B))) && + "Incomplete MemoryPhi Node"); + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) + verifyUseInDefs(Phi->getIncomingValue(I), Phi); + } + + for (Instruction &I : B) { + if (MemoryUseOrDef *MA = getMemoryAccess(&I)) { + verifyUseInDefs(MA->getDefiningAccess(), MA); + } + } + } +} + +MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const { + return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I)); +} + +MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const { + return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB))); +} + +/// Perform a local numbering on blocks so that instruction ordering can be +/// determined in constant time. +/// TODO: We currently just number in order. If we numbered by N, we could +/// allow at least N-1 sequences of insertBefore or insertAfter (and at least +/// log2(N) sequences of mixed before and after) without needing to invalidate +/// the numbering. 
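+/// For example, numbering in steps of 10 (10, 20, 30, ...) would leave nine
+/// unused slots between adjacent accesses for later insertions before a full
+/// renumbering becomes necessary.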
+void MemorySSA::renumberBlock(const BasicBlock *B) const { + // The pre-increment ensures the numbers really start at 1. + unsigned long CurrentNumber = 0; + const AccessList *AL = getBlockAccesses(B); + assert(AL != nullptr && "Asking to renumber an empty block"); + for (const auto &I : *AL) + BlockNumbering[&I] = ++CurrentNumber; + BlockNumberingValid.insert(B); +} + +/// \brief Determine, for two memory accesses in the same block, +/// whether \p Dominator dominates \p Dominatee. +/// \returns True if \p Dominator dominates \p Dominatee. +bool MemorySSA::locallyDominates(const MemoryAccess *Dominator, + const MemoryAccess *Dominatee) const { + + const BasicBlock *DominatorBlock = Dominator->getBlock(); + + assert((DominatorBlock == Dominatee->getBlock()) && + "Asking for local domination when accesses are in different blocks!"); + // A node dominates itself. + if (Dominatee == Dominator) + return true; + + // When Dominatee is defined on function entry, it is not dominated by another + // memory access. + if (isLiveOnEntryDef(Dominatee)) + return false; + + // When Dominator is defined on function entry, it dominates the other memory + // access. + if (isLiveOnEntryDef(Dominator)) + return true; + + if (!BlockNumberingValid.count(DominatorBlock)) + renumberBlock(DominatorBlock); + + unsigned long DominatorNum = BlockNumbering.lookup(Dominator); + // All numbers start with 1 + assert(DominatorNum != 0 && "Block was not numbered properly"); + unsigned long DominateeNum = BlockNumbering.lookup(Dominatee); + assert(DominateeNum != 0 && "Block was not numbered properly"); + return DominatorNum < DominateeNum; +} + +bool MemorySSA::dominates(const MemoryAccess *Dominator, + const MemoryAccess *Dominatee) const { + if (Dominator == Dominatee) + return true; + + if (isLiveOnEntryDef(Dominatee)) + return false; + + if (Dominator->getBlock() != Dominatee->getBlock()) + return DT->dominates(Dominator->getBlock(), Dominatee->getBlock()); + return locallyDominates(Dominator, Dominatee); +} + +bool MemorySSA::dominates(const MemoryAccess *Dominator, + const Use &Dominatee) const { + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) { + BasicBlock *UseBB = MP->getIncomingBlock(Dominatee); + // The def must dominate the incoming block of the phi. + if (UseBB != Dominator->getBlock()) + return DT->dominates(Dominator->getBlock(), UseBB); + // If the UseBB and the DefBB are the same, compare locally. + return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee)); + } + // If it's not a PHI node use, the normal dominates can already handle it. 
+ return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser())); +} + +const static char LiveOnEntryStr[] = "liveOnEntry"; + +void MemoryDef::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + + OS << getID() << " = MemoryDef("; + if (UO && UO->getID()) + OS << UO->getID(); + else + OS << LiveOnEntryStr; + OS << ')'; +} + +void MemoryPhi::print(raw_ostream &OS) const { + bool First = true; + OS << getID() << " = MemoryPhi("; + for (const auto &Op : operands()) { + BasicBlock *BB = getIncomingBlock(Op); + MemoryAccess *MA = cast<MemoryAccess>(Op); + if (!First) + OS << ','; + else + First = false; + + OS << '{'; + if (BB->hasName()) + OS << BB->getName(); + else + BB->printAsOperand(OS, false); + OS << ','; + if (unsigned ID = MA->getID()) + OS << ID; + else + OS << LiveOnEntryStr; + OS << '}'; + } + OS << ')'; +} + +MemoryAccess::~MemoryAccess() {} + +void MemoryUse::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + OS << "MemoryUse("; + if (UO && UO->getID()) + OS << UO->getID(); + else + OS << LiveOnEntryStr; + OS << ')'; +} + +void MemoryAccess::dump() const { +// Cannot completely remove virtual function even in release mode. +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + print(dbgs()); + dbgs() << "\n"; +#endif +} + +char MemorySSAPrinterLegacyPass::ID = 0; + +MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { + initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); +} + +bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { + auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); + MSSA.print(dbgs()); + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); + return false; +} + +AnalysisKey MemorySSAAnalysis::Key; + +MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT)); +} + +PreservedAnalyses MemorySSAPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + OS << "MemorySSA for function: " << F.getName() << "\n"; + AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS); + + return PreservedAnalyses::all(); +} + +PreservedAnalyses MemorySSAVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA(); + + return PreservedAnalyses::all(); +} + +char MemorySSAWrapperPass::ID = 0; + +MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { + initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } + +void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequiredTransitive<AAResultsWrapperPass>(); +} + +bool MemorySSAWrapperPass::runOnFunction(Function &F) { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + MSSA.reset(new MemorySSA(F, &AA, &DT)); + return false; +} + +void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); } + +void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { + 
MSSA->print(OS); +} + +MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} + +MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A, + DominatorTree *D) + : MemorySSAWalker(M), Walker(*M, *A, *D), AutoResetWalker(true) {} + +MemorySSA::CachingWalker::~CachingWalker() {} + +void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { + if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) + MUD->resetOptimized(); +} + +/// \brief Walk the use-def chains starting at \p MA and find +/// the MemoryAccess that actually clobbers Loc. +/// +/// \returns our clobbering memory access +MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( + MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) { + MemoryAccess *New = Walker.findClobber(StartingAccess, Q); +#ifdef EXPENSIVE_CHECKS + MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q); + assert(NewNoCache == New && "Cache made us hand back a different result?"); +#endif + if (AutoResetWalker) + resetClobberWalker(); + return New; +} + +MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( + MemoryAccess *StartingAccess, const MemoryLocation &Loc) { + if (isa<MemoryPhi>(StartingAccess)) + return StartingAccess; + + auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess); + if (MSSA->isLiveOnEntryDef(StartingUseOrDef)) + return StartingUseOrDef; + + Instruction *I = StartingUseOrDef->getMemoryInst(); + + // Conservatively, fences are always clobbers, so don't perform the walk if we + // hit a fence. + if (!ImmutableCallSite(I) && I->isFenceLike()) + return StartingUseOrDef; + + UpwardsMemoryQuery Q; + Q.OriginalAccess = StartingUseOrDef; + Q.StartingLoc = Loc; + Q.Inst = I; + Q.IsCall = false; + + // Unlike the other function, do not walk to the def of a def, because we are + // handed something we already believe is the clobbering access. + MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef) + ? StartingUseOrDef->getDefiningAccess() + : StartingUseOrDef; + + MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q); + DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *StartingUseOrDef << "\n"); + DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *Clobber << "\n"); + return Clobber; +} + +MemoryAccess * +MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { + auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA); + // If this is a MemoryPhi, we can't do anything. + if (!StartingAccess) + return MA; + + // If this is an already optimized use or def, return the optimized result. + // Note: Currently, we do not store the optimized def result because we'd need + // a separate field, since we can't use it as the defining access. + if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) + if (MUD->isOptimized()) + return MUD->getOptimized(); + + const Instruction *I = StartingAccess->getMemoryInst(); + UpwardsMemoryQuery Q(I, StartingAccess); + // We can't sanely do anything with a fences, they conservatively + // clobber all memory, and have no locations to get pointers from to + // try to disambiguate. 
+ if (!Q.IsCall && I->isFenceLike()) + return StartingAccess; + + if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) { + MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); + if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) + MUD->setOptimized(LiveOnEntry); + return LiveOnEntry; + } + + // Start with the thing we already think clobbers this location + MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess(); + + // At this point, DefiningAccess may be the live on entry def. + // If it is, we will not get a better result. + if (MSSA->isLiveOnEntryDef(DefiningAccess)) + return DefiningAccess; + + MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q); + DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *DefiningAccess << "\n"); + DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *Result << "\n"); + if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) + MUD->setOptimized(Result); + + return Result; +} + +MemoryAccess * +DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) { + if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) + return Use->getDefiningAccess(); + return MA; +} + +MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( + MemoryAccess *StartingAccess, const MemoryLocation &) { + if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess)) + return Use->getDefiningAccess(); + return StartingAccess; +} +} // namespace llvm diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp new file mode 100644 index 000000000000..c63677fe5502 --- /dev/null +++ b/lib/Analysis/MemorySSAUpdater.cpp @@ -0,0 +1,494 @@ +//===-- MemorySSAUpdater.cpp - Memory SSA Updater--------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------===// +// +// This file implements the MemorySSAUpdater class. +// +//===----------------------------------------------------------------===// +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Analysis/MemorySSA.h" +#include <algorithm> + +#define DEBUG_TYPE "memoryssa" +using namespace llvm; +namespace llvm { +// This is the marker algorithm from "Simple and Efficient Construction of +// Static Single Assignment Form" +// The simple, non-marker algorithm places phi nodes at any join +// Here, we place markers, and only place phi nodes if they end up necessary. +// They are only necessary if they break a cycle (IE we recursively visit +// ourselves again), or we discover, while getting the value of the operands, +// that there are two or more definitions needing to be merged. +// This still will leave non-minimal form in the case of irreducible control +// flow, where phi nodes may be in cycles with themselves, but unnecessary. 
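+// For example, at the join of a diamond where only one arm contains a store,
+// the incoming definitions differ and a phi is required; if neither arm writes
+// memory, the operands are identical and tryRemoveTrivialPhi simply returns
+// the single common definition instead of creating a phi.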
+MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
+ // Single predecessor case, just recurse, we can only have one definition.
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ return getPreviousDefFromEnd(Pred);
+ } else if (VisitedBlocks.count(BB)) {
+ // We hit our node again, meaning we had a cycle, we must insert a phi
+ // node to break it so we have an operand. The only case this will
+ // insert useless phis is if we have irreducible control flow.
+ return MSSA->createMemoryPhi(BB);
+ } else if (VisitedBlocks.insert(BB).second) {
+ // Mark us visited so we can detect a cycle
+ SmallVector<MemoryAccess *, 8> PhiOps;
+
+ // Recurse to get the values in our predecessors for placement of a
+ // potential phi node. This will insert phi nodes if we cycle in order to
+ // break the cycle and have an operand.
+ for (auto *Pred : predecessors(BB))
+ PhiOps.push_back(getPreviousDefFromEnd(Pred));
+
+ // Now try to simplify the ops to avoid placing a phi.
+ // This may return null if we never created a phi yet, that's okay.
+ MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB));
+ bool PHIExistsButNeedsUpdate = false;
+ // See if the existing phi operands match what we need.
+ // Unlike normal SSA, we only allow one phi node per block, so we can't just
+ // create a new one.
+ if (Phi && Phi->getNumOperands() != 0)
+ if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
+ PHIExistsButNeedsUpdate = true;
+ }
+
+ // See if we can avoid the phi by simplifying it.
+ auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
+ // If we couldn't simplify, we may have to create a phi
+ if (Result == Phi) {
+ if (!Phi)
+ Phi = MSSA->createMemoryPhi(BB);
+
+ // These will have been filled in by the recursive read we did above.
+ if (PHIExistsButNeedsUpdate) {
+ std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
+ std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
+ } else {
+ unsigned i = 0;
+ for (auto *Pred : predecessors(BB))
+ Phi->addIncoming(PhiOps[i++], Pred);
+ }
+
+ Result = Phi;
+ }
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Result))
+ InsertedPHIs.push_back(MP);
+ // Set ourselves up for the next variable by resetting visited state.
+ VisitedBlocks.erase(BB);
+ return Result;
+ }
+ llvm_unreachable("Should have hit one of the three cases above");
+}
+
+// This starts at the memory access, and goes backwards in the block to find the
+// previous definition. If a definition is not found in the block of the access,
+// it continues globally, creating phi nodes to ensure we have a single
+// definition.
+MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) {
+ auto *LocalResult = getPreviousDefInBlock(MA);
+
+ return LocalResult ? LocalResult : getPreviousDefRecursive(MA->getBlock());
+}
+
+// This starts at the memory access, and goes backwards in the block to find
+// the previous definition. If the definition is not found in the block of the
+// access, it returns nullptr.
+MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) {
+ auto *Defs = MSSA->getWritableBlockDefs(MA->getBlock());
+
+ // It's possible there are no defs, or we got handed the first def to start.
+ if (Defs) {
+ // If this is a def, we can just use the def iterators.
+ if (!isa<MemoryUse>(MA)) {
+ auto Iter = MA->getReverseDefsIterator();
+ ++Iter;
+ if (Iter != Defs->rend())
+ return &*Iter;
+ } else {
+ // Otherwise, we have to walk the all-access iterator.
+ auto Iter = MA->getReverseIterator(); + ++Iter; + while (&*Iter != &*Defs->begin()) { + if (!isa<MemoryUse>(*Iter)) + return &*Iter; + --Iter; + } + // At this point it must be pointing at firstdef + assert(&*Iter == &*Defs->begin() && + "Should have hit first def walking backwards"); + return &*Iter; + } + } + return nullptr; +} + +// This starts at the end of block +MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(BasicBlock *BB) { + auto *Defs = MSSA->getWritableBlockDefs(BB); + + if (Defs) + return &*Defs->rbegin(); + + return getPreviousDefRecursive(BB); +} +// Recurse over a set of phi uses to eliminate the trivial ones +MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { + if (!Phi) + return nullptr; + TrackingVH<MemoryAccess> Res(Phi); + SmallVector<TrackingVH<Value>, 8> Uses; + std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses)); + for (auto &U : Uses) { + if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) { + auto OperRange = UsePhi->operands(); + tryRemoveTrivialPhi(UsePhi, OperRange); + } + } + return Res; +} + +// Eliminate trivial phis +// Phis are trivial if they are defined either by themselves, or all the same +// argument. +// IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c) +// We recursively try to remove them. +template <class RangeType> +MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, + RangeType &Operands) { + // Detect equal or self arguments + MemoryAccess *Same = nullptr; + for (auto &Op : Operands) { + // If the same or self, good so far + if (Op == Phi || Op == Same) + continue; + // not the same, return the phi since it's not eliminatable by us + if (Same) + return Phi; + Same = cast<MemoryAccess>(Op); + } + // Never found a non-self reference, the phi is undef + if (Same == nullptr) + return MSSA->getLiveOnEntryDef(); + if (Phi) { + Phi->replaceAllUsesWith(Same); + removeMemoryAccess(Phi); + } + + // We should only end up recursing in case we replaced something, in which + // case, we may have made other Phis trivial. + return recursePhi(Same); +} + +void MemorySSAUpdater::insertUse(MemoryUse *MU) { + InsertedPHIs.clear(); + MU->setDefiningAccess(getPreviousDef(MU)); + // Unlike for defs, there is no extra work to do. Because uses do not create + // new may-defs, there are only two cases: + // + // 1. There was a def already below us, and therefore, we should not have + // created a phi node because it was already needed for the def. + // + // 2. There is no def below us, and therefore, there is no extra renaming work + // to do. +} + +// Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. +void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, + MemoryAccess *NewDef) { + // Replace any operand with us an incoming block with the new defining + // access. + int i = MP->getBasicBlockIndex(BB); + assert(i != -1 && "Should have found the basic block in the phi"); + // We can't just compare i against getNumOperands since one is signed and the + // other not. So use it to index into the block iterator. + for (auto BBIter = MP->block_begin() + i; BBIter != MP->block_end(); + ++BBIter) { + if (*BBIter != BB) + break; + MP->setIncomingValue(i, NewDef); + ++i; + } +} + +// A brief description of the algorithm: +// First, we compute what should define the new def, using the SSA +// construction algorithm. 
+// Then, we update the defs below us (and any new phi nodes) in the graph to +// point to the correct new defs, to ensure we only have one variable, and no +// disconnected stores. +void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { + InsertedPHIs.clear(); + + // See if we had a local def, and if not, go hunting. + MemoryAccess *DefBefore = getPreviousDefInBlock(MD); + bool DefBeforeSameBlock = DefBefore != nullptr; + if (!DefBefore) + DefBefore = getPreviousDefRecursive(MD->getBlock()); + + // There is a def before us, which means we can replace any store/phi uses + // of that thing with us, since we are in the way of whatever was there + // before. + // We now define that def's memorydefs and memoryphis + if (DefBeforeSameBlock) { + for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end(); + UI != UE;) { + Use &U = *UI++; + // Leave the uses alone + if (isa<MemoryUse>(U.getUser())) + continue; + U.set(MD); + } + } + + // and that def is now our defining access. + // We change them in this order otherwise we will appear in the use list + // above and reset ourselves. + MD->setDefiningAccess(DefBefore); + + SmallVector<MemoryAccess *, 8> FixupList(InsertedPHIs.begin(), + InsertedPHIs.end()); + if (!DefBeforeSameBlock) { + // If there was a local def before us, we must have the same effect it + // did. Because every may-def is the same, any phis/etc we would create, it + // would also have created. If there was no local def before us, we + // performed a global update, and have to search all successors and make + // sure we update the first def in each of them (following all paths until + // we hit the first def along each path). This may also insert phi nodes. + // TODO: There are other cases we can skip this work, such as when we have a + // single successor, and only used a straight line of single pred blocks + // backwards to find the def. To make that work, we'd have to track whether + // getDefRecursive only ever used the single predecessor case. These types + // of paths also only exist in between CFG simplifications. + FixupList.push_back(MD); + } + + while (!FixupList.empty()) { + unsigned StartingPHISize = InsertedPHIs.size(); + fixupDefs(FixupList); + FixupList.clear(); + // Put any new phis on the fixup list, and process them + FixupList.append(InsertedPHIs.end() - StartingPHISize, InsertedPHIs.end()); + } + // Now that all fixups are done, rename all uses if we are asked. + if (RenameUses) { + SmallPtrSet<BasicBlock *, 16> Visited; + BasicBlock *StartBlock = MD->getBlock(); + // We are guaranteed there is a def in the block, because we just got it + // handed to us in this function. + MemoryAccess *FirstDef = &*MSSA->getWritableBlockDefs(StartBlock)->begin(); + // Convert to incoming value if it's a memorydef. A phi *is* already an + // incoming value. + if (auto *MD = dyn_cast<MemoryDef>(FirstDef)) + FirstDef = MD->getDefiningAccess(); + + MSSA->renamePass(MD->getBlock(), FirstDef, Visited); + // We just inserted a phi into this block, so the incoming value will become + // the phi anyway, so it does not matter what we pass. + for (auto *MP : InsertedPHIs) + MSSA->renamePass(MP->getBlock(), nullptr, Visited); + } +} + +void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<MemoryAccess *> &Vars) { + SmallPtrSet<const BasicBlock *, 8> Seen; + SmallVector<const BasicBlock *, 16> Worklist; + for (auto *NewDef : Vars) { + // First, see if there is a local def after the operand. 
+ auto *Defs = MSSA->getWritableBlockDefs(NewDef->getBlock()); + auto DefIter = NewDef->getDefsIterator(); + + // If there is a local def after us, we only have to rename that. + if (++DefIter != Defs->end()) { + cast<MemoryDef>(DefIter)->setDefiningAccess(NewDef); + continue; + } + + // Otherwise, we need to search down through the CFG. + // For each of our successors, handle it directly if their is a phi, or + // place on the fixup worklist. + for (const auto *S : successors(NewDef->getBlock())) { + if (auto *MP = MSSA->getMemoryAccess(S)) + setMemoryPhiValueForBlock(MP, NewDef->getBlock(), NewDef); + else + Worklist.push_back(S); + } + + while (!Worklist.empty()) { + const BasicBlock *FixupBlock = Worklist.back(); + Worklist.pop_back(); + + // Get the first def in the block that isn't a phi node. + if (auto *Defs = MSSA->getWritableBlockDefs(FixupBlock)) { + auto *FirstDef = &*Defs->begin(); + // The loop above and below should have taken care of phi nodes + assert(!isa<MemoryPhi>(FirstDef) && + "Should have already handled phi nodes!"); + // We are now this def's defining access, make sure we actually dominate + // it + assert(MSSA->dominates(NewDef, FirstDef) && + "Should have dominated the new access"); + + // This may insert new phi nodes, because we are not guaranteed the + // block we are processing has a single pred, and depending where the + // store was inserted, it may require phi nodes below it. + cast<MemoryDef>(FirstDef)->setDefiningAccess(getPreviousDef(FirstDef)); + return; + } + // We didn't find a def, so we must continue. + for (const auto *S : successors(FixupBlock)) { + // If there is a phi node, handle it. + // Otherwise, put the block on the worklist + if (auto *MP = MSSA->getMemoryAccess(S)) + setMemoryPhiValueForBlock(MP, FixupBlock, NewDef); + else { + // If we cycle, we should have ended up at a phi node that we already + // processed. FIXME: Double check this + if (!Seen.insert(S).second) + continue; + Worklist.push_back(S); + } + } + } + } +} + +// Move What before Where in the MemorySSA IR. +template <class WhereType> +void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB, + WhereType Where) { + // Replace all our users with our defining access. + What->replaceAllUsesWith(What->getDefiningAccess()); + + // Let MemorySSA take care of moving it around in the lists. + MSSA->moveTo(What, BB, Where); + + // Now reinsert it into the IR and do whatever fixups needed. + if (auto *MD = dyn_cast<MemoryDef>(What)) + insertDef(MD); + else + insertUse(cast<MemoryUse>(What)); +} + +// Move What before Where in the MemorySSA IR. +void MemorySSAUpdater::moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where) { + moveTo(What, Where->getBlock(), Where->getIterator()); +} + +// Move What after Where in the MemorySSA IR. +void MemorySSAUpdater::moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where) { + moveTo(What, Where->getBlock(), ++Where->getIterator()); +} + +void MemorySSAUpdater::moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, + MemorySSA::InsertionPlace Where) { + return moveTo(What, BB, Where); +} + +/// \brief If all arguments of a MemoryPHI are defined by the same incoming +/// argument, return that argument. 
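+/// For example, given 3 = MemoryPhi({bb1,2},{bb2,2}) this returns access 2,
+/// while for 3 = MemoryPhi({bb1,1},{bb2,2}) it returns nullptr.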
+static MemoryAccess *onlySingleValue(MemoryPhi *MP) { + MemoryAccess *MA = nullptr; + + for (auto &Arg : MP->operands()) { + if (!MA) + MA = cast<MemoryAccess>(Arg); + else if (MA != Arg) + return nullptr; + } + return MA; +} +void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { + assert(!MSSA->isLiveOnEntryDef(MA) && + "Trying to remove the live on entry def"); + // We can only delete phi nodes if they have no uses, or we can replace all + // uses with a single definition. + MemoryAccess *NewDefTarget = nullptr; + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) { + // Note that it is sufficient to know that all edges of the phi node have + // the same argument. If they do, by the definition of dominance frontiers + // (which we used to place this phi), that argument must dominate this phi, + // and thus, must dominate the phi's uses, and so we will not hit the assert + // below. + NewDefTarget = onlySingleValue(MP); + assert((NewDefTarget || MP->use_empty()) && + "We can't delete this memory phi"); + } else { + NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess(); + } + + // Re-point the uses at our defining access + if (!isa<MemoryUse>(MA) && !MA->use_empty()) { + // Reset optimized on users of this store, and reset the uses. + // A few notes: + // 1. This is a slightly modified version of RAUW to avoid walking the + // uses twice here. + // 2. If we wanted to be complete, we would have to reset the optimized + // flags on users of phi nodes if doing the below makes a phi node have all + // the same arguments. Instead, we prefer users to removeMemoryAccess those + // phi nodes, because doing it here would be N^3. + if (MA->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget); + // Note: We assume MemorySSA is not used in metadata since it's not really + // part of the IR. 
+ + while (!MA->use_empty()) { + Use &U = *MA->use_begin(); + if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser())) + MUD->resetOptimized(); + U.set(NewDefTarget); + } + } + + // The call below to erase will destroy MA, so we can't change the order we + // are doing things here + MSSA->removeFromLookups(MA); + MSSA->removeFromLists(MA); +} + +MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( + Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, + MemorySSA::InsertionPlace Point) { + MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition); + MSSA->insertIntoListsForBlock(NewAccess, BB, Point); + return NewAccess; +} + +MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessBefore( + Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt) { + assert(I->getParent() == InsertPt->getBlock() && + "New and old access must be in the same block"); + MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition); + MSSA->insertIntoListsBefore(NewAccess, InsertPt->getBlock(), + InsertPt->getIterator()); + return NewAccess; +} + +MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessAfter( + Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt) { + assert(I->getParent() == InsertPt->getBlock() && + "New and old access must be in the same block"); + MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition); + MSSA->insertIntoListsBefore(NewAccess, InsertPt->getBlock(), + ++InsertPt->getIterator()); + return NewAccess; +} + +} // namespace llvm diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index f5ba637e58e2..f6d9a73e4e9a 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -28,7 +28,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/ValueSymbolTable.h" -#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Pass.h" using namespace llvm; @@ -84,6 +84,92 @@ static bool isNonRenamableLocal(const GlobalValue &GV) { return GV.hasSection() && GV.hasLocalLinkage(); } +/// Determine whether this call has all constant integer arguments (excluding +/// "this") and summarize it to VCalls or ConstVCalls as appropriate. +static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid, + SetVector<FunctionSummary::VFuncId> &VCalls, + SetVector<FunctionSummary::ConstVCall> &ConstVCalls) { + std::vector<uint64_t> Args; + // Start from the second argument to skip the "this" pointer. + for (auto &Arg : make_range(Call.CS.arg_begin() + 1, Call.CS.arg_end())) { + auto *CI = dyn_cast<ConstantInt>(Arg); + if (!CI || CI->getBitWidth() > 64) { + VCalls.insert({Guid, Call.Offset}); + return; + } + Args.push_back(CI->getZExtValue()); + } + ConstVCalls.insert({{Guid, Call.Offset}, std::move(Args)}); +} + +/// If this intrinsic call requires that we add information to the function +/// summary, do so via the non-constant reference arguments. 
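+/// Currently this covers llvm.type.test and llvm.type.checked.load, which feed
+/// the TypeTests set and the virtual-call sets used for whole-program
+/// devirtualization.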
+static void addIntrinsicToSummary( + const CallInst *CI, SetVector<GlobalValue::GUID> &TypeTests, + SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls, + SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls, + SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls, + SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls) { + switch (CI->getCalledFunction()->getIntrinsicID()) { + case Intrinsic::type_test: { + auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1)); + auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()); + if (!TypeId) + break; + GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString()); + + // Produce a summary from type.test intrinsics. We only summarize type.test + // intrinsics that are used other than by an llvm.assume intrinsic. + // Intrinsics that are assumed are relevant only to the devirtualization + // pass, not the type test lowering pass. + bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) { + auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser()); + if (!AssumeCI) + return true; + Function *F = AssumeCI->getCalledFunction(); + return !F || F->getIntrinsicID() != Intrinsic::assume; + }); + if (HasNonAssumeUses) + TypeTests.insert(Guid); + + SmallVector<DevirtCallSite, 4> DevirtCalls; + SmallVector<CallInst *, 4> Assumes; + findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI); + for (auto &Call : DevirtCalls) + addVCallToSet(Call, Guid, TypeTestAssumeVCalls, + TypeTestAssumeConstVCalls); + + break; + } + + case Intrinsic::type_checked_load: { + auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2)); + auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()); + if (!TypeId) + break; + GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString()); + + SmallVector<DevirtCallSite, 4> DevirtCalls; + SmallVector<Instruction *, 4> LoadedPtrs; + SmallVector<Instruction *, 4> Preds; + bool HasNonCallUses = false; + findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds, + HasNonCallUses, CI); + // Any non-call uses of the result of llvm.type.checked.load will + // prevent us from optimizing away the llvm.type.test. + if (HasNonCallUses) + TypeTests.insert(Guid); + for (auto &Call : DevirtCalls) + addVCallToSet(Call, Guid, TypeCheckedLoadVCalls, + TypeCheckedLoadConstVCalls); + + break; + } + default: + break; + } +} + static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, const Function &F, BlockFrequencyInfo *BFI, @@ -99,6 +185,10 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, MapVector<ValueInfo, CalleeInfo> CallGraphEdges; SetVector<ValueInfo> RefEdges; SetVector<GlobalValue::GUID> TypeTests; + SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls, + TypeCheckedLoadVCalls; + SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls, + TypeCheckedLoadConstVCalls; ICallPromotionAnalysis ICallAnalysis; bool HasInlineAsmMaybeReferencingInternal = false; @@ -133,29 +223,15 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // Check if this is a direct call to a known function or a known // intrinsic, or an indirect call with profile data. if (CalledFunction) { - if (CalledFunction->isIntrinsic()) { - if (CalledFunction->getIntrinsicID() != Intrinsic::type_test) - continue; - // Produce a summary from type.test intrinsics. We only summarize - // type.test intrinsics that are used other than by an llvm.assume - // intrinsic. 
Intrinsics that are assumed are relevant only to the - // devirtualization pass, not the type test lowering pass. - bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) { - auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser()); - if (!AssumeCI) - return true; - Function *F = AssumeCI->getCalledFunction(); - return !F || F->getIntrinsicID() != Intrinsic::assume; - }); - if (HasNonAssumeUses) { - auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1)); - if (auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata())) - TypeTests.insert(GlobalValue::getGUID(TypeId->getString())); - } + if (CI && CalledFunction->isIntrinsic()) { + addIntrinsicToSummary( + CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls, + TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls); + continue; } // We should have named any anonymous globals assert(CalledFunction->hasName()); - auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; + auto ScaledCount = ProfileSummaryInfo::getProfileCount(&I, BFI); auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI) : CalleeInfo::HotnessType::Unknown; @@ -183,6 +259,11 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } } + // Explicit add hot edges to enforce importing for designated GUIDs for + // sample PGO, to enable the same inlines as the profiled optimized binary. + for (auto &I : F.getImportGUIDs()) + CallGraphEdges[I].updateHotness(CalleeInfo::HotnessType::Hot); + bool NonRenamableLocal = isNonRenamableLocal(F); bool NotEligibleForImport = NonRenamableLocal || HasInlineAsmMaybeReferencingInternal || @@ -193,7 +274,10 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, /* LiveRoot = */ false); auto FuncSummary = llvm::make_unique<FunctionSummary>( Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(), - TypeTests.takeVector()); + TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), + TypeCheckedLoadVCalls.takeVector(), + TypeTestAssumeConstVCalls.takeVector(), + TypeCheckedLoadConstVCalls.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(F.getGUID()); Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary)); @@ -326,9 +410,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( // be listed on the llvm.used or llvm.compiler.used global and marked as // referenced from there. ModuleSymbolTable::CollectAsmSymbols( - Triple(M.getTargetTriple()), M.getModuleInlineAsm(), - [&M, &Index, &CantBePromoted](StringRef Name, - object::BasicSymbolRef::Flags Flags) { + M, [&M, &Index, &CantBePromoted](StringRef Name, + object::BasicSymbolRef::Flags Flags) { // Symbols not marked as Weak or Global are local definitions. 
if (Flags & (object::BasicSymbolRef::SF_Weak | object::BasicSymbolRef::SF_Global)) @@ -347,7 +430,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( llvm::make_unique<FunctionSummary>( GVFlags, 0, ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{}, - ArrayRef<GlobalValue::GUID>{}); + ArrayRef<GlobalValue::GUID>{}, + ArrayRef<FunctionSummary::VFuncId>{}, + ArrayRef<FunctionSummary::VFuncId>{}, + ArrayRef<FunctionSummary::ConstVCall>{}, + ArrayRef<FunctionSummary::ConstVCall>{}); Index.addGlobalValueSummary(Name, std::move(Summary)); } else { std::unique_ptr<GlobalVarSummary> Summary = @@ -364,6 +451,12 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( auto &Summary = GlobalList.second[0]; bool AllRefsCanBeExternallyReferenced = llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) { + // If a global value definition references an unnamed global, + // be conservative. They're valid IR so we don't want to crash + // when we encounter any of them but they're infrequent enough + // that we don't bother optimizing them. + if (!VI.getValue()->hasName()) + return false; return !CantBePromoted.count(VI.getValue()->getGUID()); }); if (!AllRefsCanBeExternallyReferenced) { diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp index fa8b07d61b01..73245981b022 100644 --- a/lib/Analysis/OptimizationDiagnosticInfo.cpp +++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp @@ -23,14 +23,14 @@ using namespace llvm; -OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F) +OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) : F(F), BFI(nullptr) { if (!F->getContext().getDiagnosticHotnessRequested()) return; // First create a dominator tree. DominatorTree DT; - DT.recalculate(*F); + DT.recalculate(*const_cast<Function *>(F)); // Generate LoopInfo from it. LoopInfo LI; @@ -45,6 +45,18 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F) BFI = OwnedBFI.get(); } +bool OptimizationRemarkEmitter::invalidate( + Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // This analysis has no state and so can be trivially preserved but it needs + // a fresh view of BFI if it was constructed with one. + if (BFI && Inv.invalidate<BlockFrequencyAnalysis>(F, PA)) + return true; + + // Otherwise this analysis result remains valid. 
+ return false; +} + Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) { if (!BFI) return None; @@ -55,53 +67,59 @@ Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) { namespace llvm { namespace yaml { -template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> { - static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag) { - assert(io.outputting() && "input not yet implemented"); +void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping( + IO &io, DiagnosticInfoOptimizationBase *&OptDiag) { + assert(io.outputting() && "input not yet implemented"); + + if (io.mapTag("!Passed", + (OptDiag->getKind() == DK_OptimizationRemark || + OptDiag->getKind() == DK_MachineOptimizationRemark))) + ; + else if (io.mapTag( + "!Missed", + (OptDiag->getKind() == DK_OptimizationRemarkMissed || + OptDiag->getKind() == DK_MachineOptimizationRemarkMissed))) + ; + else if (io.mapTag( + "!Analysis", + (OptDiag->getKind() == DK_OptimizationRemarkAnalysis || + OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis))) + ; + else if (io.mapTag("!AnalysisFPCommute", + OptDiag->getKind() == + DK_OptimizationRemarkAnalysisFPCommute)) + ; + else if (io.mapTag("!AnalysisAliasing", + OptDiag->getKind() == + DK_OptimizationRemarkAnalysisAliasing)) + ; + else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure)) + ; + else + llvm_unreachable("Unknown remark type"); - if (io.mapTag("!Passed", OptDiag->getKind() == DK_OptimizationRemark)) - ; - else if (io.mapTag("!Missed", - OptDiag->getKind() == DK_OptimizationRemarkMissed)) - ; - else if (io.mapTag("!Analysis", - OptDiag->getKind() == DK_OptimizationRemarkAnalysis)) - ; - else if (io.mapTag("!AnalysisFPCommute", - OptDiag->getKind() == - DK_OptimizationRemarkAnalysisFPCommute)) - ; - else if (io.mapTag("!AnalysisAliasing", - OptDiag->getKind() == - DK_OptimizationRemarkAnalysisAliasing)) - ; - else - llvm_unreachable("todo"); - - // These are read-only for now. - DebugLoc DL = OptDiag->getDebugLoc(); - StringRef FN = GlobalValue::getRealLinkageName( - OptDiag->getFunction().getName()); - - StringRef PassName(OptDiag->PassName); - io.mapRequired("Pass", PassName); - io.mapRequired("Name", OptDiag->RemarkName); - if (!io.outputting() || DL) - io.mapOptional("DebugLoc", DL); - io.mapRequired("Function", FN); - io.mapOptional("Hotness", OptDiag->Hotness); - io.mapOptional("Args", OptDiag->Args); - } -}; + // These are read-only for now. 
+ DiagnosticLocation DL = OptDiag->getLocation(); + StringRef FN = + GlobalValue::getRealLinkageName(OptDiag->getFunction().getName()); + + StringRef PassName(OptDiag->PassName); + io.mapRequired("Pass", PassName); + io.mapRequired("Name", OptDiag->RemarkName); + if (!io.outputting() || DL.isValid()) + io.mapOptional("DebugLoc", DL); + io.mapRequired("Function", FN); + io.mapOptional("Hotness", OptDiag->Hotness); + io.mapOptional("Args", OptDiag->Args); +} -template <> struct MappingTraits<DebugLoc> { - static void mapping(IO &io, DebugLoc &DL) { +template <> struct MappingTraits<DiagnosticLocation> { + static void mapping(IO &io, DiagnosticLocation &DL) { assert(io.outputting() && "input not yet implemented"); - auto *Scope = cast<DIScope>(DL.getScope()); - StringRef File = Scope->getFilename(); + StringRef File = DL.getFilename(); unsigned Line = DL.getLine(); - unsigned Col = DL.getCol(); + unsigned Col = DL.getColumn(); io.mapRequired("File", File); io.mapRequired("Line", Line); @@ -116,8 +134,8 @@ template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> { static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) { assert(io.outputting() && "input not yet implemented"); io.mapRequired(A.Key.data(), A.Val); - if (A.DLoc) - io.mapOptional("DebugLoc", A.DLoc); + if (A.Loc.isValid()) + io.mapOptional("DebugLoc", A.Loc); } }; @@ -127,18 +145,20 @@ template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> { LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument) void OptimizationRemarkEmitter::computeHotness( - DiagnosticInfoOptimizationBase &OptDiag) { - Value *V = OptDiag.getCodeRegion(); + DiagnosticInfoIROptimization &OptDiag) { + const Value *V = OptDiag.getCodeRegion(); if (V) OptDiag.setHotness(computeHotness(V)); } -void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) { +void OptimizationRemarkEmitter::emit( + DiagnosticInfoOptimizationBase &OptDiagBase) { + auto &OptDiag = cast<DiagnosticInfoIROptimization>(OptDiagBase); computeHotness(OptDiag); yaml::Output *Out = F->getContext().getDiagnosticsOutputFile(); if (Out) { - auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiag); + auto *P = const_cast<DiagnosticInfoOptimizationBase *>(&OptDiagBase); *Out << P; } // FIXME: now that IsVerbose is part of DI, filtering for this will be moved @@ -147,72 +167,6 @@ void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) { F->getContext().diagnose(OptDiag); } -void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName, - const DebugLoc &DLoc, - const Value *V, - const Twine &Msg) { - LLVMContext &Ctx = F->getContext(); - Ctx.diagnose(OptimizationRemark(PassName, *F, DLoc, Msg, computeHotness(V))); -} - -void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName, - Loop *L, - const Twine &Msg) { - emitOptimizationRemark(PassName, L->getStartLoc(), L->getHeader(), Msg); -} - -void OptimizationRemarkEmitter::emitOptimizationRemarkMissed( - const char *PassName, const DebugLoc &DLoc, const Value *V, - const Twine &Msg, bool IsVerbose) { - LLVMContext &Ctx = F->getContext(); - if (!IsVerbose || shouldEmitVerbose()) - Ctx.diagnose( - OptimizationRemarkMissed(PassName, *F, DLoc, Msg, computeHotness(V))); -} - -void OptimizationRemarkEmitter::emitOptimizationRemarkMissed( - const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) { - emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg, - IsVerbose); -} - -void 
OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis( - const char *PassName, const DebugLoc &DLoc, const Value *V, - const Twine &Msg, bool IsVerbose) { - LLVMContext &Ctx = F->getContext(); - if (!IsVerbose || shouldEmitVerbose()) - Ctx.diagnose( - OptimizationRemarkAnalysis(PassName, *F, DLoc, Msg, computeHotness(V))); -} - -void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis( - const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) { - emitOptimizationRemarkAnalysis(PassName, L->getStartLoc(), L->getHeader(), - Msg, IsVerbose); -} - -void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisFPCommute( - const char *PassName, const DebugLoc &DLoc, const Value *V, - const Twine &Msg) { - LLVMContext &Ctx = F->getContext(); - Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, *F, DLoc, Msg, - computeHotness(V))); -} - -void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing( - const char *PassName, const DebugLoc &DLoc, const Value *V, - const Twine &Msg) { - LLVMContext &Ctx = F->getContext(); - Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, *F, DLoc, Msg, - computeHotness(V))); -} - -void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing( - const char *PassName, Loop *L, const Twine &Msg) { - emitOptimizationRemarkAnalysisAliasing(PassName, L->getStartLoc(), - L->getHeader(), Msg); -} - OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass() : FunctionPass(ID) { initializeOptimizationRemarkEmitterWrapperPassPass( diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index cb9438a2f928..1caf151546d9 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -31,6 +31,15 @@ char PostDominatorTreeWrapperPass::ID = 0; INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree", "Post-Dominator Tree Construction", true, true) +bool PostDominatorTree::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. + auto PAC = PA.getChecker<PostDominatorTreeAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || + PAC.preservedSet<CFGAnalyses>()); +} + bool PostDominatorTreeWrapperPass::runOnFunction(Function &F) { DT.recalculate(F); return false; diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp index 16d3614c14c6..1a53a8ed4283 100644 --- a/lib/Analysis/ProfileSummaryInfo.cpp +++ b/lib/Analysis/ProfileSummaryInfo.cpp @@ -12,9 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" @@ -55,22 +56,40 @@ static uint64_t getMinCountForPercentile(SummaryEntryVector &DS, // The profile summary metadata may be attached either by the frontend or by // any backend passes (IR level instrumentation, for example). This method // checks if the Summary is null and if so checks if the summary metadata is now -// available in the module and parses it to get the Summary object. -void ProfileSummaryInfo::computeSummary() { +// available in the module and parses it to get the Summary object. 
Returns true +// if a valid Summary is available. +bool ProfileSummaryInfo::computeSummary() { if (Summary) - return; + return true; auto *SummaryMD = M.getProfileSummary(); if (!SummaryMD) - return; + return false; Summary.reset(ProfileSummary::getFromMD(SummaryMD)); + return true; +} + +Optional<uint64_t> +ProfileSummaryInfo::getProfileCount(const Instruction *Inst, + BlockFrequencyInfo *BFI) { + if (!Inst) + return None; + assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && + "We can only get profile count for call/invoke instruction."); + // Check if there is a profile metadata on the instruction. If it is present, + // determine hotness solely based on that. + uint64_t TotalCount; + if (Inst->extractProfTotalWeight(TotalCount)) + return TotalCount; + if (BFI) + return BFI->getBlockProfileCount(Inst->getParent()); + return None; } /// Returns true if the function's entry is hot. If it returns false, it /// either means it is not hot or it is unknown whether it is hot or not (for /// example, no profile data is available). bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { - computeSummary(); - if (!F || !Summary) + if (!F || !computeSummary()) return false; auto FunctionCount = F->getEntryCount(); // FIXME: The heuristic used below for determining hotness is based on @@ -79,17 +98,53 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { return FunctionCount && isHotCount(FunctionCount.getValue()); } +/// Returns true if the function's entry or total call edge count is hot. +/// If it returns false, it either means it is not hot or it is unknown +/// whether it is hot or not (for example, no profile data is available). +bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) { + if (!F || !computeSummary()) + return false; + if (auto FunctionCount = F->getEntryCount()) + if (isHotCount(FunctionCount.getValue())) + return true; + + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + return isHotCount(TotalCallCount); +} + +/// Returns true if the function's entry and total call edge count is cold. +/// If it returns false, it either means it is not cold or it is unknown +/// whether it is cold or not (for example, no profile data is available). +bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) { + if (!F || !computeSummary()) + return false; + if (auto FunctionCount = F->getEntryCount()) + if (!isColdCount(FunctionCount.getValue())) + return false; + + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + return isColdCount(TotalCallCount); +} + /// Returns true if the function's entry is a cold. If it returns false, it /// either means it is not cold or it is unknown whether it is cold or not (for /// example, no profile data is available). 
bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { - computeSummary(); if (!F) return false; - if (F->hasFnAttribute(Attribute::Cold)) { + if (F->hasFnAttribute(Attribute::Cold)) return true; - } - if (!Summary) + if (!computeSummary()) return false; auto FunctionCount = F->getEntryCount(); // FIXME: The heuristic used below for determining coldness is based on @@ -100,9 +155,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { /// Compute the hot and cold thresholds. void ProfileSummaryInfo::computeThresholds() { - if (!Summary) - computeSummary(); - if (!Summary) + if (!computeSummary()) return; auto &DetailedSummary = Summary->getDetailedSummary(); HotCountThreshold = @@ -125,20 +178,25 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) { bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) { auto Count = BFI->getBlockProfileCount(B); - if (Count && isHotCount(*Count)) - return true; - // Use extractProfTotalWeight to get BB count. - // For Sample PGO, BFI may not provide accurate BB count due to errors - // magnified during sample count propagation. This serves as a backup plan - // to ensure all hot BB will not be missed. - // The query currently has false positives as branch instruction cloning does - // not update/scale branch weights. Unlike false negatives, this will not cause - // performance problem. - uint64_t TotalCount; - if (B->getTerminator()->extractProfTotalWeight(TotalCount) && - isHotCount(TotalCount)) - return true; - return false; + return Count && isHotCount(*Count); +} + +bool ProfileSummaryInfo::isColdBB(const BasicBlock *B, + BlockFrequencyInfo *BFI) { + auto Count = BFI->getBlockProfileCount(B); + return Count && isColdCount(*Count); +} + +bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, + BlockFrequencyInfo *BFI) { + auto C = getProfileCount(CS.getInstruction(), BFI); + return C && isHotCount(*C); +} + +bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS, + BlockFrequencyInfo *BFI) { + auto C = getProfileCount(CS.getInstruction(), BFI); + return C && isColdCount(*C); } INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 8c084ddd2266..63ef8d28d44a 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -83,6 +83,15 @@ RegionInfo::~RegionInfo() { } +bool RegionInfo::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. + auto PAC = PA.getChecker<RegionInfoAnalysis>(); + return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || + PAC.preservedSet<CFGAnalyses>()); +} + void RegionInfo::updateStatistics(Region *R) { ++numRegions; diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 7358aa6810a1..82107cb18025 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -206,6 +206,8 @@ public: return false; } + + StringRef getPassName() const override { return "Print Region IR"; }
}; char PrintRegionPass::ID = 0; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index ed328f12c463..ca32cf3c7c34 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -127,16 +127,35 @@ static cl::opt<unsigned> MulOpsInlineThreshold( cl::desc("Threshold for inlining multiplication operands into a SCEV"), cl::init(1000)); +static cl::opt<unsigned> AddOpsInlineThreshold( + "scev-addops-inline-threshold", cl::Hidden, + cl::desc("Threshold for inlining multiplication operands into a SCEV"), + cl::init(500)); + static cl::opt<unsigned> MaxSCEVCompareDepth( "scalar-evolution-max-scev-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive SCEV complexity comparisons"), cl::init(32)); +static cl::opt<unsigned> MaxSCEVOperationsImplicationDepth( + "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden, + cl::desc("Maximum depth of recursive SCEV operations implication analysis"), + cl::init(2)); + static cl::opt<unsigned> MaxValueCompareDepth( "scalar-evolution-max-value-compare-depth", cl::Hidden, cl::desc("Maximum depth of recursive value complexity comparisons"), cl::init(2)); +static cl::opt<unsigned> + MaxAddExprDepth("scalar-evolution-max-addexpr-depth", cl::Hidden, + cl::desc("Maximum depth of recursive AddExpr"), + cl::init(32)); + +static cl::opt<unsigned> MaxConstantEvolvingDepth( + "scalar-evolution-max-constant-evolving-depth", cl::Hidden, + cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -145,11 +164,12 @@ static cl::opt<unsigned> MaxValueCompareDepth( // Implementation of the SCEV class. // -LLVM_DUMP_METHOD -void SCEV::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif void SCEV::print(raw_ostream &OS) const { switch (static_cast<SCEVTypes>(getSCEVType())) { @@ -2095,7 +2115,8 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, - SCEV::NoWrapFlags Flags) { + SCEV::NoWrapFlags Flags, + unsigned Depth) { assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty add!"); @@ -2134,6 +2155,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (Ops.size() == 1) return Ops[0]; } + // Limit recursion calls depth + if (Depth > MaxAddExprDepth) + return getOrCreateAddExpr(Ops, Flags); + // Okay, check to see if the same value occurs in the operand list more than // once. If so, merge them together into an multiply expression. Since we // sorted the list, these values are required to be adjacent. @@ -2205,7 +2230,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } if (Ok) { // Evaluate the expression in the larger type. - const SCEV *Fold = getAddExpr(LargeOps, Flags); + const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1); // If it folds to something simple, use it. Otherwise, don't. 
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) return getTruncateExpr(Fold, DstType); @@ -2220,6 +2245,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (Idx < Ops.size()) { bool DeletedAdd = false; while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) { + if (Ops.size() > AddOpsInlineThreshold || + Add->getNumOperands() > AddOpsInlineThreshold) + break; // If we have an add, expand the add operands onto the end of the operands // list. Ops.erase(Ops.begin()+Idx); @@ -2231,7 +2259,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // and they are not necessarily sorted. Recurse to resort and resimplify // any operands we just acquired. if (DeletedAdd) - return getAddExpr(Ops); + return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Skip over the add expression until we get to a multiply. @@ -2266,13 +2294,14 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, Ops.push_back(getConstant(AccumulatedConstant)); for (auto &MulOp : MulOpLists) if (MulOp.first != 0) - Ops.push_back(getMulExpr(getConstant(MulOp.first), - getAddExpr(MulOp.second))); + Ops.push_back(getMulExpr( + getConstant(MulOp.first), + getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1))); if (Ops.empty()) return getZero(Ty); if (Ops.size() == 1) return Ops[0]; - return getAddExpr(Ops); + return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } @@ -2297,8 +2326,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul = getMulExpr(MulOps); } - const SCEV *One = getOne(Ty); - const SCEV *AddOne = getAddExpr(One, InnerMul); + SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul}; + const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { @@ -2309,7 +2338,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, Ops.erase(Ops.begin()+AddOp-1); } Ops.push_back(OuterMul); - return getAddExpr(Ops); + return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Check this multiply against other multiplies being added together. @@ -2337,13 +2366,15 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); InnerMul2 = getMulExpr(MulOps); } - const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2); + SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2}; + const SCEV *InnerMulSum = + getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); Ops.push_back(OuterMul); - return getAddExpr(Ops); + return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } } } @@ -2379,7 +2410,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // This follows from the fact that the no-wrap flags on the outer add // expression are applicable on the 0th iteration, when the add recurrence // will be equal to its start value. - AddRecOps[0] = getAddExpr(LIOps, Flags); + AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. 
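The hunks above thread a new Depth parameter through getAddExpr and bail out to getOrCreateAddExpr once MaxAddExprDepth is exceeded (with AddOpsInlineThreshold similarly capping how many nested add operands get inlined). The following is a minimal, standalone sketch of that same depth-capping pattern on a toy recursive simplifier; it is not LLVM code, and every name in it (Expr, MaxSimplifyDepth, simplifyAdd, makeAddNode) is hypothetical.

// Sketch only: cap recursion depth, pass Depth + 1 downward, and fall back to
// a cheap non-simplifying constructor once the cap is hit, trading missed
// folds for a bound on compile time.
#include <memory>
#include <vector>

struct Expr {
  std::vector<std::shared_ptr<Expr>> Ops; // non-empty => an unfolded add node
};

static const unsigned MaxSimplifyDepth = 32; // analogous to MaxAddExprDepth

static std::shared_ptr<Expr> makeAddNode(std::vector<std::shared_ptr<Expr>> Ops) {
  auto E = std::make_shared<Expr>();
  E->Ops = std::move(Ops); // no further simplification: the cheap fallback
  return E;
}

static std::shared_ptr<Expr> simplifyAdd(std::vector<std::shared_ptr<Expr>> Ops,
                                         unsigned Depth = 0) {
  // Past the cap, stop recursing and just create the node, mirroring the
  // "if (Depth > MaxAddExprDepth) return getOrCreateAddExpr(...)" check above.
  if (Depth > MaxSimplifyDepth)
    return makeAddNode(std::move(Ops));

  // Inline nested add operands, then re-run simplification one level deeper,
  // the way getAddExpr re-invokes itself with Depth + 1 after flattening.
  std::vector<std::shared_ptr<Expr>> Flat;
  bool Changed = false;
  for (auto &Op : Ops) {
    if (!Op->Ops.empty()) {
      Flat.insert(Flat.end(), Op->Ops.begin(), Op->Ops.end());
      Changed = true;
    } else {
      Flat.push_back(Op);
    }
  }
  if (Changed)
    return simplifyAdd(std::move(Flat), Depth + 1);
  return makeAddNode(std::move(Ops));
}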
@@ -2396,7 +2427,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, Ops[i] = NewRec; break; } - return getAddExpr(Ops); + return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Okay, if there weren't any loop invariants to be folded, check to see if @@ -2420,14 +2451,15 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, OtherAddRec->op_end()); break; } - AddRecOps[i] = getAddExpr(AddRecOps[i], - OtherAddRec->getOperand(i)); + SmallVector<const SCEV *, 2> TwoOps = { + AddRecOps[i], OtherAddRec->getOperand(i)}; + AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); } Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; } // Step size has changed, so we cannot guarantee no self-wraparound. Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); - return getAddExpr(Ops); + return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // Otherwise couldn't fold anything into this recurrence. Move onto the @@ -2436,18 +2468,24 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // Okay, it looks like we really DO need an add expr. Check to see if we // already have one, otherwise create a new one. + return getOrCreateAddExpr(Ops, Flags); +} + +const SCEV * +ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scAddExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = nullptr; SCEVAddExpr *S = - static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); - S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); + S = new (SCEVAllocator) + SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } S->setNoWrapFlags(Flags); @@ -2889,7 +2927,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, // end of this file for inspiration. const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS); - if (!Mul) + if (!Mul || !Mul->hasNoUnsignedWrap()) return getUDivExpr(LHS, RHS); if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) { @@ -3385,6 +3423,10 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { return getDataLayout().getIntPtrType(Ty); } +Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const { + return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2; +} + const SCEV *ScalarEvolution::getCouldNotCompute() { return CouldNotCompute.get(); } @@ -4409,8 +4451,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { return getGEPExpr(GEP, IndexExprs); } -uint32_t -ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { +uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) return C->getAPInt().countTrailingZeros(); @@ -4420,14 +4461,16 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) { uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); - return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? - getTypeSizeInBits(E->getType()) : OpRes; + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) + ? 
getTypeSizeInBits(E->getType()) + : OpRes; } if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) { uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); - return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? - getTypeSizeInBits(E->getType()) : OpRes; + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) + ? getTypeSizeInBits(E->getType()) + : OpRes; } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { @@ -4444,8 +4487,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { uint32_t BitWidth = getTypeSizeInBits(M->getType()); for (unsigned i = 1, e = M->getNumOperands(); SumOpRes != BitWidth && i != e; ++i) - SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), - BitWidth); + SumOpRes = + std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth); return SumOpRes; } @@ -4486,6 +4529,17 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { return 0; } +uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { + auto I = MinTrailingZerosCache.find(S); + if (I != MinTrailingZerosCache.end()) + return I->second; + + uint32_t Result = GetMinTrailingZerosImpl(S); + auto InsertPair = MinTrailingZerosCache.insert({S, Result}); + assert(InsertPair.second && "Should insert a new key"); + return InsertPair.first->second; +} + /// Helper method to assign a range to V from metadata present in the IR. static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) @@ -4668,6 +4722,77 @@ ScalarEvolution::getRange(const SCEV *S, return setRange(S, SignHint, ConservativeResult); } +// Given a StartRange, Step and MaxBECount for an expression compute a range of +// values that the expression can take. Initially, the expression has a value +// from StartRange and then is changed by Step up to MaxBECount times. Signed +// argument defines if we treat Step as signed or unsigned. +static ConstantRange getRangeForAffineARHelper(APInt Step, + ConstantRange StartRange, + APInt MaxBECount, + unsigned BitWidth, bool Signed) { + // If either Step or MaxBECount is 0, then the expression won't change, and we + // just need to return the initial range. + if (Step == 0 || MaxBECount == 0) + return StartRange; + + // If we don't know anything about the initial value (i.e. StartRange is + // FullRange), then we don't know anything about the final range either. + // Return FullRange. + if (StartRange.isFullSet()) + return ConstantRange(BitWidth, /* isFullSet = */ true); + + // If Step is signed and negative, then we use its absolute value, but we also + // note that we're moving in the opposite direction. + bool Descending = Signed && Step.isNegative(); + + if (Signed) + // This is correct even for INT_SMIN. Let's look at i8 to illustrate this: + // abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128. + // This equations hold true due to the well-defined wrap-around behavior of + // APInt. + Step = Step.abs(); + + // Check if Offset is more than full span of BitWidth. If it is, the + // expression is guaranteed to overflow. + if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount)) + return ConstantRange(BitWidth, /* isFullSet = */ true); + + // Offset is by how much the expression can change. Checks above guarantee no + // overflow here. + APInt Offset = Step * MaxBECount; + + // Minimum value of the final range will match the minimal value of StartRange + // if the expression is increasing and will be decreased by Offset otherwise. 
+ // Maximum value of the final range will match the maximal value of StartRange + // if the expression is decreasing and will be increased by Offset otherwise. + APInt StartLower = StartRange.getLower(); + APInt StartUpper = StartRange.getUpper() - 1; + APInt MovedBoundary = + Descending ? (StartLower - Offset) : (StartUpper + Offset); + + // It's possible that the new minimum/maximum value will fall into the initial + // range (due to wrap around). This means that the expression can take any + // value in this bitwidth, and we have to return full range. + if (StartRange.contains(MovedBoundary)) + return ConstantRange(BitWidth, /* isFullSet = */ true); + + APInt NewLower, NewUpper; + if (Descending) { + NewLower = MovedBoundary; + NewUpper = StartUpper; + } else { + NewLower = StartLower; + NewUpper = MovedBoundary; + } + + // If we end up with full range, return a proper full range. + if (NewLower == NewUpper + 1) + return ConstantRange(BitWidth, /* isFullSet = */ true); + + // No overflow detected, return [StartLower, StartUpper + Offset + 1) range. + return ConstantRange(NewLower, NewUpper + 1); +} + ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, const SCEV *Step, const SCEV *MaxBECount, @@ -4676,60 +4801,30 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, getTypeSizeInBits(MaxBECount->getType()) <= BitWidth && "Precondition!"); - ConstantRange Result(BitWidth, /* isFullSet = */ true); - - // Check for overflow. This must be done with ConstantRange arithmetic - // because we could be called from within the ScalarEvolution overflow - // checking code. - MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType()); ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); - ConstantRange ZExtMaxBECountRange = MaxBECountRange.zextOrTrunc(BitWidth * 2); + APInt MaxBECountValue = MaxBECountRange.getUnsignedMax(); + // First, consider step signed. + ConstantRange StartSRange = getSignedRange(Start); ConstantRange StepSRange = getSignedRange(Step); - ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2); - - ConstantRange StartURange = getUnsignedRange(Start); - ConstantRange EndURange = - StartURange.add(MaxBECountRange.multiply(StepSRange)); - - // Check for unsigned overflow. - ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2); - ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2); - if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) == - ZExtEndURange) { - APInt Min = APIntOps::umin(StartURange.getUnsignedMin(), - EndURange.getUnsignedMin()); - APInt Max = APIntOps::umax(StartURange.getUnsignedMax(), - EndURange.getUnsignedMax()); - bool IsFullRange = Min.isMinValue() && Max.isMaxValue(); - if (!IsFullRange) - Result = - Result.intersectWith(ConstantRange(Min, Max + 1)); - } - ConstantRange StartSRange = getSignedRange(Start); - ConstantRange EndSRange = - StartSRange.add(MaxBECountRange.multiply(StepSRange)); - - // Check for signed overflow. This must be done with ConstantRange - // arithmetic because we could be called from within the ScalarEvolution - // overflow checking code. 
- ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2); - ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2); - if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) == - SExtEndSRange) { - APInt Min = - APIntOps::smin(StartSRange.getSignedMin(), EndSRange.getSignedMin()); - APInt Max = - APIntOps::smax(StartSRange.getSignedMax(), EndSRange.getSignedMax()); - bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue(); - if (!IsFullRange) - Result = - Result.intersectWith(ConstantRange(Min, Max + 1)); - } + // If Step can be both positive and negative, we need to find ranges for the + // maximum absolute step values in both directions and union them. + ConstantRange SR = + getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange, + MaxBECountValue, BitWidth, /* Signed = */ true); + SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(), + StartSRange, MaxBECountValue, + BitWidth, /* Signed = */ true)); - return Result; + // Next, consider step unsigned. + ConstantRange UR = getRangeForAffineARHelper( + getUnsignedRange(Step).getUnsignedMax(), getUnsignedRange(Start), + MaxBECountValue, BitWidth, /* Signed = */ false); + + // Finally, intersect signed and unsigned ranges. + return SR.intersectWith(UR); } ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, @@ -5148,12 +5243,27 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) { - const SCEV *MulCount = getConstant(ConstantInt::get( - getContext(), APInt::getOneBitSet(BitWidth, TZ))); + const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); + const SCEV *LHS = getSCEV(BO->LHS); + const SCEV *ShiftedLHS = nullptr; + if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) { + if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) { + // For an expression like (x * 8) & 8, simplify the multiply. + unsigned MulZeros = OpC->getAPInt().countTrailingZeros(); + unsigned GCD = std::min(MulZeros, TZ); + APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD); + SmallVector<const SCEV*, 4> MulOps; + MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD))); + MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end()); + auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags()); + ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt)); + } + } + if (!ShiftedLHS) + ShiftedLHS = getUDivExpr(LHS, MulCount); return getMulExpr( getZeroExtendExpr( - getTruncateExpr( - getUDivExactExpr(getSCEV(BO->LHS), MulCount), + getTruncateExpr(ShiftedLHS, IntegerType::get(getContext(), BitWidth - LZ - TZ)), BO->LHS->getType()), MulCount); @@ -5211,7 +5321,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // If C is a low-bits mask, the zero extend is serving to // mask off the high bits. Complement the operand and // re-apply the zext. - if (APIntOps::isMask(Z0TySize, CI->getValue())) + if (CI->getValue().isMask(Z0TySize)) return getZeroExtendExpr(getNotSCEV(Z0), UTy); // If C is a single bit, it may be in the sign-bit position @@ -5255,28 +5365,55 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; case Instruction::AShr: - // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression. 
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) - if (Operator *L = dyn_cast<Operator>(BO->LHS)) - if (L->getOpcode() == Instruction::Shl && - L->getOperand(1) == BO->RHS) { - uint64_t BitWidth = getTypeSizeInBits(BO->LHS->getType()); - - // If the shift count is not less than the bitwidth, the result of - // the shift is undefined. Don't try to analyze it, because the - // resolution chosen here may differ from the resolution chosen in - // other parts of the compiler. - if (CI->getValue().uge(BitWidth)) - break; + // AShr X, C, where C is a constant. + ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS); + if (!CI) + break; + + Type *OuterTy = BO->LHS->getType(); + uint64_t BitWidth = getTypeSizeInBits(OuterTy); + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (CI->getValue().uge(BitWidth)) + break; - uint64_t Amt = BitWidth - CI->getZExtValue(); - if (Amt == BitWidth) - return getSCEV(L->getOperand(0)); // shift by zero --> noop + if (CI->isNullValue()) + return getSCEV(BO->LHS); // shift by zero --> noop + + uint64_t AShrAmt = CI->getZExtValue(); + Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt); + + Operator *L = dyn_cast<Operator>(BO->LHS); + if (L && L->getOpcode() == Instruction::Shl) { + // X = Shl A, n + // Y = AShr X, m + // Both n and m are constant. + + const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0)); + if (L->getOperand(1) == BO->RHS) + // For a two-shift sext-inreg, i.e. n = m, + // use sext(trunc(x)) as the SCEV expression. + return getSignExtendExpr( + getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy); + + ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1)); + if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) { + uint64_t ShlAmt = ShlAmtCI->getZExtValue(); + if (ShlAmt > AShrAmt) { + // When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV + // expression. We already checked that ShlAmt < BitWidth, so + // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as + // ShlAmt - AShrAmt < Amt. 
+ APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, + ShlAmt - AShrAmt); return getSignExtendExpr( - getTruncateExpr(getSCEV(L->getOperand(0)), - IntegerType::get(getContext(), Amt)), - BO->LHS->getType()); + getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy), + getConstant(Mul)), OuterTy); } + } + } break; } } @@ -5348,7 +5485,7 @@ static unsigned getConstantTripCount(const SCEVConstant *ExitCount) { return ((unsigned)ExitConst->getZExtValue()) + 1; } -unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) { +unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) { if (BasicBlock *ExitingBB = L->getExitingBlock()) return getSmallConstantTripCount(L, ExitingBB); @@ -5356,7 +5493,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) { return 0; } -unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, +unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && @@ -5366,13 +5503,13 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, return getConstantTripCount(ExitCount); } -unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) { +unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { const auto *MaxExitCount = dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L)); return getConstantTripCount(MaxExitCount); } -unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) { +unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { if (BasicBlock *ExitingBB = L->getExitingBlock()) return getSmallConstantTripMultiple(L, ExitingBB); @@ -5393,7 +5530,7 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) { /// As explained in the comments for getSmallConstantTripCount, this assumes /// that control exits the loop via ExitingBlock. unsigned -ScalarEvolution::getSmallConstantTripMultiple(Loop *L, +ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, BasicBlock *ExitingBlock) { assert(ExitingBlock && "Must pass a non-null exiting block!"); assert(L->isLoopExiting(ExitingBlock) && @@ -5403,17 +5540,16 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L, return 1; // Get the trip count from the BE count by adding 1. - const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType())); - // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt - // to factor simple cases. - if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) - TCMul = Mul->getOperand(0); - - const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul); - if (!MulC) - return 1; + const SCEV *TCExpr = getAddExpr(ExitCount, getOne(ExitCount->getType())); - ConstantInt *Result = MulC->getValue(); + const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr); + if (!TC) + // Attempt to factor more general cases. Returns the greatest power of + // two divisor. If overflow happens, the trip count expression is still + // divisible by the greatest power of 2 divisor returned. + return 1U << std::min((uint32_t)31, GetMinTrailingZeros(TCExpr)); + + ConstantInt *Result = TC->getValue(); // Guard against huge trip counts (this requires checking // for zero to handle the case where the trip count == -1 and the @@ -5428,7 +5564,8 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L, /// Get the expression for the number of loop iterations for which this loop is /// guaranteed not to exit via ExitingBlock. Otherwise return /// SCEVCouldNotCompute. 
-const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { +const SCEV *ScalarEvolution::getExitCount(const Loop *L, + BasicBlock *ExitingBlock) { return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); } @@ -6408,7 +6545,10 @@ static bool canConstantEvolve(Instruction *I, const Loop *L) { /// recursing through each instruction operand until reaching a loop header phi. static PHINode * getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, - DenseMap<Instruction *, PHINode *> &PHIMap) { + DenseMap<Instruction *, PHINode *> &PHIMap, + unsigned Depth) { + if (Depth > MaxConstantEvolvingDepth) + return nullptr; // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. @@ -6428,7 +6568,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, if (!P) { // Recurse and memoize the results, whether a phi is found or not. // This recursive call invalidates pointers into PHIMap. - P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap); + P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1); PHIMap[OpInst] = P; } if (!P) @@ -6455,7 +6595,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { // Record non-constant instructions contained by the loop. DenseMap<Instruction *, PHINode *> PHIMap; - return getConstantEvolvingPHIOperands(I, L, PHIMap); + return getConstantEvolvingPHIOperands(I, L, PHIMap, 0); } /// EvaluateExpression - Given an expression that passes the @@ -7014,10 +7154,10 @@ const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { /// A and B isn't important. /// /// If the equation does not have a solution, SCEVCouldNotCompute is returned. -static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, +static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, ScalarEvolution &SE) { uint32_t BW = A.getBitWidth(); - assert(BW == B.getBitWidth() && "Bit widths must be the same."); + assert(BW == SE.getTypeSizeInBits(B->getType())); assert(A != 0 && "A must be non-zero."); // 1. D = gcd(A, N) @@ -7031,7 +7171,7 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, // // B is divisible by D if and only if the multiplicity of prime factor 2 for B // is not less than multiplicity of this prime factor for D. - if (B.countTrailingZeros() < Mult2) + if (SE.GetMinTrailingZeros(B) < Mult2) return SE.getCouldNotCompute(); // 3. 
Compute I: the multiplicative inverse of (A / D) in arithmetic @@ -7049,9 +7189,8 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, // I * (B / D) mod (N / D) // To simplify the computation, we factor out the divide by D: // (I * B mod N) / D - APInt Result = (I * B).lshr(Mult2); - - return SE.getConstant(Result); + const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2)); + return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D); } /// Find the roots of the quadratic equation for the given quadratic chrec @@ -7082,7 +7221,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C // The B coefficient is M-N/2 APInt B(M); - B -= sdiv(N,Two); + B -= N.sdiv(Two); // The A coefficient is N/2 APInt A(N.sdiv(Two)); @@ -7233,62 +7372,6 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates); } - // As a special case, handle the instance where Step is a positive power of - // two. In this case, determining whether Step divides Distance evenly can be - // done by counting and comparing the number of trailing zeros of Step and - // Distance. - if (!CountDown) { - const APInt &StepV = StepC->getAPInt(); - // StepV.isPowerOf2() returns true if StepV is an positive power of two. It - // also returns true if StepV is maximally negative (eg, INT_MIN), but that - // case is not handled as this code is guarded by !CountDown. - if (StepV.isPowerOf2() && - GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) { - // Here we've constrained the equation to be of the form - // - // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0) - // - // where we're operating on a W bit wide integer domain and k is - // non-negative. The smallest unsigned solution for X is the trip count. - // - // (0) is equivalent to: - // - // 2^(N + k) * Distance' - 2^N * X = L * 2^W - // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N - // <=> 2^k * Distance' - X = L * 2^(W - N) - // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1) - // - // The smallest X satisfying (1) is unsigned remainder of dividing the LHS - // by 2^(W - N). - // - // <=> X = 2^k * Distance' URem 2^(W - N) ... (2) - // - // E.g. say we're solving - // - // 2 * Val = 2 * X (in i8) ... (3) - // - // then from (2), we get X = Val URem i8 128 (k = 0 in this case). - // - // Note: It is tempting to solve (3) by setting X = Val, but Val is not - // necessarily the smallest unsigned value of X that satisfies (3). - // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) - // is i8 1, not i8 -127 - - const auto *ModuloResult = getUDivExactExpr(Distance, Step); - - // Since SCEV does not have a URem node, we construct one using a truncate - // and a zero extend. - - unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); - auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); - auto *WideTy = Distance->getType(); - - const SCEV *Limit = - getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); - return ExitLimit(Limit, Limit, false, Predicates); - } - } - // If the condition controls loop exit (the loop exits only if the expression // is true) and the addition is no-wrap we can use unsigned divide to // compute the backedge count. 
In this case, the step may not divide the @@ -7301,13 +7384,10 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, return ExitLimit(Exact, Exact, false, Predicates); } - // Then, try to solve the above equation provided that Start is constant. - if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) { - const SCEV *E = SolveLinEquationWithOverflow( - StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this); - return ExitLimit(E, E, false, Predicates); - } - return getCouldNotCompute(); + // Solve the general equation. + const SCEV *E = SolveLinEquationWithOverflow( + StepC->getAPInt(), getNegativeSCEV(Start), *this); + return ExitLimit(E, E, false, Predicates); } ScalarEvolution::ExitLimit @@ -8488,19 +8568,161 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, llvm_unreachable("covered switch fell through?!"); } +bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS, + unsigned Depth) { + assert(getTypeSizeInBits(LHS->getType()) == + getTypeSizeInBits(RHS->getType()) && + "LHS and RHS have different sizes?"); + assert(getTypeSizeInBits(FoundLHS->getType()) == + getTypeSizeInBits(FoundRHS->getType()) && + "FoundLHS and FoundRHS have different sizes?"); + // We want to avoid hurting the compile time with analysis of too big trees. + if (Depth > MaxSCEVOperationsImplicationDepth) + return false; + // We only want to work with ICMP_SGT comparison so far. + // TODO: Extend to ICMP_UGT? + if (Pred == ICmpInst::ICMP_SLT) { + Pred = ICmpInst::ICMP_SGT; + std::swap(LHS, RHS); + std::swap(FoundLHS, FoundRHS); + } + if (Pred != ICmpInst::ICMP_SGT) + return false; + + auto GetOpFromSExt = [&](const SCEV *S) { + if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S)) + return Ext->getOperand(); + // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off + // the constant in some cases. + return S; + }; + + // Acquire values from extensions. + auto *OrigFoundLHS = FoundLHS; + LHS = GetOpFromSExt(LHS); + FoundLHS = GetOpFromSExt(FoundLHS); + + // Is the SGT predicate can be proved trivially or using the found context. + auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) { + return isKnownViaSimpleReasoning(ICmpInst::ICMP_SGT, S1, S2) || + isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS, + FoundRHS, Depth + 1); + }; + + if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) { + // We want to avoid creation of any new non-constant SCEV. Since we are + // going to compare the operands to RHS, we should be certain that we don't + // need any size extensions for this. So let's decline all cases when the + // sizes of types of LHS and RHS do not match. + // TODO: Maybe try to get RHS from sext to catch more cases? + if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType())) + return false; + + // Should not overflow. + if (!LHSAddExpr->hasNoSignedWrap()) + return false; + + auto *LL = LHSAddExpr->getOperand(0); + auto *LR = LHSAddExpr->getOperand(1); + auto *MinusOne = getNegativeSCEV(getOne(RHS->getType())); + + // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context. + auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) { + return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS); + }; + // Try to prove the following rule: + // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS). + // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS). 
+ if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL)) + return true; + } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) { + Value *LL, *LR; + // FIXME: Once we have SDiv implemented, we can get rid of this matching. + using namespace llvm::PatternMatch; + if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) { + // Rules for division. + // We are going to perform some comparisons with Denominator and its + // derivative expressions. In general case, creating a SCEV for it may + // lead to a complex analysis of the entire graph, and in particular it + // can request trip count recalculation for the same loop. This would + // cache as SCEVCouldNotCompute to avoid the infinite recursion. To avoid + // this, we only want to create SCEVs that are constants in this section. + // So we bail if Denominator is not a constant. + if (!isa<ConstantInt>(LR)) + return false; + + auto *Denominator = cast<SCEVConstant>(getSCEV(LR)); + + // We want to make sure that LHS = FoundLHS / Denominator. If it is so, + // then a SCEV for the numerator already exists and matches with FoundLHS. + auto *Numerator = getExistingSCEV(LL); + if (!Numerator || Numerator->getType() != FoundLHS->getType()) + return false; + + // Make sure that the numerator matches with FoundLHS and the denominator + // is positive. + if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator)) + return false; + + auto *DTy = Denominator->getType(); + auto *FRHSTy = FoundRHS->getType(); + if (DTy->isPointerTy() != FRHSTy->isPointerTy()) + // One of types is a pointer and another one is not. We cannot extend + // them properly to a wider type, so let us just reject this case. + // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help + // to avoid this check. + return false; + + // Given that: + // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0. + auto *WTy = getWiderType(DTy, FRHSTy); + auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy); + auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy); + + // Try to prove the following rule: + // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS). + // For example, given that FoundLHS > 2. It means that FoundLHS is at + // least 3. If we divide it by Denominator < 4, we will have at least 1. + auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2)); + if (isKnownNonPositive(RHS) && + IsSGTViaContext(FoundRHSExt, DenomMinusTwo)) + return true; + + // Try to prove the following rule: + // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS). + // For example, given that FoundLHS > -3. Then FoundLHS is at least -2. + // If we divide it by Denominator > 2, then: + // 1. If FoundLHS is negative, then the result is 0. + // 2. If FoundLHS is non-negative, then the result is non-negative. + // Anyways, the result is non-negative. 
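// [Editor's note -- illustrative check, not part of the patch.] Both division
// rules described above can be verified over a small range; C++'s truncating
// '/' agrees with sdiv for these operands. Helper name is hypothetical.
static bool divRulesHoldOnSmallInts() {
  for (int FLHS = -64; FLHS <= 64; ++FLHS)        // FoundLHS
    for (int FRHS = -64; FRHS <= 64; ++FRHS)      // FoundRHS
      for (int Denom = 1; Denom <= 16; ++Denom)   // Denominator > 0
        for (int RHS = -64; RHS <= 64; ++RHS) {
          if (FLHS <= FRHS)
            continue;                             // context fact: FoundLHS > FoundRHS
          int LHS = FLHS / Denom;                 // LHS = FoundLHS /s Denominator
          if (FRHS > Denom - 2 && RHS <= 0 && !(LHS > RHS))
            return false;                         // rule 1 counterexample (never reached)
          if (FRHS > -1 - Denom && RHS < 0 && !(LHS > RHS))
            return false;                         // rule 2 counterexample (never reached)
        }
  return true;
}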
+ auto *MinusOne = getNegativeSCEV(getOne(WTy)); + auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt); + if (isKnownNegative(RHS) && + IsSGTViaContext(FoundRHSExt, NegDenomMinusOne)) + return true; + } + } + + return false; +} + +bool +ScalarEvolution::isKnownViaSimpleReasoning(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || + IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || + IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || + isKnownPredicateViaNoOverflow(Pred, LHS, RHS); +} + bool ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { - auto IsKnownPredicateFull = - [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { - return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || - IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || - IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || - isKnownPredicateViaNoOverflow(Pred, LHS, RHS); - }; - switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: @@ -8510,30 +8732,34 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, break; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) && - IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) && - IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) && - IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + if (isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) && - IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + if (isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS)) return true; break; } + // Maybe it can be proved via operations? + if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + return false; } @@ -9524,6 +9750,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), WalkingBEDominatingConds(false), ProvingSplitPredicate(false), + MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( std::move(Arg.PredicatedBackedgeTakenCounts)), @@ -9621,6 +9848,13 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, OS << "Unpredictable predicated backedge-taken count. 
"; } OS << "\n"; + + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << "Loop "; + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": "; + OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n"; + } } static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) { @@ -9929,6 +10163,7 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { SignedRanges.erase(S); ExprValueMap.erase(S); HasRecMap.erase(S); + MinTrailingZerosCache.erase(S); auto RemoveSCEVFromBackedgeMap = [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) { diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index d15a7dbd20e6..6dd10441c4cb 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1268,8 +1268,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (PostIncLoops.count(L)) { PostIncLoopSet Loops; Loops.insert(L); - Normalized = cast<SCEVAddRecExpr>(TransformForPostIncUse( - Normalize, S, nullptr, nullptr, Loops, SE, SE.DT)); + Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE)); } // Strip off any non-loop-dominating component from the addrec start. diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index c1f9503816ee..2aaa4c1ae117 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -12,243 +12,100 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" using namespace llvm; -/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression -/// and now we need to decide whether the user should use the preinc or post-inc -/// value. If this user should use the post-inc version of the IV, return true. -/// -/// Choosing wrong here can break dominance properties (if we choose to use the -/// post-inc value when we cannot) or it can end up adding extra live-ranges to -/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we -/// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, - const Loop *L, DominatorTree *DT) { - // If the user is in the loop, use the preinc value. - if (L->contains(User)) return false; - - BasicBlock *LatchBlock = L->getLoopLatch(); - if (!LatchBlock) - return false; - - // Ok, the user is outside of the loop. If it is dominated by the latch - // block, use the post-inc value. - if (DT->dominates(LatchBlock, User->getParent())) - return true; - - // There is one case we have to be careful of: PHI nodes. These little guys - // can live in blocks that are not dominated by the latch block, but (since - // their uses occur in the predecessor block, not the block the PHI lives in) - // should still use the post-inc value. Check for this case now. - PHINode *PN = dyn_cast<PHINode>(User); - if (!PN || !Operand) return false; // not a phi, not dominated by latch block. - - // Look at all of the uses of Operand by the PHI node. If any use corresponds - // to a block that is not dominated by the latch block, give up and use the - // preincremented value. 
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Operand && - !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - - // Okay, all uses of Operand by PN are in predecessor blocks that really are - // dominated by the latch block. Use the post-incremented value. - return true; -} +/// TransformKind - Different types of transformations that +/// TransformForPostIncUse can do. +enum TransformKind { + /// Normalize - Normalize according to the given loops. + Normalize, + /// Denormalize - Perform the inverse transform on the expression with the + /// given loop set. + Denormalize +}; namespace { - -/// Hold the state used during post-inc expression transformation, including a -/// map of transformed expressions. -class PostIncTransform { - TransformKind Kind; - PostIncLoopSet &Loops; - ScalarEvolution &SE; - DominatorTree &DT; - - DenseMap<const SCEV*, const SCEV*> Transformed; - -public: - PostIncTransform(TransformKind kind, PostIncLoopSet &loops, - ScalarEvolution &se, DominatorTree &dt): - Kind(kind), Loops(loops), SE(se), DT(dt) {} - - const SCEV *TransformSubExpr(const SCEV *S, Instruction *User, - Value *OperandValToReplace); - -protected: - const SCEV *TransformImpl(const SCEV *S, Instruction *User, - Value *OperandValToReplace); +struct NormalizeDenormalizeRewriter + : public SCEVRewriteVisitor<NormalizeDenormalizeRewriter> { + const TransformKind Kind; + + // NB! Pred is a function_ref. Storing it here is okay only because + // we're careful about the lifetime of NormalizeDenormalizeRewriter. + const NormalizePredTy Pred; + + NormalizeDenormalizeRewriter(TransformKind Kind, NormalizePredTy Pred, + ScalarEvolution &SE) + : SCEVRewriteVisitor<NormalizeDenormalizeRewriter>(SE), Kind(Kind), + Pred(Pred) {} + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr); }; - } // namespace -/// Implement post-inc transformation for all valid expression types. -const SCEV *PostIncTransform:: -TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { - - if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { - const SCEV *O = X->getOperand(); - const SCEV *N = TransformSubExpr(O, User, OperandValToReplace); - if (O != N) - switch (S->getSCEVType()) { - case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); - case scSignExtend: return SE.getSignExtendExpr(N, S->getType()); - case scTruncate: return SE.getTruncateExpr(N, S->getType()); - default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); - } - return S; - } - - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - // An addrec. This is the interesting part. - SmallVector<const SCEV *, 8> Operands; - const Loop *L = AR->getLoop(); - // The addrec conceptually uses its operands at loop entry. - Instruction *LUser = &L->getHeader()->front(); - // Transform each operand. - for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); - I != E; ++I) { - Operands.push_back(TransformSubExpr(*I, LUser, nullptr)); +const SCEV * +NormalizeDenormalizeRewriter::visitAddRecExpr(const SCEVAddRecExpr *AR) { + SmallVector<const SCEV *, 8> Operands; + + transform(AR->operands(), std::back_inserter(Operands), + [&](const SCEV *Op) { return visit(Op); }); + + // Conservatively use AnyWrap until/unless we need FlagNW. 
+ const SCEV *Result = + SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); + switch (Kind) { + case Normalize: + // We want to normalize step expression, because otherwise we might not be + // able to denormalize to the original expression. + // + // Here is an example what will happen if we don't normalize step: + // ORIGINAL ISE: + // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25> + // NORMALIZED ISE: + // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+, + // (100 /u {0,+,1}<%bb16>)}<%bb25> + // DENORMALIZED BACK ISE: + // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+, + // (100 /u {1,+,1}<%bb16>)}<%bb25> + // Note that the initial value changes after normalization + + // denormalization, which isn't correct. + if (Pred(AR)) { + const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE)); + Result = SE.getMinusSCEV(Result, TransformedStep); } - // Conservatively use AnyWrap until/unless we need FlagNW. - const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); - switch (Kind) { - case NormalizeAutodetect: - // Normalize this SCEV by subtracting the expression for the final step. - // We only allow affine AddRecs to be normalized, otherwise we would not - // be able to correctly denormalize. - // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2} - // Normalized form: {-2,+,1,+,2} - // Denormalized form: {1,+,3,+,2} - // - // However, denormalization would use a different step expression than - // normalization (see getPostIncExpr), generating the wrong final - // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} - if (AR->isAffine() && - IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { - const SCEV *TransformedStep = - TransformSubExpr(AR->getStepRecurrence(SE), - User, OperandValToReplace); - Result = SE.getMinusSCEV(Result, TransformedStep); - Loops.insert(L); - } -#if 0 - // This assert is conceptually correct, but ScalarEvolution currently - // sometimes fails to canonicalize two equal SCEVs to exactly the same - // form. It's possibly a pessimization when this happens, but it isn't a - // correctness problem, so disable this assert for now. - assert(S == TransformSubExpr(Result, User, OperandValToReplace) && - "SCEV normalization is not invertible!"); -#endif - break; - case Normalize: - // We want to normalize step expression, because otherwise we might not be - // able to denormalize to the original expression. - // - // Here is an example what will happen if we don't normalize step: - // ORIGINAL ISE: - // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25> - // NORMALIZED ISE: - // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+, - // (100 /u {0,+,1}<%bb16>)}<%bb25> - // DENORMALIZED BACK ISE: - // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+, - // (100 /u {1,+,1}<%bb16>)}<%bb25> - // Note that the initial value changes after normalization + - // denormalization, which isn't correct. - if (Loops.count(L)) { - const SCEV *TransformedStep = - TransformSubExpr(AR->getStepRecurrence(SE), - User, OperandValToReplace); - Result = SE.getMinusSCEV(Result, TransformedStep); - } -#if 0 - // See the comment on the assert above. - assert(S == TransformSubExpr(Result, User, OperandValToReplace) && - "SCEV normalization is not invertible!"); -#endif - break; - case Denormalize: - // Here we want to normalize step expressions for the same reasons, as - // stated above. 
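// [Editor's note -- illustrative sketch, not part of the patch.] For an affine
// addrec with a loop-invariant step, the transform above reduces to shifting
// the start by one step, and normalize/denormalize are intended to round-trip.
// A toy model (hypothetical types, not SCEV):
struct ToyAddRec { long Start, Step; };                        // {Start,+,Step}
static ToyAddRec normalizeToy(ToyAddRec AR)   { return {AR.Start - AR.Step, AR.Step}; }
static ToyAddRec denormalizeToy(ToyAddRec AR) { return {AR.Start + AR.Step, AR.Step}; }
// e.g. {1,+,2} normalizes to {-1,+,2}, and denormalizing that yields {1,+,2}
// again. In the new API this is driven by llvm::normalizeForPostIncUse and
// denormalizeForPostIncUse (or normalizeForPostIncUseIf with an arbitrary
// predicate), defined near the end of this file in the hunks below.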
- if (Loops.count(L)) { - const SCEV *TransformedStep = - TransformSubExpr(AR->getStepRecurrence(SE), - User, OperandValToReplace); - Result = SE.getAddExpr(Result, TransformedStep); - } - break; + break; + case Denormalize: + // Here we want to normalize step expressions for the same reasons, as + // stated above. + if (Pred(AR)) { + const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE)); + Result = SE.getAddExpr(Result, TransformedStep); } - return Result; - } - - if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) { - SmallVector<const SCEV *, 8> Operands; - bool Changed = false; - // Transform each operand. - for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); - I != E; ++I) { - const SCEV *O = *I; - const SCEV *N = TransformSubExpr(O, User, OperandValToReplace); - Changed |= N != O; - Operands.push_back(N); - } - // If any operand actually changed, return a transformed result. - if (Changed) - switch (S->getSCEVType()) { - case scAddExpr: return SE.getAddExpr(Operands); - case scMulExpr: return SE.getMulExpr(Operands); - case scSMaxExpr: return SE.getSMaxExpr(Operands); - case scUMaxExpr: return SE.getUMaxExpr(Operands); - default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); - } - return S; - } - - if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { - const SCEV *LO = X->getLHS(); - const SCEV *RO = X->getRHS(); - const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace); - const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace); - if (LO != LN || RO != RN) - return SE.getUDivExpr(LN, RN); - return S; + break; } - - llvm_unreachable("Unexpected SCEV kind!"); + return Result; } -/// Manage recursive transformation across an expression DAG. Revisiting -/// expressions would lead to exponential recursion. -const SCEV *PostIncTransform:: -TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) { - - if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) - return S; - - const SCEV *Result = Transformed.lookup(S); - if (Result) - return Result; +const SCEV *llvm::normalizeForPostIncUse(const SCEV *S, + const PostIncLoopSet &Loops, + ScalarEvolution &SE) { + auto Pred = [&](const SCEVAddRecExpr *AR) { + return Loops.count(AR->getLoop()); + }; + return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S); +} - Result = TransformImpl(S, User, OperandValToReplace); - Transformed[S] = Result; - return Result; +const SCEV *llvm::normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred, + ScalarEvolution &SE) { + return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S); } -/// Top level driver for transforming an expression DAG into its requested -/// post-inc form (either "Normalized" or "Denormalized"). 
-const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, - const SCEV *S, - Instruction *User, - Value *OperandValToReplace, - PostIncLoopSet &Loops, - ScalarEvolution &SE, - DominatorTree &DT) { - PostIncTransform Transform(Kind, Loops, SE, DT); - return Transform.TransformSubExpr(S, User, OperandValToReplace); +const SCEV *llvm::denormalizeForPostIncUse(const SCEV *S, + const PostIncLoopSet &Loops, + ScalarEvolution &SE) { + auto Pred = [&](const SCEVAddRecExpr *AR) { + return Loops.count(AR->getLoop()); + }; + return NormalizeDenormalizeRewriter(Denormalize, Pred, SE).visit(S); } diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index 79dc84e25533..470f4bee1e0a 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -195,7 +195,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, Succs.assign(TI.getNumSuccessors(), true); return; } - SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C)); + SwitchInst::CaseHandle Case = *SI.findCaseValue(cast<ConstantInt>(C)); Succs[Case.getSuccessorIndex()] = true; } diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 112118ab77eb..be734fa91425 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -82,24 +82,24 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, if (T.getArch() == Triple::r600 || T.getArch() == Triple::amdgcn) { - TLI.setUnavailable(LibFunc::ldexp); - TLI.setUnavailable(LibFunc::ldexpf); - TLI.setUnavailable(LibFunc::ldexpl); - TLI.setUnavailable(LibFunc::exp10); - TLI.setUnavailable(LibFunc::exp10f); - TLI.setUnavailable(LibFunc::exp10l); - TLI.setUnavailable(LibFunc::log10); - TLI.setUnavailable(LibFunc::log10f); - TLI.setUnavailable(LibFunc::log10l); + TLI.setUnavailable(LibFunc_ldexp); + TLI.setUnavailable(LibFunc_ldexpf); + TLI.setUnavailable(LibFunc_ldexpl); + TLI.setUnavailable(LibFunc_exp10); + TLI.setUnavailable(LibFunc_exp10f); + TLI.setUnavailable(LibFunc_exp10l); + TLI.setUnavailable(LibFunc_log10); + TLI.setUnavailable(LibFunc_log10f); + TLI.setUnavailable(LibFunc_log10l); } // There are no library implementations of mempcy and memset for AMD gpus and // these can be difficult to lower in the backend. if (T.getArch() == Triple::r600 || T.getArch() == Triple::amdgcn) { - TLI.setUnavailable(LibFunc::memcpy); - TLI.setUnavailable(LibFunc::memset); - TLI.setUnavailable(LibFunc::memset_pattern16); + TLI.setUnavailable(LibFunc_memcpy); + TLI.setUnavailable(LibFunc_memset); + TLI.setUnavailable(LibFunc_memset_pattern16); return; } @@ -107,21 +107,21 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // All versions of watchOS support it. 
if (T.isMacOSX()) { if (T.isMacOSXVersionLT(10, 5)) - TLI.setUnavailable(LibFunc::memset_pattern16); + TLI.setUnavailable(LibFunc_memset_pattern16); } else if (T.isiOS()) { if (T.isOSVersionLT(3, 0)) - TLI.setUnavailable(LibFunc::memset_pattern16); + TLI.setUnavailable(LibFunc_memset_pattern16); } else if (!T.isWatchOS()) { - TLI.setUnavailable(LibFunc::memset_pattern16); + TLI.setUnavailable(LibFunc_memset_pattern16); } if (!hasSinCosPiStret(T)) { - TLI.setUnavailable(LibFunc::sinpi); - TLI.setUnavailable(LibFunc::sinpif); - TLI.setUnavailable(LibFunc::cospi); - TLI.setUnavailable(LibFunc::cospif); - TLI.setUnavailable(LibFunc::sincospi_stret); - TLI.setUnavailable(LibFunc::sincospif_stret); + TLI.setUnavailable(LibFunc_sinpi); + TLI.setUnavailable(LibFunc_sinpif); + TLI.setUnavailable(LibFunc_cospi); + TLI.setUnavailable(LibFunc_cospif); + TLI.setUnavailable(LibFunc_sincospi_stret); + TLI.setUnavailable(LibFunc_sincospif_stret); } if (T.isMacOSX() && T.getArch() == Triple::x86 && @@ -131,179 +131,179 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // has a $UNIX2003 suffix. The two implementations are identical except // for the return value in some edge cases. However, we don't want to // generate code that depends on the old symbols. - TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003"); - TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003"); + TLI.setAvailableWithName(LibFunc_fwrite, "fwrite$UNIX2003"); + TLI.setAvailableWithName(LibFunc_fputs, "fputs$UNIX2003"); } // iprintf and friends are only available on XCore and TCE. if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) { - TLI.setUnavailable(LibFunc::iprintf); - TLI.setUnavailable(LibFunc::siprintf); - TLI.setUnavailable(LibFunc::fiprintf); + TLI.setUnavailable(LibFunc_iprintf); + TLI.setUnavailable(LibFunc_siprintf); + TLI.setUnavailable(LibFunc_fiprintf); } if (T.isOSWindows() && !T.isOSCygMing()) { // Win32 does not support long double - TLI.setUnavailable(LibFunc::acosl); - TLI.setUnavailable(LibFunc::asinl); - TLI.setUnavailable(LibFunc::atanl); - TLI.setUnavailable(LibFunc::atan2l); - TLI.setUnavailable(LibFunc::ceill); - TLI.setUnavailable(LibFunc::copysignl); - TLI.setUnavailable(LibFunc::cosl); - TLI.setUnavailable(LibFunc::coshl); - TLI.setUnavailable(LibFunc::expl); - TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf - TLI.setUnavailable(LibFunc::fabsl); - TLI.setUnavailable(LibFunc::floorl); - TLI.setUnavailable(LibFunc::fmaxl); - TLI.setUnavailable(LibFunc::fminl); - TLI.setUnavailable(LibFunc::fmodl); - TLI.setUnavailable(LibFunc::frexpl); - TLI.setUnavailable(LibFunc::ldexpf); - TLI.setUnavailable(LibFunc::ldexpl); - TLI.setUnavailable(LibFunc::logl); - TLI.setUnavailable(LibFunc::modfl); - TLI.setUnavailable(LibFunc::powl); - TLI.setUnavailable(LibFunc::sinl); - TLI.setUnavailable(LibFunc::sinhl); - TLI.setUnavailable(LibFunc::sqrtl); - TLI.setUnavailable(LibFunc::tanl); - TLI.setUnavailable(LibFunc::tanhl); + TLI.setUnavailable(LibFunc_acosl); + TLI.setUnavailable(LibFunc_asinl); + TLI.setUnavailable(LibFunc_atanl); + TLI.setUnavailable(LibFunc_atan2l); + TLI.setUnavailable(LibFunc_ceill); + TLI.setUnavailable(LibFunc_copysignl); + TLI.setUnavailable(LibFunc_cosl); + TLI.setUnavailable(LibFunc_coshl); + TLI.setUnavailable(LibFunc_expl); + TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf + TLI.setUnavailable(LibFunc_fabsl); + TLI.setUnavailable(LibFunc_floorl); + TLI.setUnavailable(LibFunc_fmaxl); + 
TLI.setUnavailable(LibFunc_fminl); + TLI.setUnavailable(LibFunc_fmodl); + TLI.setUnavailable(LibFunc_frexpl); + TLI.setUnavailable(LibFunc_ldexpf); + TLI.setUnavailable(LibFunc_ldexpl); + TLI.setUnavailable(LibFunc_logl); + TLI.setUnavailable(LibFunc_modfl); + TLI.setUnavailable(LibFunc_powl); + TLI.setUnavailable(LibFunc_sinl); + TLI.setUnavailable(LibFunc_sinhl); + TLI.setUnavailable(LibFunc_sqrtl); + TLI.setUnavailable(LibFunc_tanl); + TLI.setUnavailable(LibFunc_tanhl); // Win32 only has C89 math - TLI.setUnavailable(LibFunc::acosh); - TLI.setUnavailable(LibFunc::acoshf); - TLI.setUnavailable(LibFunc::acoshl); - TLI.setUnavailable(LibFunc::asinh); - TLI.setUnavailable(LibFunc::asinhf); - TLI.setUnavailable(LibFunc::asinhl); - TLI.setUnavailable(LibFunc::atanh); - TLI.setUnavailable(LibFunc::atanhf); - TLI.setUnavailable(LibFunc::atanhl); - TLI.setUnavailable(LibFunc::cbrt); - TLI.setUnavailable(LibFunc::cbrtf); - TLI.setUnavailable(LibFunc::cbrtl); - TLI.setUnavailable(LibFunc::exp2); - TLI.setUnavailable(LibFunc::exp2f); - TLI.setUnavailable(LibFunc::exp2l); - TLI.setUnavailable(LibFunc::expm1); - TLI.setUnavailable(LibFunc::expm1f); - TLI.setUnavailable(LibFunc::expm1l); - TLI.setUnavailable(LibFunc::log2); - TLI.setUnavailable(LibFunc::log2f); - TLI.setUnavailable(LibFunc::log2l); - TLI.setUnavailable(LibFunc::log1p); - TLI.setUnavailable(LibFunc::log1pf); - TLI.setUnavailable(LibFunc::log1pl); - TLI.setUnavailable(LibFunc::logb); - TLI.setUnavailable(LibFunc::logbf); - TLI.setUnavailable(LibFunc::logbl); - TLI.setUnavailable(LibFunc::nearbyint); - TLI.setUnavailable(LibFunc::nearbyintf); - TLI.setUnavailable(LibFunc::nearbyintl); - TLI.setUnavailable(LibFunc::rint); - TLI.setUnavailable(LibFunc::rintf); - TLI.setUnavailable(LibFunc::rintl); - TLI.setUnavailable(LibFunc::round); - TLI.setUnavailable(LibFunc::roundf); - TLI.setUnavailable(LibFunc::roundl); - TLI.setUnavailable(LibFunc::trunc); - TLI.setUnavailable(LibFunc::truncf); - TLI.setUnavailable(LibFunc::truncl); + TLI.setUnavailable(LibFunc_acosh); + TLI.setUnavailable(LibFunc_acoshf); + TLI.setUnavailable(LibFunc_acoshl); + TLI.setUnavailable(LibFunc_asinh); + TLI.setUnavailable(LibFunc_asinhf); + TLI.setUnavailable(LibFunc_asinhl); + TLI.setUnavailable(LibFunc_atanh); + TLI.setUnavailable(LibFunc_atanhf); + TLI.setUnavailable(LibFunc_atanhl); + TLI.setUnavailable(LibFunc_cbrt); + TLI.setUnavailable(LibFunc_cbrtf); + TLI.setUnavailable(LibFunc_cbrtl); + TLI.setUnavailable(LibFunc_exp2); + TLI.setUnavailable(LibFunc_exp2f); + TLI.setUnavailable(LibFunc_exp2l); + TLI.setUnavailable(LibFunc_expm1); + TLI.setUnavailable(LibFunc_expm1f); + TLI.setUnavailable(LibFunc_expm1l); + TLI.setUnavailable(LibFunc_log2); + TLI.setUnavailable(LibFunc_log2f); + TLI.setUnavailable(LibFunc_log2l); + TLI.setUnavailable(LibFunc_log1p); + TLI.setUnavailable(LibFunc_log1pf); + TLI.setUnavailable(LibFunc_log1pl); + TLI.setUnavailable(LibFunc_logb); + TLI.setUnavailable(LibFunc_logbf); + TLI.setUnavailable(LibFunc_logbl); + TLI.setUnavailable(LibFunc_nearbyint); + TLI.setUnavailable(LibFunc_nearbyintf); + TLI.setUnavailable(LibFunc_nearbyintl); + TLI.setUnavailable(LibFunc_rint); + TLI.setUnavailable(LibFunc_rintf); + TLI.setUnavailable(LibFunc_rintl); + TLI.setUnavailable(LibFunc_round); + TLI.setUnavailable(LibFunc_roundf); + TLI.setUnavailable(LibFunc_roundl); + TLI.setUnavailable(LibFunc_trunc); + TLI.setUnavailable(LibFunc_truncf); + TLI.setUnavailable(LibFunc_truncl); // Win32 provides some C99 math with mangled names - 
TLI.setAvailableWithName(LibFunc::copysign, "_copysign"); + TLI.setAvailableWithName(LibFunc_copysign, "_copysign"); if (T.getArch() == Triple::x86) { // Win32 on x86 implements single-precision math functions as macros - TLI.setUnavailable(LibFunc::acosf); - TLI.setUnavailable(LibFunc::asinf); - TLI.setUnavailable(LibFunc::atanf); - TLI.setUnavailable(LibFunc::atan2f); - TLI.setUnavailable(LibFunc::ceilf); - TLI.setUnavailable(LibFunc::copysignf); - TLI.setUnavailable(LibFunc::cosf); - TLI.setUnavailable(LibFunc::coshf); - TLI.setUnavailable(LibFunc::expf); - TLI.setUnavailable(LibFunc::floorf); - TLI.setUnavailable(LibFunc::fminf); - TLI.setUnavailable(LibFunc::fmaxf); - TLI.setUnavailable(LibFunc::fmodf); - TLI.setUnavailable(LibFunc::logf); - TLI.setUnavailable(LibFunc::log10f); - TLI.setUnavailable(LibFunc::modff); - TLI.setUnavailable(LibFunc::powf); - TLI.setUnavailable(LibFunc::sinf); - TLI.setUnavailable(LibFunc::sinhf); - TLI.setUnavailable(LibFunc::sqrtf); - TLI.setUnavailable(LibFunc::tanf); - TLI.setUnavailable(LibFunc::tanhf); + TLI.setUnavailable(LibFunc_acosf); + TLI.setUnavailable(LibFunc_asinf); + TLI.setUnavailable(LibFunc_atanf); + TLI.setUnavailable(LibFunc_atan2f); + TLI.setUnavailable(LibFunc_ceilf); + TLI.setUnavailable(LibFunc_copysignf); + TLI.setUnavailable(LibFunc_cosf); + TLI.setUnavailable(LibFunc_coshf); + TLI.setUnavailable(LibFunc_expf); + TLI.setUnavailable(LibFunc_floorf); + TLI.setUnavailable(LibFunc_fminf); + TLI.setUnavailable(LibFunc_fmaxf); + TLI.setUnavailable(LibFunc_fmodf); + TLI.setUnavailable(LibFunc_logf); + TLI.setUnavailable(LibFunc_log10f); + TLI.setUnavailable(LibFunc_modff); + TLI.setUnavailable(LibFunc_powf); + TLI.setUnavailable(LibFunc_sinf); + TLI.setUnavailable(LibFunc_sinhf); + TLI.setUnavailable(LibFunc_sqrtf); + TLI.setUnavailable(LibFunc_tanf); + TLI.setUnavailable(LibFunc_tanhf); } // Win32 does *not* provide provide these functions, but they are // generally available on POSIX-compliant systems: - TLI.setUnavailable(LibFunc::access); - TLI.setUnavailable(LibFunc::bcmp); - TLI.setUnavailable(LibFunc::bcopy); - TLI.setUnavailable(LibFunc::bzero); - TLI.setUnavailable(LibFunc::chmod); - TLI.setUnavailable(LibFunc::chown); - TLI.setUnavailable(LibFunc::closedir); - TLI.setUnavailable(LibFunc::ctermid); - TLI.setUnavailable(LibFunc::fdopen); - TLI.setUnavailable(LibFunc::ffs); - TLI.setUnavailable(LibFunc::fileno); - TLI.setUnavailable(LibFunc::flockfile); - TLI.setUnavailable(LibFunc::fseeko); - TLI.setUnavailable(LibFunc::fstat); - TLI.setUnavailable(LibFunc::fstatvfs); - TLI.setUnavailable(LibFunc::ftello); - TLI.setUnavailable(LibFunc::ftrylockfile); - TLI.setUnavailable(LibFunc::funlockfile); - TLI.setUnavailable(LibFunc::getc_unlocked); - TLI.setUnavailable(LibFunc::getitimer); - TLI.setUnavailable(LibFunc::getlogin_r); - TLI.setUnavailable(LibFunc::getpwnam); - TLI.setUnavailable(LibFunc::gettimeofday); - TLI.setUnavailable(LibFunc::htonl); - TLI.setUnavailable(LibFunc::htons); - TLI.setUnavailable(LibFunc::lchown); - TLI.setUnavailable(LibFunc::lstat); - TLI.setUnavailable(LibFunc::memccpy); - TLI.setUnavailable(LibFunc::mkdir); - TLI.setUnavailable(LibFunc::ntohl); - TLI.setUnavailable(LibFunc::ntohs); - TLI.setUnavailable(LibFunc::open); - TLI.setUnavailable(LibFunc::opendir); - TLI.setUnavailable(LibFunc::pclose); - TLI.setUnavailable(LibFunc::popen); - TLI.setUnavailable(LibFunc::pread); - TLI.setUnavailable(LibFunc::pwrite); - TLI.setUnavailable(LibFunc::read); - TLI.setUnavailable(LibFunc::readlink); - 
TLI.setUnavailable(LibFunc::realpath); - TLI.setUnavailable(LibFunc::rmdir); - TLI.setUnavailable(LibFunc::setitimer); - TLI.setUnavailable(LibFunc::stat); - TLI.setUnavailable(LibFunc::statvfs); - TLI.setUnavailable(LibFunc::stpcpy); - TLI.setUnavailable(LibFunc::stpncpy); - TLI.setUnavailable(LibFunc::strcasecmp); - TLI.setUnavailable(LibFunc::strncasecmp); - TLI.setUnavailable(LibFunc::times); - TLI.setUnavailable(LibFunc::uname); - TLI.setUnavailable(LibFunc::unlink); - TLI.setUnavailable(LibFunc::unsetenv); - TLI.setUnavailable(LibFunc::utime); - TLI.setUnavailable(LibFunc::utimes); - TLI.setUnavailable(LibFunc::write); + TLI.setUnavailable(LibFunc_access); + TLI.setUnavailable(LibFunc_bcmp); + TLI.setUnavailable(LibFunc_bcopy); + TLI.setUnavailable(LibFunc_bzero); + TLI.setUnavailable(LibFunc_chmod); + TLI.setUnavailable(LibFunc_chown); + TLI.setUnavailable(LibFunc_closedir); + TLI.setUnavailable(LibFunc_ctermid); + TLI.setUnavailable(LibFunc_fdopen); + TLI.setUnavailable(LibFunc_ffs); + TLI.setUnavailable(LibFunc_fileno); + TLI.setUnavailable(LibFunc_flockfile); + TLI.setUnavailable(LibFunc_fseeko); + TLI.setUnavailable(LibFunc_fstat); + TLI.setUnavailable(LibFunc_fstatvfs); + TLI.setUnavailable(LibFunc_ftello); + TLI.setUnavailable(LibFunc_ftrylockfile); + TLI.setUnavailable(LibFunc_funlockfile); + TLI.setUnavailable(LibFunc_getc_unlocked); + TLI.setUnavailable(LibFunc_getitimer); + TLI.setUnavailable(LibFunc_getlogin_r); + TLI.setUnavailable(LibFunc_getpwnam); + TLI.setUnavailable(LibFunc_gettimeofday); + TLI.setUnavailable(LibFunc_htonl); + TLI.setUnavailable(LibFunc_htons); + TLI.setUnavailable(LibFunc_lchown); + TLI.setUnavailable(LibFunc_lstat); + TLI.setUnavailable(LibFunc_memccpy); + TLI.setUnavailable(LibFunc_mkdir); + TLI.setUnavailable(LibFunc_ntohl); + TLI.setUnavailable(LibFunc_ntohs); + TLI.setUnavailable(LibFunc_open); + TLI.setUnavailable(LibFunc_opendir); + TLI.setUnavailable(LibFunc_pclose); + TLI.setUnavailable(LibFunc_popen); + TLI.setUnavailable(LibFunc_pread); + TLI.setUnavailable(LibFunc_pwrite); + TLI.setUnavailable(LibFunc_read); + TLI.setUnavailable(LibFunc_readlink); + TLI.setUnavailable(LibFunc_realpath); + TLI.setUnavailable(LibFunc_rmdir); + TLI.setUnavailable(LibFunc_setitimer); + TLI.setUnavailable(LibFunc_stat); + TLI.setUnavailable(LibFunc_statvfs); + TLI.setUnavailable(LibFunc_stpcpy); + TLI.setUnavailable(LibFunc_stpncpy); + TLI.setUnavailable(LibFunc_strcasecmp); + TLI.setUnavailable(LibFunc_strncasecmp); + TLI.setUnavailable(LibFunc_times); + TLI.setUnavailable(LibFunc_uname); + TLI.setUnavailable(LibFunc_unlink); + TLI.setUnavailable(LibFunc_unsetenv); + TLI.setUnavailable(LibFunc_utime); + TLI.setUnavailable(LibFunc_utimes); + TLI.setUnavailable(LibFunc_write); // Win32 does *not* provide provide these functions, but they are // specified by C99: - TLI.setUnavailable(LibFunc::atoll); - TLI.setUnavailable(LibFunc::frexpf); - TLI.setUnavailable(LibFunc::llabs); + TLI.setUnavailable(LibFunc_atoll); + TLI.setUnavailable(LibFunc_frexpf); + TLI.setUnavailable(LibFunc_llabs); } switch (T.getOS()) { @@ -311,28 +311,28 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0 // and their names are __exp10 and __exp10f. exp10l is not available on // OS X or iOS. 
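// [Editor's note -- usage sketch, not part of the patch.] Downstream passes
// consult these tables through TargetLibraryInfo rather than the Impl; with
// the renamed enum a query looks roughly like this (function name is
// hypothetical, assuming the usual TargetLibraryInfo.h include):
static bool canFormExp10f(const TargetLibraryInfo &TLI) {
  if (!TLI.has(LibFunc_exp10f))
    return false;                 // marked unavailable for this triple above
  // On newer Darwin, setAvailableWithName registers the mangled "__exp10f"
  // symbol; getName returns whatever name was registered for the function.
  return !TLI.getName(LibFunc_exp10f).empty();
}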
- TLI.setUnavailable(LibFunc::exp10l); + TLI.setUnavailable(LibFunc_exp10l); if (T.isMacOSXVersionLT(10, 9)) { - TLI.setUnavailable(LibFunc::exp10); - TLI.setUnavailable(LibFunc::exp10f); + TLI.setUnavailable(LibFunc_exp10); + TLI.setUnavailable(LibFunc_exp10f); } else { - TLI.setAvailableWithName(LibFunc::exp10, "__exp10"); - TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f"); + TLI.setAvailableWithName(LibFunc_exp10, "__exp10"); + TLI.setAvailableWithName(LibFunc_exp10f, "__exp10f"); } break; case Triple::IOS: case Triple::TvOS: case Triple::WatchOS: - TLI.setUnavailable(LibFunc::exp10l); + TLI.setUnavailable(LibFunc_exp10l); if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) || (T.isOSVersionLT(9, 0) && (T.getArch() == Triple::x86 || T.getArch() == Triple::x86_64)))) { - TLI.setUnavailable(LibFunc::exp10); - TLI.setUnavailable(LibFunc::exp10f); + TLI.setUnavailable(LibFunc_exp10); + TLI.setUnavailable(LibFunc_exp10f); } else { - TLI.setAvailableWithName(LibFunc::exp10, "__exp10"); - TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f"); + TLI.setAvailableWithName(LibFunc_exp10, "__exp10"); + TLI.setAvailableWithName(LibFunc_exp10f, "__exp10f"); } break; case Triple::Linux: @@ -344,9 +344,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // Fall through to disable all of them. LLVM_FALLTHROUGH; default: - TLI.setUnavailable(LibFunc::exp10); - TLI.setUnavailable(LibFunc::exp10f); - TLI.setUnavailable(LibFunc::exp10l); + TLI.setUnavailable(LibFunc_exp10); + TLI.setUnavailable(LibFunc_exp10f); + TLI.setUnavailable(LibFunc_exp10l); } // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and @@ -364,7 +364,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, case Triple::Linux: break; default: - TLI.setUnavailable(LibFunc::ffsl); + TLI.setUnavailable(LibFunc_ffsl); } // ffsll is available on at least FreeBSD and Linux (GLIBC): @@ -380,7 +380,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, case Triple::Linux: break; default: - TLI.setUnavailable(LibFunc::ffsll); + TLI.setUnavailable(LibFunc_ffsll); } // The following functions are available on at least FreeBSD: @@ -388,30 +388,30 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // http://svn.freebsd.org/base/head/lib/libc/string/flsl.c // http://svn.freebsd.org/base/head/lib/libc/string/flsll.c if (!T.isOSFreeBSD()) { - TLI.setUnavailable(LibFunc::fls); - TLI.setUnavailable(LibFunc::flsl); - TLI.setUnavailable(LibFunc::flsll); + TLI.setUnavailable(LibFunc_fls); + TLI.setUnavailable(LibFunc_flsl); + TLI.setUnavailable(LibFunc_flsll); } // The following functions are available on at least Linux: if (!T.isOSLinux()) { - TLI.setUnavailable(LibFunc::dunder_strdup); - TLI.setUnavailable(LibFunc::dunder_strtok_r); - TLI.setUnavailable(LibFunc::dunder_isoc99_scanf); - TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf); - TLI.setUnavailable(LibFunc::under_IO_getc); - TLI.setUnavailable(LibFunc::under_IO_putc); - TLI.setUnavailable(LibFunc::memalign); - TLI.setUnavailable(LibFunc::fopen64); - TLI.setUnavailable(LibFunc::fseeko64); - TLI.setUnavailable(LibFunc::fstat64); - TLI.setUnavailable(LibFunc::fstatvfs64); - TLI.setUnavailable(LibFunc::ftello64); - TLI.setUnavailable(LibFunc::lstat64); - TLI.setUnavailable(LibFunc::open64); - TLI.setUnavailable(LibFunc::stat64); - TLI.setUnavailable(LibFunc::statvfs64); - TLI.setUnavailable(LibFunc::tmpfile64); + TLI.setUnavailable(LibFunc_dunder_strdup); + TLI.setUnavailable(LibFunc_dunder_strtok_r); + 
TLI.setUnavailable(LibFunc_dunder_isoc99_scanf); + TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf); + TLI.setUnavailable(LibFunc_under_IO_getc); + TLI.setUnavailable(LibFunc_under_IO_putc); + TLI.setUnavailable(LibFunc_memalign); + TLI.setUnavailable(LibFunc_fopen64); + TLI.setUnavailable(LibFunc_fseeko64); + TLI.setUnavailable(LibFunc_fstat64); + TLI.setUnavailable(LibFunc_fstatvfs64); + TLI.setUnavailable(LibFunc_ftello64); + TLI.setUnavailable(LibFunc_lstat64); + TLI.setUnavailable(LibFunc_open64); + TLI.setUnavailable(LibFunc_stat64); + TLI.setUnavailable(LibFunc_statvfs64); + TLI.setUnavailable(LibFunc_tmpfile64); } // As currently implemented in clang, NVPTX code has no standard library to @@ -427,9 +427,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // optimizations, so this situation should be fixed. if (T.isNVPTX()) { TLI.disableAllFunctions(); - TLI.setAvailable(LibFunc::nvvm_reflect); + TLI.setAvailable(LibFunc_nvvm_reflect); } else { - TLI.setUnavailable(LibFunc::nvvm_reflect); + TLI.setUnavailable(LibFunc_nvvm_reflect); } TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary); @@ -500,9 +500,9 @@ static StringRef sanitizeFunctionName(StringRef funcName) { } bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, - LibFunc::Func &F) const { + LibFunc &F) const { StringRef const *Start = &StandardNames[0]; - StringRef const *End = &StandardNames[LibFunc::NumLibFuncs]; + StringRef const *End = &StandardNames[NumLibFuncs]; funcName = sanitizeFunctionName(funcName); if (funcName.empty()) @@ -513,14 +513,14 @@ bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, return LHS < RHS; }); if (I != End && *I == funcName) { - F = (LibFunc::Func)(I - Start); + F = (LibFunc)(I - Start); return true; } return false; } bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, - LibFunc::Func F, + LibFunc F, const DataLayout *DL) const { LLVMContext &Ctx = FTy.getContext(); Type *PCharTy = Type::getInt8PtrTy(Ctx); @@ -531,504 +531,660 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, unsigned NumParams = FTy.getNumParams(); switch (F) { - case LibFunc::strlen: + case LibFunc_strlen: return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() && FTy.getReturnType()->isIntegerTy()); - case LibFunc::strchr: - case LibFunc::strrchr: + case LibFunc_strchr: + case LibFunc_strrchr: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0) == FTy.getReturnType() && FTy.getParamType(1)->isIntegerTy()); - case LibFunc::strtol: - case LibFunc::strtod: - case LibFunc::strtof: - case LibFunc::strtoul: - case LibFunc::strtoll: - case LibFunc::strtold: - case LibFunc::strtoull: + case LibFunc_strtol: + case LibFunc_strtod: + case LibFunc_strtof: + case LibFunc_strtoul: + case LibFunc_strtoll: + case LibFunc_strtold: + case LibFunc_strtoull: return ((NumParams == 2 || NumParams == 3) && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::strcat: + case LibFunc_strcat: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0) == FTy.getReturnType() && FTy.getParamType(1) == FTy.getReturnType()); - case LibFunc::strncat: + case LibFunc_strncat: return (NumParams == 3 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0) == FTy.getReturnType() && FTy.getParamType(1) == FTy.getReturnType() && FTy.getParamType(2)->isIntegerTy()); - case LibFunc::strcpy_chk: - case LibFunc::stpcpy_chk: + case LibFunc_strcpy_chk: + case 
LibFunc_stpcpy_chk: --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) return false; LLVM_FALLTHROUGH; - case LibFunc::strcpy: - case LibFunc::stpcpy: + case LibFunc_strcpy: + case LibFunc_stpcpy: return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0) == FTy.getParamType(1) && FTy.getParamType(0) == PCharTy); - case LibFunc::strncpy_chk: - case LibFunc::stpncpy_chk: + case LibFunc_strncpy_chk: + case LibFunc_stpncpy_chk: --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) return false; LLVM_FALLTHROUGH; - case LibFunc::strncpy: - case LibFunc::stpncpy: + case LibFunc_strncpy: + case LibFunc_stpncpy: return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0) == FTy.getParamType(1) && FTy.getParamType(0) == PCharTy && FTy.getParamType(2)->isIntegerTy()); - case LibFunc::strxfrm: + case LibFunc_strxfrm: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::strcmp: + case LibFunc_strcmp: return (NumParams == 2 && FTy.getReturnType()->isIntegerTy(32) && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(0) == FTy.getParamType(1)); - case LibFunc::strncmp: + case LibFunc_strncmp: return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(0) == FTy.getParamType(1) && FTy.getParamType(2)->isIntegerTy()); - case LibFunc::strspn: - case LibFunc::strcspn: + case LibFunc_strspn: + case LibFunc_strcspn: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(0) == FTy.getParamType(1) && FTy.getReturnType()->isIntegerTy()); - case LibFunc::strcoll: - case LibFunc::strcasecmp: - case LibFunc::strncasecmp: + case LibFunc_strcoll: + case LibFunc_strcasecmp: + case LibFunc_strncasecmp: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::strstr: + case LibFunc_strstr: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::strpbrk: + case LibFunc_strpbrk: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0) == FTy.getParamType(1)); - case LibFunc::strtok: - case LibFunc::strtok_r: + case LibFunc_strtok: + case LibFunc_strtok_r: return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::scanf: - case LibFunc::setbuf: - case LibFunc::setvbuf: + case LibFunc_scanf: + case LibFunc_setbuf: + case LibFunc_setvbuf: return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::strdup: - case LibFunc::strndup: + case LibFunc_strdup: + case LibFunc_strndup: return (NumParams >= 1 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0)->isPointerTy()); - case LibFunc::sscanf: - case LibFunc::stat: - case LibFunc::statvfs: - case LibFunc::sprintf: + case LibFunc_sscanf: + case LibFunc_stat: + case LibFunc_statvfs: + case LibFunc_siprintf: + case LibFunc_sprintf: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::snprintf: + case LibFunc_snprintf: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); - case LibFunc::setitimer: + case LibFunc_setitimer: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); - case LibFunc::system: + case LibFunc_system: 
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::malloc: + case LibFunc_malloc: return (NumParams == 1 && FTy.getReturnType()->isPointerTy()); - case LibFunc::memcmp: - return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && - FTy.getParamType(1)->isPointerTy() && - FTy.getReturnType()->isIntegerTy(32)); + case LibFunc_memcmp: + return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) && + FTy.getParamType(0)->isPointerTy() && + FTy.getParamType(1)->isPointerTy()); - case LibFunc::memchr: - case LibFunc::memrchr: - return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && + case LibFunc_memchr: + case LibFunc_memrchr: + return (NumParams == 3 && FTy.getReturnType()->isPointerTy() && + FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(1)->isIntegerTy(32) && - FTy.getParamType(2)->isIntegerTy() && - FTy.getReturnType()->isPointerTy()); - case LibFunc::modf: - case LibFunc::modff: - case LibFunc::modfl: + IsSizeTTy(FTy.getParamType(2))); + case LibFunc_modf: + case LibFunc_modff: + case LibFunc_modfl: return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::memcpy_chk: - case LibFunc::memmove_chk: + case LibFunc_memcpy_chk: + case LibFunc_memmove_chk: --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) return false; LLVM_FALLTHROUGH; - case LibFunc::memcpy: - case LibFunc::mempcpy: - case LibFunc::memmove: + case LibFunc_memcpy: + case LibFunc_mempcpy: + case LibFunc_memmove: return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy() && IsSizeTTy(FTy.getParamType(2))); - case LibFunc::memset_chk: + case LibFunc_memset_chk: --NumParams; if (!IsSizeTTy(FTy.getParamType(NumParams))) return false; LLVM_FALLTHROUGH; - case LibFunc::memset: + case LibFunc_memset: return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isIntegerTy() && IsSizeTTy(FTy.getParamType(2))); - case LibFunc::memccpy: + case LibFunc_memccpy: return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::memalign: + case LibFunc_memalign: return (FTy.getReturnType()->isPointerTy()); - case LibFunc::realloc: - return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && - FTy.getReturnType()->isPointerTy()); - case LibFunc::read: + case LibFunc_realloc: + case LibFunc_reallocf: + return (NumParams == 2 && FTy.getReturnType() == PCharTy && + FTy.getParamType(0) == FTy.getReturnType() && + IsSizeTTy(FTy.getParamType(1))); + case LibFunc_read: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::rewind: - case LibFunc::rmdir: - case LibFunc::remove: - case LibFunc::realpath: + case LibFunc_rewind: + case LibFunc_rmdir: + case LibFunc_remove: + case LibFunc_realpath: return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::rename: + case LibFunc_rename: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::readlink: + case LibFunc_readlink: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::write: + case LibFunc_write: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::bcopy: - case LibFunc::bcmp: + case LibFunc_bcopy: + case LibFunc_bcmp: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::bzero: + 
case LibFunc_bzero: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::calloc: + case LibFunc_calloc: return (NumParams == 2 && FTy.getReturnType()->isPointerTy()); - case LibFunc::atof: - case LibFunc::atoi: - case LibFunc::atol: - case LibFunc::atoll: - case LibFunc::ferror: - case LibFunc::getenv: - case LibFunc::getpwnam: - case LibFunc::pclose: - case LibFunc::perror: - case LibFunc::printf: - case LibFunc::puts: - case LibFunc::uname: - case LibFunc::under_IO_getc: - case LibFunc::unlink: - case LibFunc::unsetenv: + case LibFunc_atof: + case LibFunc_atoi: + case LibFunc_atol: + case LibFunc_atoll: + case LibFunc_ferror: + case LibFunc_getenv: + case LibFunc_getpwnam: + case LibFunc_iprintf: + case LibFunc_pclose: + case LibFunc_perror: + case LibFunc_printf: + case LibFunc_puts: + case LibFunc_uname: + case LibFunc_under_IO_getc: + case LibFunc_unlink: + case LibFunc_unsetenv: return (NumParams == 1 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::chmod: - case LibFunc::chown: - case LibFunc::clearerr: - case LibFunc::closedir: - case LibFunc::ctermid: - case LibFunc::fclose: - case LibFunc::feof: - case LibFunc::fflush: - case LibFunc::fgetc: - case LibFunc::fileno: - case LibFunc::flockfile: - case LibFunc::free: - case LibFunc::fseek: - case LibFunc::fseeko64: - case LibFunc::fseeko: - case LibFunc::fsetpos: - case LibFunc::ftell: - case LibFunc::ftello64: - case LibFunc::ftello: - case LibFunc::ftrylockfile: - case LibFunc::funlockfile: - case LibFunc::getc: - case LibFunc::getc_unlocked: - case LibFunc::getlogin_r: - case LibFunc::mkdir: - case LibFunc::mktime: - case LibFunc::times: + case LibFunc_access: + case LibFunc_chmod: + case LibFunc_chown: + case LibFunc_clearerr: + case LibFunc_closedir: + case LibFunc_ctermid: + case LibFunc_fclose: + case LibFunc_feof: + case LibFunc_fflush: + case LibFunc_fgetc: + case LibFunc_fileno: + case LibFunc_flockfile: + case LibFunc_free: + case LibFunc_fseek: + case LibFunc_fseeko64: + case LibFunc_fseeko: + case LibFunc_fsetpos: + case LibFunc_ftell: + case LibFunc_ftello64: + case LibFunc_ftello: + case LibFunc_ftrylockfile: + case LibFunc_funlockfile: + case LibFunc_getc: + case LibFunc_getc_unlocked: + case LibFunc_getlogin_r: + case LibFunc_mkdir: + case LibFunc_mktime: + case LibFunc_times: return (NumParams != 0 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::access: - return (NumParams == 2 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::fopen: + case LibFunc_fopen: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::fdopen: + case LibFunc_fdopen: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::fputc: - case LibFunc::fstat: - case LibFunc::frexp: - case LibFunc::frexpf: - case LibFunc::frexpl: - case LibFunc::fstatvfs: + case LibFunc_fputc: + case LibFunc_fstat: + case LibFunc_frexp: + case LibFunc_frexpf: + case LibFunc_frexpl: + case LibFunc_fstatvfs: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::fgets: + case LibFunc_fgets: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); - case LibFunc::fread: + case LibFunc_fread: return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(3)->isPointerTy()); - case LibFunc::fwrite: + case LibFunc_fwrite: return (NumParams == 4 && FTy.getReturnType()->isIntegerTy() && 
FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isIntegerTy() && FTy.getParamType(2)->isIntegerTy() && FTy.getParamType(3)->isPointerTy()); - case LibFunc::fputs: + case LibFunc_fputs: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::fscanf: - case LibFunc::fprintf: - return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && + case LibFunc_fscanf: + case LibFunc_fiprintf: + case LibFunc_fprintf: + return (NumParams >= 2 && FTy.getReturnType()->isIntegerTy() && + FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::fgetpos: + case LibFunc_fgetpos: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::gets: - case LibFunc::getchar: - case LibFunc::getitimer: + case LibFunc_getchar: + return (NumParams == 0 && FTy.getReturnType()->isIntegerTy()); + case LibFunc_gets: + return (NumParams == 1 && FTy.getParamType(0) == PCharTy); + case LibFunc_getitimer: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::ungetc: + case LibFunc_ungetc: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::utime: - case LibFunc::utimes: + case LibFunc_utime: + case LibFunc_utimes: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::putc: + case LibFunc_putc: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::pread: - case LibFunc::pwrite: + case LibFunc_pread: + case LibFunc_pwrite: return (NumParams == 4 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::popen: + case LibFunc_popen: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::vscanf: + case LibFunc_vscanf: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::vsscanf: + case LibFunc_vsscanf: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); - case LibFunc::vfscanf: + case LibFunc_vfscanf: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); - case LibFunc::valloc: + case LibFunc_valloc: return (FTy.getReturnType()->isPointerTy()); - case LibFunc::vprintf: + case LibFunc_vprintf: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::vfprintf: - case LibFunc::vsprintf: + case LibFunc_vfprintf: + case LibFunc_vsprintf: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::vsnprintf: + case LibFunc_vsnprintf: return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); - case LibFunc::open: + case LibFunc_open: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::opendir: + case LibFunc_opendir: return (NumParams == 1 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0)->isPointerTy()); - case LibFunc::tmpfile: + case LibFunc_tmpfile: return (FTy.getReturnType()->isPointerTy()); - case LibFunc::htonl: - case LibFunc::htons: - case LibFunc::ntohl: - case LibFunc::ntohs: - case LibFunc::lstat: + case LibFunc_htonl: + case LibFunc_ntohl: + return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) && + FTy.getReturnType() == FTy.getParamType(0)); + case LibFunc_htons: + case LibFunc_ntohs: + return (NumParams == 1 && 
FTy.getReturnType()->isIntegerTy(16) && + FTy.getReturnType() == FTy.getParamType(0)); + case LibFunc_lstat: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::lchown: + case LibFunc_lchown: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::qsort: + case LibFunc_qsort: return (NumParams == 4 && FTy.getParamType(3)->isPointerTy()); - case LibFunc::dunder_strdup: - case LibFunc::dunder_strndup: + case LibFunc_dunder_strdup: + case LibFunc_dunder_strndup: return (NumParams >= 1 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0)->isPointerTy()); - case LibFunc::dunder_strtok_r: + case LibFunc_dunder_strtok_r: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::under_IO_putc: + case LibFunc_under_IO_putc: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::dunder_isoc99_scanf: + case LibFunc_dunder_isoc99_scanf: return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::stat64: - case LibFunc::lstat64: - case LibFunc::statvfs64: + case LibFunc_stat64: + case LibFunc_lstat64: + case LibFunc_statvfs64: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::dunder_isoc99_sscanf: + case LibFunc_dunder_isoc99_sscanf: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::fopen64: + case LibFunc_fopen64: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::tmpfile64: + case LibFunc_tmpfile64: return (FTy.getReturnType()->isPointerTy()); - case LibFunc::fstat64: - case LibFunc::fstatvfs64: + case LibFunc_fstat64: + case LibFunc_fstatvfs64: return (NumParams == 2 && FTy.getParamType(1)->isPointerTy()); - case LibFunc::open64: + case LibFunc_open64: return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy()); - case LibFunc::gettimeofday: + case LibFunc_gettimeofday: return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy()); - case LibFunc::Znwj: // new(unsigned int); - case LibFunc::Znwm: // new(unsigned long); - case LibFunc::Znaj: // new[](unsigned int); - case LibFunc::Znam: // new[](unsigned long); - case LibFunc::msvc_new_int: // new(unsigned int); - case LibFunc::msvc_new_longlong: // new(unsigned long long); - case LibFunc::msvc_new_array_int: // new[](unsigned int); - case LibFunc::msvc_new_array_longlong: // new[](unsigned long long); - return (NumParams == 1); - - case LibFunc::memset_pattern16: + // new(unsigned int); + case LibFunc_Znwj: + // new(unsigned long); + case LibFunc_Znwm: + // new[](unsigned int); + case LibFunc_Znaj: + // new[](unsigned long); + case LibFunc_Znam: + // new(unsigned int); + case LibFunc_msvc_new_int: + // new(unsigned long long); + case LibFunc_msvc_new_longlong: + // new[](unsigned int); + case LibFunc_msvc_new_array_int: + // new[](unsigned long long); + case LibFunc_msvc_new_array_longlong: + return (NumParams == 1 && FTy.getReturnType()->isPointerTy()); + + // new(unsigned int, nothrow); + case LibFunc_ZnwjRKSt9nothrow_t: + // new(unsigned long, nothrow); + case LibFunc_ZnwmRKSt9nothrow_t: + // new[](unsigned int, nothrow); + case LibFunc_ZnajRKSt9nothrow_t: + // new[](unsigned long, nothrow); + case LibFunc_ZnamRKSt9nothrow_t: + // new(unsigned int, nothrow); + case LibFunc_msvc_new_int_nothrow: + // new(unsigned long 
long, nothrow); + case LibFunc_msvc_new_longlong_nothrow: + // new[](unsigned int, nothrow); + case LibFunc_msvc_new_array_int_nothrow: + // new[](unsigned long long, nothrow); + case LibFunc_msvc_new_array_longlong_nothrow: + return (NumParams == 2 && FTy.getReturnType()->isPointerTy()); + + // void operator delete[](void*); + case LibFunc_ZdaPv: + // void operator delete(void*); + case LibFunc_ZdlPv: + // void operator delete[](void*); + case LibFunc_msvc_delete_array_ptr32: + // void operator delete[](void*); + case LibFunc_msvc_delete_array_ptr64: + // void operator delete(void*); + case LibFunc_msvc_delete_ptr32: + // void operator delete(void*); + case LibFunc_msvc_delete_ptr64: + return (NumParams == 1 && FTy.getParamType(0)->isPointerTy()); + + // void operator delete[](void*, nothrow); + case LibFunc_ZdaPvRKSt9nothrow_t: + // void operator delete[](void*, unsigned int); + case LibFunc_ZdaPvj: + // void operator delete[](void*, unsigned long); + case LibFunc_ZdaPvm: + // void operator delete(void*, nothrow); + case LibFunc_ZdlPvRKSt9nothrow_t: + // void operator delete(void*, unsigned int); + case LibFunc_ZdlPvj: + // void operator delete(void*, unsigned long); + case LibFunc_ZdlPvm: + // void operator delete[](void*, unsigned int); + case LibFunc_msvc_delete_array_ptr32_int: + // void operator delete[](void*, nothrow); + case LibFunc_msvc_delete_array_ptr32_nothrow: + // void operator delete[](void*, unsigned long long); + case LibFunc_msvc_delete_array_ptr64_longlong: + // void operator delete[](void*, nothrow); + case LibFunc_msvc_delete_array_ptr64_nothrow: + // void operator delete(void*, unsigned int); + case LibFunc_msvc_delete_ptr32_int: + // void operator delete(void*, nothrow); + case LibFunc_msvc_delete_ptr32_nothrow: + // void operator delete(void*, unsigned long long); + case LibFunc_msvc_delete_ptr64_longlong: + // void operator delete(void*, nothrow); + case LibFunc_msvc_delete_ptr64_nothrow: + return (NumParams == 2 && FTy.getParamType(0)->isPointerTy()); + + case LibFunc_memset_pattern16: return (!FTy.isVarArg() && NumParams == 3 && - isa<PointerType>(FTy.getParamType(0)) && - isa<PointerType>(FTy.getParamType(1)) && - isa<IntegerType>(FTy.getParamType(2))); - - // int __nvvm_reflect(const char *); - case LibFunc::nvvm_reflect: - return (NumParams == 1 && isa<PointerType>(FTy.getParamType(0))); - - case LibFunc::sin: - case LibFunc::sinf: - case LibFunc::sinl: - case LibFunc::cos: - case LibFunc::cosf: - case LibFunc::cosl: - case LibFunc::tan: - case LibFunc::tanf: - case LibFunc::tanl: - case LibFunc::exp: - case LibFunc::expf: - case LibFunc::expl: - case LibFunc::exp2: - case LibFunc::exp2f: - case LibFunc::exp2l: - case LibFunc::log: - case LibFunc::logf: - case LibFunc::logl: - case LibFunc::log10: - case LibFunc::log10f: - case LibFunc::log10l: - case LibFunc::log2: - case LibFunc::log2f: - case LibFunc::log2l: - case LibFunc::fabs: - case LibFunc::fabsf: - case LibFunc::fabsl: - case LibFunc::floor: - case LibFunc::floorf: - case LibFunc::floorl: - case LibFunc::ceil: - case LibFunc::ceilf: - case LibFunc::ceill: - case LibFunc::trunc: - case LibFunc::truncf: - case LibFunc::truncl: - case LibFunc::rint: - case LibFunc::rintf: - case LibFunc::rintl: - case LibFunc::nearbyint: - case LibFunc::nearbyintf: - case LibFunc::nearbyintl: - case LibFunc::round: - case LibFunc::roundf: - case LibFunc::roundl: - case LibFunc::sqrt: - case LibFunc::sqrtf: - case LibFunc::sqrtl: + FTy.getParamType(0)->isPointerTy() && + FTy.getParamType(1)->isPointerTy() && + 
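A hedged sketch of how the renamed LibFunc_* values validated above are typically consumed by a transform; TLI is assumed to be a TargetLibraryInfo already computed by the pass manager, and only the basic operator new variants from this hunk are checked.

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"

static bool isRecognizedOperatorNew(const llvm::Function &Callee,
                                    const llvm::TargetLibraryInfo &TLI) {
  llvm::LibFunc TheLibFunc;
  // getLibFunc succeeds only when the name is known and the declaration
  // passed the prototype checks above; has() additionally confirms the
  // function is available in the current target environment.
  if (!TLI.getLibFunc(Callee, TheLibFunc) || !TLI.has(TheLibFunc))
    return false;
  return TheLibFunc == llvm::LibFunc_Znwj ||  // operator new(unsigned int)
         TheLibFunc == llvm::LibFunc_Znwm;    // operator new(unsigned long)
}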
FTy.getParamType(2)->isIntegerTy()); + + case LibFunc_cxa_guard_abort: + case LibFunc_cxa_guard_acquire: + case LibFunc_cxa_guard_release: + case LibFunc_nvvm_reflect: + return (NumParams == 1 && FTy.getParamType(0)->isPointerTy()); + + case LibFunc_sincospi_stret: + case LibFunc_sincospif_stret: + return (NumParams == 1 && FTy.getParamType(0)->isFloatingPointTy()); + + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acosh: + case LibFunc_acoshf: + case LibFunc_acoshl: + case LibFunc_acosl: + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asinh: + case LibFunc_asinhf: + case LibFunc_asinhl: + case LibFunc_asinl: + case LibFunc_atan: + case LibFunc_atanf: + case LibFunc_atanh: + case LibFunc_atanhf: + case LibFunc_atanhl: + case LibFunc_atanl: + case LibFunc_cbrt: + case LibFunc_cbrtf: + case LibFunc_cbrtl: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: + case LibFunc_cos: + case LibFunc_cosf: + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + case LibFunc_cosl: + case LibFunc_exp10: + case LibFunc_exp10f: + case LibFunc_exp10l: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_expl: + case LibFunc_expm1: + case LibFunc_expm1f: + case LibFunc_expm1l: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10l: + case LibFunc_log1p: + case LibFunc_log1pf: + case LibFunc_log1pl: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: + case LibFunc_log: + case LibFunc_logb: + case LibFunc_logbf: + case LibFunc_logbl: + case LibFunc_logf: + case LibFunc_logl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: + case LibFunc_sin: + case LibFunc_sinf: + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: + case LibFunc_sinl: + case LibFunc_sqrt: + case LibFunc_sqrt_finite: + case LibFunc_sqrtf: + case LibFunc_sqrtf_finite: + case LibFunc_sqrtl: + case LibFunc_sqrtl_finite: + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanh: + case LibFunc_tanhf: + case LibFunc_tanhl: + case LibFunc_tanl: + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: return (NumParams == 1 && FTy.getReturnType()->isFloatingPointTy() && FTy.getReturnType() == FTy.getParamType(0)); - case LibFunc::fmin: - case LibFunc::fminf: - case LibFunc::fminl: - case LibFunc::fmax: - case LibFunc::fmaxf: - case LibFunc::fmaxl: - case LibFunc::copysign: - case LibFunc::copysignf: - case LibFunc::copysignl: - case LibFunc::pow: - case LibFunc::powf: - case LibFunc::powl: + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2l: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: + case LibFunc_fmod: + case LibFunc_fmodf: + case LibFunc_fmodl: + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_powl: return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() && FTy.getReturnType() == FTy.getParamType(0) && FTy.getReturnType() == FTy.getParamType(1)); - case LibFunc::ffs: - case LibFunc::ffsl: - case LibFunc::ffsll: - case LibFunc::fls: - case LibFunc::flsl: - case LibFunc::flsll: + case LibFunc_ldexp: + 
case LibFunc_ldexpf: + case LibFunc_ldexpl: + return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() && + FTy.getReturnType() == FTy.getParamType(0) && + FTy.getParamType(1)->isIntegerTy(32)); + + case LibFunc_ffs: + case LibFunc_ffsl: + case LibFunc_ffsll: + case LibFunc_fls: + case LibFunc_flsl: + case LibFunc_flsll: return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) && FTy.getParamType(0)->isIntegerTy()); - case LibFunc::isdigit: - case LibFunc::isascii: - case LibFunc::toascii: + case LibFunc_isdigit: + case LibFunc_isascii: + case LibFunc_toascii: + case LibFunc_putchar: return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) && FTy.getReturnType() == FTy.getParamType(0)); - case LibFunc::abs: - case LibFunc::labs: - case LibFunc::llabs: + case LibFunc_abs: + case LibFunc_labs: + case LibFunc_llabs: return (NumParams == 1 && FTy.getReturnType()->isIntegerTy() && FTy.getReturnType() == FTy.getParamType(0)); - case LibFunc::cxa_atexit: + case LibFunc_cxa_atexit: return (NumParams == 3 && FTy.getReturnType()->isIntegerTy() && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1)->isPointerTy() && FTy.getParamType(2)->isPointerTy()); - case LibFunc::sinpi: - case LibFunc::cospi: + case LibFunc_sinpi: + case LibFunc_cospi: return (NumParams == 1 && FTy.getReturnType()->isDoubleTy() && FTy.getReturnType() == FTy.getParamType(0)); - case LibFunc::sinpif: - case LibFunc::cospif: + case LibFunc_sinpif: + case LibFunc_cospif: return (NumParams == 1 && FTy.getReturnType()->isFloatTy() && FTy.getReturnType() == FTy.getParamType(0)); - default: - // Assume the other functions are correct. - // FIXME: It'd be really nice to cover them all. - return true; + case LibFunc_strnlen: + return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(1) && + FTy.getParamType(0) == PCharTy && + FTy.getParamType(1) == SizeTTy); + + case LibFunc_posix_memalign: + return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) && + FTy.getParamType(0)->isPointerTy() && + FTy.getParamType(1) == SizeTTy && FTy.getParamType(2) == SizeTTy); + + case LibFunc::NumLibFuncs: + break; } + + llvm_unreachable("Invalid libfunc"); } bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl, - LibFunc::Func &F) const { + LibFunc &F) const { const DataLayout *DL = FDecl.getParent() ? 
&FDecl.getParent()->getDataLayout() : nullptr; return getLibFunc(FDecl.getName(), F) && diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 5c0d1aac1b98..d73b1a128031 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -97,6 +97,10 @@ bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } +unsigned TargetTransformInfo::getFlatAddressSpace() const { + return TTIImpl->getFlatAddressSpace(); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } @@ -182,6 +186,21 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const return TTIImpl->shouldBuildLookupTablesForConstant(C); } +unsigned TargetTransformInfo:: +getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const { + return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract); +} + +unsigned TargetTransformInfo:: +getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) const { + return TTIImpl->getOperandsScalarizationOverhead(Args, VF); +} + +bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const { + return TTIImpl->supportsEfficientVectorElementLoadStore(); +} + bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } @@ -254,6 +273,12 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } +bool TargetTransformInfo::shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { + return TTIImpl->shouldConsiderAddressTypePromotion( + I, AllowPromotionWithoutCommonHeader); +} + unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } @@ -293,8 +318,10 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, } int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src); + Type *Src, const Instruction *I) const { + assert ((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -314,8 +341,10 @@ int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { } int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); + Type *CondTy, const Instruction *I) const { + assert ((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -329,8 +358,11 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const { - int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + unsigned AddressSpace, + const Instruction *I) const { + assert ((I == nullptr || I->getOpcode() == Opcode) && + "Opcode should reflect passed instruction."); + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); 
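A short sketch of a caller using the new optional const Instruction * argument of getCastInstrCost so the target can cost the cast in context; the TargetTransformInfo and the cast instruction are assumed to come from the surrounding pass, and omitting the argument keeps the old behaviour.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"

static int costOfCastInContext(const llvm::TargetTransformInfo &TTI,
                               const llvm::CastInst *CI) {
  // The assert added above requires the opcode to match the instruction
  // that is passed, so the opcode is taken from CI itself.
  return TTI.getCastInstrCost(CI->getOpcode(), CI->getDestTy(),
                              CI->getSrcTy(), CI);
}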
assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -363,17 +395,17 @@ int TargetTransformInfo::getInterleavedMemoryOpCost( } int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys, - FastMathFlags FMF) const { - int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF); + ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF, + ScalarizationCostPassed); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args, - FastMathFlags FMF) const { - int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF); + ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp index f56754167360..6871e4887c9e 100644 --- a/lib/Analysis/TypeMetadataUtils.cpp +++ b/lib/Analysis/TypeMetadataUtils.cpp @@ -39,7 +39,7 @@ findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls, // Search for virtual calls that load from VPtr and add them to DevirtCalls. static void -findLoadCallsAtConstantOffset(Module *M, +findLoadCallsAtConstantOffset(const Module *M, SmallVectorImpl<DevirtCallSite> &DevirtCalls, Value *VPtr, int64_t Offset) { for (const Use &U : VPtr->uses()) { @@ -62,10 +62,10 @@ findLoadCallsAtConstantOffset(Module *M, void llvm::findDevirtualizableCallsForTypeTest( SmallVectorImpl<DevirtCallSite> &DevirtCalls, - SmallVectorImpl<CallInst *> &Assumes, CallInst *CI) { + SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI) { assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test); - Module *M = CI->getParent()->getParent()->getParent(); + const Module *M = CI->getParent()->getParent()->getParent(); // Find llvm.assume intrinsics for this llvm.type.test call. 
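A hedged sketch of the vectorizer-style use the new trailing VF parameter of getIntrinsicInstrCost is aimed at; the intrinsic ID, scalar arguments and fast-math flags are placeholders supplied by the caller.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"

static int costOfWidenedIntrinsic(const llvm::TargetTransformInfo &TTI,
                                  llvm::Intrinsic::ID ID, llvm::Type *RetTy,
                                  llvm::ArrayRef<llvm::Value *> ScalarArgs,
                                  llvm::FastMathFlags FMF, unsigned VF) {
  // VF tells the implementation how many lanes the scalar call would be
  // widened to, so any scalarization overhead can be folded into the result.
  return TTI.getIntrinsicInstrCost(ID, RetTy, ScalarArgs, FMF, VF);
}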
for (const Use &CIU : CI->uses()) { @@ -86,7 +86,8 @@ void llvm::findDevirtualizableCallsForTypeTest( void llvm::findDevirtualizableCallsForTypeCheckedLoad( SmallVectorImpl<DevirtCallSite> &DevirtCalls, SmallVectorImpl<Instruction *> &LoadedPtrs, - SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, CallInst *CI) { + SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, + const CallInst *CI) { assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_checked_load); @@ -96,7 +97,7 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad( return; } - for (Use &U : CI->uses()) { + for (const Use &U : CI->uses()) { auto CIU = U.getUser(); if (auto EVI = dyn_cast<ExtractValueInst>(CIU)) { if (EVI->getNumIndices() == 1 && EVI->getIndices()[0] == 0) { diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index b79370baad10..d871e83f222a 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" @@ -76,6 +77,9 @@ struct Query { AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; + // Unlike the other analyses, this may be a nullptr because not all clients + // provide it currently. + OptimizationRemarkEmitter *ORE; /// Set of assumptions that should be excluded from further queries. /// This is because of the potential for mutual recursion to cause @@ -90,11 +94,12 @@ struct Query { unsigned NumExcluded; Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) - : DL(DL), AC(AC), CxtI(CxtI), DT(DT), NumExcluded(0) {} + const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr) + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {} Query(const Query &Q, const Value *NewExcl) - : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), NumExcluded(Q.NumExcluded) { + : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), + NumExcluded(Q.NumExcluded) { Excluded = Q.Excluded; Excluded[NumExcluded++] = NewExcl; assert(NumExcluded <= Excluded.size()); @@ -131,9 +136,10 @@ static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, + OptimizationRemarkEmitter *ORE) { ::computeKnownBits(V, KnownZero, KnownOne, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT)); + Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, @@ -249,30 +255,6 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, unsigned Depth, const Query &Q) { - if (!Add) { - if (const ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. 
- if (!CLHS->getValue().isNegative()) { - unsigned BitWidth = KnownZero.getBitWidth(); - unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); - // NLZ can't be BitWidth with no sign bit - APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q); - - // If all of the MaskV bits are known to be zero, then we know the - // output top bits are zero, because we now know that the output is - // from [0-C]. - if ((KnownZero2 & MaskV) == MaskV) { - unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); - // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); - } - } - } - } - unsigned BitWidth = KnownZero.getBitWidth(); // If an initial sequence of bits in the result is not needed, the @@ -282,11 +264,11 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q); // Carry in a 1 for a subtract, rather than a 0. - APInt CarryIn(BitWidth, 0); + uint64_t CarryIn = 0; if (!Add) { // Sum = LHS + ~RHS + 1 std::swap(KnownZero2, KnownOne2); - CarryIn.setBit(0); + CarryIn = 1; } APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn; @@ -315,11 +297,11 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, // Adding two non-negative numbers, or subtracting a negative number from // a non-negative one, can't wrap into negative. if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); + KnownZero.setSignBit(); // Adding two negative numbers, or subtracting a non-negative number from // a negative one, can't wrap into non-negative. else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); + KnownOne.setSignBit(); } } } @@ -370,8 +352,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, TrailZ = std::min(TrailZ, BitWidth); LeadZ = std::min(LeadZ, BitWidth); - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | - APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero.clearAllBits(); + KnownZero.setLowBits(TrailZ); + KnownZero.setHighBits(LeadZ); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in @@ -379,9 +362,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // though as the program is invoking undefined behaviour we can choose // whatever we like here. if (isKnownNonNegative && !KnownOne.isNegative()) - KnownZero.setBit(BitWidth - 1); + KnownZero.setSignBit(); else if (isKnownNegative && !KnownZero.isNegative()) - KnownOne.setBit(BitWidth - 1); + KnownOne.setSignBit(); } void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, @@ -553,6 +536,13 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, KnownOne.setAllBits(); return; } + if (match(Arg, m_Not(m_Specific(V))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + assert(BitWidth == 1 && "assume operand is not i1?"); + KnownZero.setAllBits(); + KnownOne.clearAllBits(); + return; + } // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth == MaxDepth) @@ -719,7 +709,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, if (RHSKnownZero.isNegative()) { // We know that the sign bit is zero. - KnownZero |= APInt::getSignBit(BitWidth); + KnownZero.setSignBit(); } // assume(v >_s c) where c is at least -1. 
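The carry-in change above reduces to a plain 0/1 because the algorithm only ever injects a carry into bit zero. What follows is a self-contained 64-bit model of that carry-based known-bits addition (not LLVM's APInt code); a subtract is handled by the caller exactly as in the patch, swapping the RHS zero/one masks and passing a carry-in of 1, since A - B == A + ~B + 1.

#include <cassert>
#include <cstdint>

struct KnownBits64 {
  uint64_t Zero = 0; // bits known to be 0 (disjoint from One)
  uint64_t One = 0;  // bits known to be 1
};

static KnownBits64 knownBitsForAdd(KnownBits64 L, KnownBits64 R,
                                   uint64_t CarryIn /* 0 or 1 */) {
  // Sum with every unknown bit forced to 1 (maximal operands) and with
  // every unknown bit forced to 0 (minimal operands).
  uint64_t MaxSum = ~L.Zero + ~R.Zero + CarryIn;
  uint64_t MinSum = L.One + R.One + CarryIn;

  // The carry into a bit is known 0 if it is 0 even for maximal operands,
  // and known 1 if it is 1 even for minimal operands (carries grow
  // monotonically with the operand values).
  uint64_t CarryKnownZero = ~(MaxSum ^ L.Zero ^ R.Zero);
  uint64_t CarryKnownOne = MinSum ^ L.One ^ R.One;

  // A result bit is known only where both operand bits and the incoming
  // carry are known; there MaxSum and MinSum necessarily agree.
  uint64_t Known = (L.Zero | L.One) & (R.Zero | R.One) &
                   (CarryKnownZero | CarryKnownOne);
  assert((MaxSum & Known) == (MinSum & Known) && "known bits of sum differ");

  KnownBits64 Out;
  Out.Zero = ~MinSum & Known;
  Out.One = MinSum & Known;
  return Out;
}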
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && @@ -730,7 +720,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) { // We know that the sign bit is zero. - KnownZero |= APInt::getSignBit(BitWidth); + KnownZero.setSignBit(); } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && @@ -741,7 +731,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, if (RHSKnownOne.isNegative()) { // We know that the sign bit is one. - KnownOne |= APInt::getSignBit(BitWidth); + KnownOne.setSignBit(); } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && @@ -752,7 +742,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) { // We know that the sign bit is one. - KnownOne |= APInt::getSignBit(BitWidth); + KnownOne.setSignBit(); } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && @@ -762,8 +752,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero. - KnownZero |= - APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()); + KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()); // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && @@ -774,11 +763,27 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) - KnownZero |= - APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1); + KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()+1); else - KnownZero |= - APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()); + KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()); + } + } + + // If assumptions conflict with each other or previous known bits, then we + // have a logical fallacy. It's possible that the assumption is not reachable, + // so this isn't a real bug. On the other hand, the program may have undefined + // behavior, or we might have a bug in the compiler. We can't assert/crash, so + // clear out the known bits, try to warn the user, and hope for the best. + if ((KnownZero & KnownOne) != 0) { + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + + if (Q.ORE) { + auto *CxtI = const_cast<Instruction *>(Q.CxtI); + OptimizationRemarkAnalysis ORA("value-tracking", "BadAssumption", CxtI); + Q.ORE->emit(ORA << "Detected conflicting code assumptions. Program may " + "have undefined behavior, or compiler may have " + "internal error."); } } } @@ -817,6 +822,14 @@ static void computeKnownBitsFromShiftOperator( computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + // If the shift amount could be greater than or equal to the bit-width of the LHS, the + // value could be undef, so we don't know anything about it. + if ((~KnownZero).uge(BitWidth)) { + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + return; + } + // Note: We cannot use KnownZero.getLimitedValue() here, because if // BitWidth > 64 and any upper bits are known, we'll end up returning the // limit value (which implies all bits are known). 
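A hedged sketch of a caller opting into the new conflicting-assumption diagnostic by threading an OptimizationRemarkEmitter through computeKnownBits; the ORE parameter defaults to nullptr, so existing callers are unaffected. The surrounding pass is assumed to own the data layout, the context instruction, and the emitter.

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"

static void queryKnownBitsWithRemarks(const llvm::Value *V,
                                      const llvm::DataLayout &DL,
                                      const llvm::Instruction *CxtI,
                                      llvm::OptimizationRemarkEmitter &ORE,
                                      llvm::APInt &KnownZero,
                                      llvm::APInt &KnownOne) {
  // Only callers that pass an emitter get the "BadAssumption" analysis
  // remark; the conflicting known bits are cleared either way.
  llvm::computeKnownBits(V, KnownZero, KnownOne, DL, /*Depth=*/0,
                         /*AC=*/nullptr, CxtI, /*DT=*/nullptr, &ORE);
}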
@@ -905,14 +918,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // TODO: This could be generalized to clearing any bit set in y where the // following bit is known to be unset in y. Value *Y = nullptr; - if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), - m_Value(Y))) || - match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), - m_Value(Y)))) { - APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); - computeKnownBits(Y, KnownZero3, KnownOne3, Depth + 1, Q); - if (KnownOne3.countTrailingOnes() > 0) - KnownZero |= APInt::getLowBitsSet(BitWidth, 1); + if (!KnownZero[0] && !KnownOne[0] && + (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), + m_Value(Y))) || + match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), + m_Value(Y))))) { + KnownZero2.clearAllBits(); KnownOne2.clearAllBits(); + computeKnownBits(Y, KnownZero2, KnownOne2, Depth + 1, Q); + if (KnownOne2.countTrailingOnes() > 0) + KnownZero.setBit(0); } break; } @@ -934,7 +948,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = KnownZeroOut; + KnownZero = std::move(KnownZeroOut); break; } case Instruction::Mul: { @@ -958,15 +972,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero.setHighBits(LeadZ); break; } case Instruction::Select: { - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); - - const Value *LHS; - const Value *RHS; + const Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q); @@ -980,23 +990,23 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, unsigned MaxHighZeros = 0; if (SPF == SPF_SMAX) { // If both sides are negative, the result is negative. - if (KnownOne[BitWidth - 1] && KnownOne2[BitWidth - 1]) + if (KnownOne.isNegative() && KnownOne2.isNegative()) // We can derive a lower bound on the result by taking the max of the // leading one bits. MaxHighOnes = std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); // If either side is non-negative, the result is non-negative. - else if (KnownZero[BitWidth - 1] || KnownZero2[BitWidth - 1]) + else if (KnownZero.isNegative() || KnownZero2.isNegative()) MaxHighZeros = 1; } else if (SPF == SPF_SMIN) { // If both sides are non-negative, the result is non-negative. - if (KnownZero[BitWidth - 1] && KnownZero2[BitWidth - 1]) + if (KnownZero.isNegative() && KnownZero2.isNegative()) // We can derive an upper bound on the result by taking the max of the // leading zero bits. MaxHighZeros = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); // If either side is negative, the result is negative. 
- else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1]) + else if (KnownOne.isNegative() || KnownOne2.isNegative()) MaxHighOnes = 1; } else if (SPF == SPF_UMAX) { // We can derive a lower bound on the result by taking the max of the @@ -1014,9 +1024,9 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, KnownOne &= KnownOne2; KnownZero &= KnownZero2; if (MaxHighOnes > 0) - KnownOne |= APInt::getHighBitsSet(BitWidth, MaxHighOnes); + KnownOne.setHighBits(MaxHighOnes); if (MaxHighZeros > 0) - KnownZero |= APInt::getHighBitsSet(BitWidth, MaxHighZeros); + KnownZero.setHighBits(MaxHighZeros); break; } case Instruction::FPTrunc: @@ -1047,7 +1057,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. if (BitWidth > SrcBitWidth) - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + KnownZero.setBitsFrom(SrcBitWidth); break; } case Instruction::BitCast: { @@ -1068,35 +1078,29 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set - KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + KnownZero = KnownZero.sext(BitWidth); + KnownOne = KnownOne.sext(BitWidth); break; } case Instruction::Shl: { // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); - auto KZF = [BitWidth, NSW](const APInt &KnownZero, unsigned ShiftAmt) { - APInt KZResult = - (KnownZero << ShiftAmt) | - APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. + auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) { + APInt KZResult = KnownZero << ShiftAmt; + KZResult.setLowBits(ShiftAmt); // Low bits known 0. // If this shift has "nsw" keyword, then the result is either a poison // value or has the same sign bit as the first operand. if (NSW && KnownZero.isNegative()) - KZResult.setBit(BitWidth - 1); + KZResult.setSignBit(); return KZResult; }; - auto KOF = [BitWidth, NSW](const APInt &KnownOne, unsigned ShiftAmt) { + auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) { APInt KOResult = KnownOne << ShiftAmt; if (NSW && KnownOne.isNegative()) - KOResult.setBit(BitWidth - 1); + KOResult.setSignBit(); return KOResult; }; @@ -1108,13 +1112,13 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, case Instruction::LShr: { // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { - return APIntOps::lshr(KnownZero, ShiftAmt) | + return KnownZero.lshr(ShiftAmt) | // High bits known zero. 
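The SExt case above can now simply sign-extend the two masks because a known sign bit carries its knowledge into every widened bit. A free-standing 64-bit illustration (not APInt), assuming the input masks only occupy the low SrcBits bits:

#include <cstdint>

struct Known64 {
  uint64_t Zero = 0;
  uint64_t One = 0;
};

// Sign-extend an SrcBits-wide known-bits pair to 64 bits, mirroring
// KnownZero.sext(BitWidth) / KnownOne.sext(BitWidth) in the patch.
static Known64 sextKnownBits(Known64 K, unsigned SrcBits) {
  if (SrcBits == 0 || SrcBits >= 64)
    return K; // nothing to widen in this model
  const uint64_t SignBit = 1ull << (SrcBits - 1);
  const uint64_t NewHighBits = ~0ull << SrcBits; // bits introduced by the sext
  if (K.Zero & SignBit)
    K.Zero |= NewHighBits; // sign bit known zero -> high bits known zero
  if (K.One & SignBit)
    K.One |= NewHighBits;  // sign bit known one  -> high bits known one
  return K;
}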
APInt::getHighBitsSet(BitWidth, ShiftAmt); }; - auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { - return APIntOps::lshr(KnownOne, ShiftAmt); + auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) { + return KnownOne.lshr(ShiftAmt); }; computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, @@ -1124,12 +1128,12 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } case Instruction::AShr: { // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { - return APIntOps::ashr(KnownZero, ShiftAmt); + auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) { + return KnownZero.ashr(ShiftAmt); }; - auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { - return APIntOps::ashr(KnownOne, ShiftAmt); + auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) { + return KnownOne.ashr(ShiftAmt); }; computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, @@ -1165,12 +1169,12 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + if (KnownZero2.isNegative() || ((KnownZero2 & LowBits) == LowBits)) KnownZero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + if (KnownOne2.isNegative() && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); @@ -1185,7 +1189,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, Q); // If it's known zero, our sign bit is also zero. 
if (LHSKnownZero.isNegative()) - KnownZero.setBit(BitWidth - 1); + KnownZero.setSignBit(); } break; @@ -1209,7 +1213,8 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); + KnownZero.clearAllBits(); + KnownZero.setHighBits(Leaders); break; } @@ -1220,7 +1225,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, Align = Q.DL.getABITypeAlignment(AI->getAllocatedType()); if (Align > 0) - KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + KnownZero.setLowBits(countTrailingZeros(Align)); break; } case Instruction::GetElementPtr: { @@ -1267,7 +1272,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } } - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); + KnownZero.setLowBits(TrailZ); break; } case Instruction::PHI: { @@ -1308,9 +1313,8 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, APInt KnownZero3(KnownZero), KnownOne3(KnownOne); computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q); - KnownZero = APInt::getLowBitsSet( - BitWidth, std::min(KnownZero2.countTrailingOnes(), - KnownZero3.countTrailingOnes())); + KnownZero.setLowBits(std::min(KnownZero2.countTrailingOnes(), + KnownZero3.countTrailingOnes())); if (DontImproveNonNegativePhiBits) break; @@ -1328,24 +1332,24 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // (add negative, negative) --> negative if (Opcode == Instruction::Add) { if (KnownZero2.isNegative() && KnownZero3.isNegative()) - KnownZero.setBit(BitWidth - 1); + KnownZero.setSignBit(); else if (KnownOne2.isNegative() && KnownOne3.isNegative()) - KnownOne.setBit(BitWidth - 1); + KnownOne.setSignBit(); } // (sub nsw non-negative, negative) --> non-negative // (sub nsw negative, non-negative) --> negative else if (Opcode == Instruction::Sub && LL == I) { if (KnownZero2.isNegative() && KnownOne3.isNegative()) - KnownZero.setBit(BitWidth - 1); + KnownZero.setSignBit(); else if (KnownOne2.isNegative() && KnownZero3.isNegative()) - KnownOne.setBit(BitWidth - 1); + KnownOne.setSignBit(); } // (mul nsw non-negative, non-negative) --> non-negative else if (Opcode == Instruction::Mul && KnownZero2.isNegative() && KnownZero3.isNegative()) - KnownZero.setBit(BitWidth - 1); + KnownZero.setSignBit(); } break; @@ -1364,8 +1368,8 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, if (dyn_cast_or_null<UndefValue>(P->hasConstantValue())) break; - KnownZero = APInt::getAllOnesValue(BitWidth); - KnownOne = APInt::getAllOnesValue(BitWidth); + KnownZero.setAllBits(); + KnownOne.setAllBits(); for (Value *IncValue : P->incoming_values()) { // Skip direct self references. 
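The Alloca case above records alignment as known-zero low address bits. A small stand-alone helper showing the same fact for a power-of-two alignment; the mask it returns is what setLowBits(countTrailingZeros(Align)) expresses on an APInt.

#include <cassert>
#include <cstdint>

static uint64_t knownZeroLowBitsForAlignment(uint64_t Align) {
  if (Align == 0)
    return 0; // alignment unknown, nothing learned
  assert((Align & (Align - 1)) == 0 && "alignment must be a power of two");
  // e.g. Align == 16 -> the low four address bits are known to be zero.
  return Align - 1;
}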
if (IncValue == P) continue; @@ -1400,6 +1404,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::bitreverse: + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + KnownZero |= KnownZero2.reverseBits(); + KnownOne |= KnownOne2.reverseBits(); + break; case Intrinsic::bswap: computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); KnownZero |= KnownZero2.byteSwap(); @@ -1411,7 +1420,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero.setBitsFrom(LowBits); break; } case Intrinsic::ctpop: { @@ -1419,17 +1428,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // We can bound the space the count needs. Also, bits known to be zero // can't contribute to the population. unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); - unsigned LeadingZeros = - APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); - assert(LeadingZeros <= BitWidth); - KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros); - KnownOne &= ~KnownZero; + unsigned LowBits = Log2_32(BitsPossiblySet)+1; + KnownZero.setBitsFrom(LowBits); // TODO: we could bound KnownOne using the lower bound on the number // of bits which might be set provided by popcnt KnownOne2. break; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero |= APInt::getHighBitsSet(64, 32); + KnownZero.setBitsFrom(32); break; } } @@ -1502,6 +1508,7 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && "V, KnownOne and KnownZero should have same BitWidth"); + (void)BitWidth; const APInt *C; if (match(V, m_APInt(C))) { @@ -1513,7 +1520,7 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, // Null and aggregate-zero are all-zeros. if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { KnownOne.clearAllBits(); - KnownZero = APInt::getAllOnesValue(BitWidth); + KnownZero.setAllBits(); return; } // Handle a constant vector by taking the intersection of the known bits of @@ -1582,7 +1589,7 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, if (V->getType()->isPointerTy()) { unsigned Align = V->getPointerAlignment(Q.DL); if (Align) - KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + KnownZero.setLowBits(countTrailingZeros(Align)); } // computeKnownBitsFromAssume strictly refines KnownZero and @@ -1607,8 +1614,8 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); computeKnownBits(V, ZeroBits, OneBits, Depth, Q); - KnownOne = OneBits[BitWidth - 1]; - KnownZero = ZeroBits[BitWidth - 1]; + KnownOne = OneBits.isNegative(); + KnownZero = ZeroBits.isNegative(); } /// Return true if the given value is known to have exactly one @@ -1788,10 +1795,12 @@ static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) return true; } -/// Return true if the given value is known to be non-zero when defined. -/// For vectors return true if every element is known to be non-zero when -/// defined. 
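The tightened ctpop bound above observes that if at most BitsPossiblySet input bits can be one, the population count fits in Log2_32(BitsPossiblySet) + 1 bits. A stand-alone 64-bit version of that bound, independent of LLVM's Log2_32:

#include <cstdint>

// Returns the mask of result bits known to be zero for ctpop(x) when at
// most BitsPossiblySet bits of x can be one (x modelled as a 64-bit value).
static uint64_t knownZeroMaskForCtpop(unsigned BitsPossiblySet) {
  if (BitsPossiblySet == 0)
    return ~0ull; // the population count is exactly zero
  unsigned LowBits = 1; // ends up as floor(log2(BitsPossiblySet)) + 1
  while ((1u << LowBits) <= BitsPossiblySet)
    ++LowBits;
  // ctpop(x) <= BitsPossiblySet < 2^LowBits, so every higher bit is zero.
  return ~0ull << LowBits;
}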
Supports values with integer or pointer type and vectors of -/// integers. +/// Return true if the given value is known to be non-zero when defined. For +/// vectors, return true if every element is known to be non-zero when +/// defined. For pointers, if the context instruction and dominator tree are +/// specified, perform context-sensitive analysis and return true if the +/// pointer couldn't possibly be null at the specified instruction. +/// Supports values with integer or pointer type and vectors of integers. bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (auto *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) @@ -1834,7 +1843,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // Check for pointer simplifications. if (V->getType()->isPointerTy()) { - if (isKnownNonNull(V)) + if (isKnownNonNullAt(V, Q.CxtI, Q.DT)) return true; if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) if (isGEPKnownNonNull(GEP, Depth, Q)) @@ -2075,13 +2084,29 @@ static unsigned computeNumSignBitsVectorConstant(const Value *V, return MinSignBits; } +static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, + const Query &Q); + +static unsigned ComputeNumSignBits(const Value *V, unsigned Depth, + const Query &Q) { + unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q); + assert(Result > 0 && "At least one sign bit needs to be present!"); + return Result; +} + /// Return the number of times the sign bit of the register is replicated into /// the other bits. We know that at least 1 bit is always equal to the sign bit /// (itself), but other cases can give us information. For example, immediately /// after an "ashr X, 2", we know that the top 3 bits are all equal to each /// other, so we return 3. For vectors, return the number of sign bits for the /// vector element with the mininum number of known sign bits. -unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) { +static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, + const Query &Q) { + + // We return the minimum number of sign bits that are guaranteed to be present + // in V, so for undef we have to conservatively return 1. We don't have the + // same behavior for poison though -- that's a FIXME today. + unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType()); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2157,7 +2182,10 @@ unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) { // ashr X, C -> adds C sign bits. Vectors too. const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { - Tmp += ShAmt->getZExtValue(); + unsigned ShAmtLimited = ShAmt->getZExtValue(); + if (ShAmtLimited >= TyBits) + break; // Bad shift. + Tmp += ShAmtLimited; if (Tmp > TyBits) Tmp = TyBits; } return Tmp; @@ -2436,7 +2464,7 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, if (!TLI) return Intrinsic::not_intrinsic; - LibFunc::Func Func; + LibFunc Func; // We're going to make assumptions on the semantics of the functions, check // that the target knows that it's available in this environment and it does // not have local linkage. 
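A hedged sketch of the context-sensitive query the pointer case above now performs: passing the use site and dominator tree lets isKnownNonZero consult isKnownNonNullAt instead of the purely structural isKnownNonNull. All arguments are assumed to come from the surrounding pass.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"

static bool pointerKnownNonNullHere(const llvm::Value *Ptr,
                                    const llvm::DataLayout &DL,
                                    const llvm::Instruction *UseSite,
                                    const llvm::DominatorTree *DT) {
  // Without UseSite/DT the query degrades gracefully to the old
  // context-free behaviour.
  return llvm::isKnownNonZero(Ptr, DL, /*Depth=*/0, /*AC=*/nullptr,
                              UseSite, DT);
}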
@@ -2451,81 +2479,81 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, switch (Func) { default: break; - case LibFunc::sin: - case LibFunc::sinf: - case LibFunc::sinl: + case LibFunc_sin: + case LibFunc_sinf: + case LibFunc_sinl: return Intrinsic::sin; - case LibFunc::cos: - case LibFunc::cosf: - case LibFunc::cosl: + case LibFunc_cos: + case LibFunc_cosf: + case LibFunc_cosl: return Intrinsic::cos; - case LibFunc::exp: - case LibFunc::expf: - case LibFunc::expl: + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_expl: return Intrinsic::exp; - case LibFunc::exp2: - case LibFunc::exp2f: - case LibFunc::exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: return Intrinsic::exp2; - case LibFunc::log: - case LibFunc::logf: - case LibFunc::logl: + case LibFunc_log: + case LibFunc_logf: + case LibFunc_logl: return Intrinsic::log; - case LibFunc::log10: - case LibFunc::log10f: - case LibFunc::log10l: + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10l: return Intrinsic::log10; - case LibFunc::log2: - case LibFunc::log2f: - case LibFunc::log2l: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: return Intrinsic::log2; - case LibFunc::fabs: - case LibFunc::fabsf: - case LibFunc::fabsl: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: return Intrinsic::fabs; - case LibFunc::fmin: - case LibFunc::fminf: - case LibFunc::fminl: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: return Intrinsic::minnum; - case LibFunc::fmax: - case LibFunc::fmaxf: - case LibFunc::fmaxl: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: return Intrinsic::maxnum; - case LibFunc::copysign: - case LibFunc::copysignf: - case LibFunc::copysignl: + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: return Intrinsic::copysign; - case LibFunc::floor: - case LibFunc::floorf: - case LibFunc::floorl: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: return Intrinsic::floor; - case LibFunc::ceil: - case LibFunc::ceilf: - case LibFunc::ceill: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: return Intrinsic::ceil; - case LibFunc::trunc: - case LibFunc::truncf: - case LibFunc::truncl: + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: return Intrinsic::trunc; - case LibFunc::rint: - case LibFunc::rintf: - case LibFunc::rintl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: return Intrinsic::rint; - case LibFunc::nearbyint: - case LibFunc::nearbyintf: - case LibFunc::nearbyintl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: return Intrinsic::nearbyint; - case LibFunc::round: - case LibFunc::roundf: - case LibFunc::roundl: + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: return Intrinsic::round; - case LibFunc::pow: - case LibFunc::powf: - case LibFunc::powl: + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_powl: return Intrinsic::pow; - case LibFunc::sqrt: - case LibFunc::sqrtf: - case LibFunc::sqrtl: + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: if (ICS->hasNoNaNs()) return Intrinsic::sqrt; return Intrinsic::not_intrinsic; @@ -2590,6 +2618,11 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, const TargetLibraryInfo *TLI, bool SignBitOnly, unsigned Depth) { + // TODO: This function does not do the right thing when SignBitOnly is true + // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform + // which flips the sign 
bits of NaNs. See + // https://llvm.org/bugs/show_bug.cgi?id=31702. + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { return !CFP->getValueAPF().isNegative() || (!SignBitOnly && CFP->getValueAPF().isZero()); @@ -2633,7 +2666,8 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1); case Instruction::Call: - Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI); + const auto *CI = cast<CallInst>(I); + Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI); switch (IID) { default: break; @@ -2650,16 +2684,37 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::fabs: - case Intrinsic::sqrt: return true; + + case Intrinsic::sqrt: + // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0. + if (!SignBitOnly) + return true; + return CI->hasNoNaNs() && (CI->hasNoSignedZeros() || + CannotBeNegativeZero(CI->getOperand(0), TLI)); + case Intrinsic::powi: - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) { // powi(x,n) is non-negative if n is even. - if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0) + if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0) return true; } + // TODO: This is not correct. Given that exp is an integer, here are the + // ways that pow can return a negative value: + // + // pow(x, exp) --> negative if exp is odd and x is negative. + // pow(-0, exp) --> -inf if exp is negative odd. + // pow(-0, exp) --> -0 if exp is positive odd. + // pow(-inf, exp) --> -0 if exp is negative odd. + // pow(-inf, exp) --> -inf if exp is positive odd. + // + // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN, + // but we must return false if x == -0. Unfortunately we do not currently + // have a way of expressing this constraint. See details in + // https://llvm.org/bugs/show_bug.cgi?id=31702. return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1); + case Intrinsic::fma: case Intrinsic::fmuladd: // x*x+y is non-negative if y is non-negative. @@ -3150,6 +3205,9 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, if (GA->isInterposable()) return V; V = GA->getAliasee(); + } else if (isa<AllocaInst>(V)) { + // An alloca can't be further simplified. + return V; } else { if (auto CS = CallSite(V)) if (Value *RV = CS.getReturnedArgOperand()) { @@ -3327,6 +3385,10 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Intrinsic::rint: case Intrinsic::round: return true; + // These intrinsics do not correspond to any libm function, and + // do not set errno. + case Intrinsic::powi: + return true; // TODO: are convert_{from,to}_fp16 safe? // TODO: can we list target-specific intrinsics here? default: break; @@ -3406,6 +3468,16 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, if (NumUsesExplored >= DomConditionsMaxUses) break; NumUsesExplored++; + + // If the value is used as an argument to a call or invoke, then argument + // attributes may provide an answer about null-ness. 
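A condensed restatement of the sqrt condition above, with plain booleans standing in for the fast-math-flag queries and for CannotBeNegativeZero: sqrt(x) is never ordered less than -0, but its sign bit can still be set when x is -0 (the result is -0) or when x is negative or NaN (the resulting NaN's sign bit is unspecified).

static bool sqrtSignBitKnownClear(bool CallHasNoNaNs,
                                  bool CallHasNoSignedZeros,
                                  bool OperandCannotBeNegativeZero) {
  // Mirrors: CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
  //                              CannotBeNegativeZero(CI->getOperand(0), TLI))
  return CallHasNoNaNs &&
         (CallHasNoSignedZeros || OperandCannotBeNegativeZero);
}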
+ if (auto CS = ImmutableCallSite(U)) + if (auto *CalledFunc = CS.getCalledFunction()) + for (const Argument &Arg : CalledFunc->args()) + if (CS.getArgOperand(Arg.getArgNo()) == V && + Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI)) + return true; + // Consider only compare instructions uniquely controlling a branch CmpInst::Predicate Pred; if (!match(const_cast<User *>(U), @@ -3683,6 +3755,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { return false; if (isa<ReturnInst>(I)) return false; + if (isa<UnreachableInst>(I)) + return false; // Calls can throw, or contain an infinite loop, or kill the process. if (auto CS = ImmutableCallSite(I)) { @@ -3731,79 +3805,33 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, bool llvm::propagatesFullPoison(const Instruction *I) { switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - case Instruction::Trunc: - case Instruction::BitCast: - case Instruction::AddrSpaceCast: - // These operations all propagate poison unconditionally. Note that poison - // is not any particular value, so xor or subtraction of poison with - // itself still yields poison, not zero. - return true; - - case Instruction::AShr: - case Instruction::SExt: - // For these operations, one bit of the input is replicated across - // multiple output bits. A replicated poison bit is still poison. - return true; - - case Instruction::Shl: { - // Left shift *by* a poison value is poison. The number of - // positions to shift is unsigned, so no negative values are - // possible there. Left shift by zero places preserves poison. So - // it only remains to consider left shift of poison by a positive - // number of places. - // - // A left shift by a positive number of places leaves the lowest order bit - // non-poisoned. However, if such a shift has a no-wrap flag, then we can - // make the poison operand violate that flag, yielding a fresh full-poison - // value. - auto *OBO = cast<OverflowingBinaryOperator>(I); - return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap(); - } - - case Instruction::Mul: { - // A multiplication by zero yields a non-poison zero result, so we need to - // rule out zero as an operand. Conservatively, multiplication by a - // non-zero constant is not multiplication by zero. - // - // Multiplication by a non-zero constant can leave some bits - // non-poisoned. For example, a multiplication by 2 leaves the lowest - // order bit unpoisoned. So we need to consider that. - // - // Multiplication by 1 preserves poison. If the multiplication has a - // no-wrap flag, then we can make the poison operand violate that flag - // when multiplied by any integer other than 0 and 1. - auto *OBO = cast<OverflowingBinaryOperator>(I); - if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) { - for (Value *V : OBO->operands()) { - if (auto *CI = dyn_cast<ConstantInt>(V)) { - // A ConstantInt cannot yield poison, so we can assume that it is - // the other operand that is poison. - return !CI->isZero(); - } - } - } - return false; - } + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + case Instruction::Mul: + case Instruction::Shl: + case Instruction::GetElementPtr: + // These operations all propagate poison unconditionally. 
Note that poison + // is not any particular value, so xor or subtraction of poison with + // itself still yields poison, not zero. + return true; - case Instruction::ICmp: - // Comparing poison with any value yields poison. This is why, for - // instance, x s< (x +nsw 1) can be folded to true. - return true; + case Instruction::AShr: + case Instruction::SExt: + // For these operations, one bit of the input is replicated across + // multiple output bits. A replicated poison bit is still poison. + return true; - case Instruction::GetElementPtr: - // A GEP implicitly represents a sequence of additions, subtractions, - // truncations, sign extensions and multiplications. The multiplications - // are by the non-zero sizes of some set of types, so we do not have to be - // concerned with multiplication by zero. If the GEP is in-bounds, then - // these operations are implicitly no-signed-wrap so poison is propagated - // by the arguments above for Add, Sub, Trunc, SExt and Mul. - return cast<GEPOperator>(I)->isInBounds(); + case Instruction::ICmp: + // Comparing poison with any value yields poison. This is why, for + // instance, x s< (x +nsw 1) can be folded to true. + return true; - default: - return false; + default: + return false; } } @@ -3906,6 +3934,37 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS) { + // Assume success. If there's no match, callers should not use these anyway. + LHS = TrueVal; + RHS = FalseVal; + + // Recognize variations of: + // CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) + const APInt *C1; + if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) { + const APInt *C2; + + // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1) + if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && + C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) + return {SPF_SMAX, SPNB_NA, false}; + + // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1) + if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) && + C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) + return {SPF_SMIN, SPNB_NA, false}; + + // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1) + if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) && + C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) + return {SPF_UMAX, SPNB_NA, false}; + + // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1) + if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) && + C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) + return {SPF_UMIN, SPNB_NA, false}; + } + if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -3913,23 +3972,16 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0) // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0) if (match(TrueVal, m_Zero()) && - match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) { - LHS = TrueVal; - RHS = FalseVal; + match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; - } // Z = X -nsw Y // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0) // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0) if (match(FalseVal, m_Zero()) && - match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) { - LHS = TrueVal; - RHS = FalseVal; + match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) return {Pred == CmpInst::ICMP_SGT ? 
SPF_SMAX : SPF_SMIN, SPNB_NA, false}; - } - const APInt *C1; if (!match(CmpRHS, m_APInt(C1))) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -3940,41 +3992,29 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, // Is the sign bit set? // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN - if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) { - LHS = TrueVal; - RHS = FalseVal; + if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; - } // Is the sign bit clear? // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() && - C2->isMinSignedValue()) { - LHS = TrueVal; - RHS = FalseVal; + C2->isMinSignedValue()) return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; - } } // Look through 'not' ops to find disguised signed min/max. // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C) // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C) if (match(TrueVal, m_Not(m_Specific(CmpLHS))) && - match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) { - LHS = TrueVal; - RHS = FalseVal; + match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false}; - } // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X) // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X) if (match(FalseVal, m_Not(m_Specific(CmpLHS))) && - match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) { - LHS = TrueVal; - RHS = FalseVal; + match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false}; - } return {SPF_UNKNOWN, SPNB_NA, false}; } @@ -4101,58 +4141,64 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, Instruction::CastOps *CastOp) { - CastInst *CI = dyn_cast<CastInst>(V1); - Constant *C = dyn_cast<Constant>(V2); - if (!CI) + auto *Cast1 = dyn_cast<CastInst>(V1); + if (!Cast1) return nullptr; - *CastOp = CI->getOpcode(); - - if (auto *CI2 = dyn_cast<CastInst>(V2)) { - // If V1 and V2 are both the same cast from the same type, we can look - // through V1. - if (CI2->getOpcode() == CI->getOpcode() && - CI2->getSrcTy() == CI->getSrcTy()) - return CI2->getOperand(0); - return nullptr; - } else if (!C) { + + *CastOp = Cast1->getOpcode(); + Type *SrcTy = Cast1->getSrcTy(); + if (auto *Cast2 = dyn_cast<CastInst>(V2)) { + // If V1 and V2 are both the same cast from the same type, look through V1. 
+ if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy()) + return Cast2->getOperand(0); return nullptr; } - Constant *CastedTo = nullptr; - - if (isa<ZExtInst>(CI) && CmpI->isUnsigned()) - CastedTo = ConstantExpr::getTrunc(C, CI->getSrcTy()); - - if (isa<SExtInst>(CI) && CmpI->isSigned()) - CastedTo = ConstantExpr::getTrunc(C, CI->getSrcTy(), true); - - if (isa<TruncInst>(CI)) - CastedTo = ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned()); - - if (isa<FPTruncInst>(CI)) - CastedTo = ConstantExpr::getFPExtend(C, CI->getSrcTy(), true); - - if (isa<FPExtInst>(CI)) - CastedTo = ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true); - - if (isa<FPToUIInst>(CI)) - CastedTo = ConstantExpr::getUIToFP(C, CI->getSrcTy(), true); - - if (isa<FPToSIInst>(CI)) - CastedTo = ConstantExpr::getSIToFP(C, CI->getSrcTy(), true); - - if (isa<UIToFPInst>(CI)) - CastedTo = ConstantExpr::getFPToUI(C, CI->getSrcTy(), true); + auto *C = dyn_cast<Constant>(V2); + if (!C) + return nullptr; - if (isa<SIToFPInst>(CI)) - CastedTo = ConstantExpr::getFPToSI(C, CI->getSrcTy(), true); + Constant *CastedTo = nullptr; + switch (*CastOp) { + case Instruction::ZExt: + if (CmpI->isUnsigned()) + CastedTo = ConstantExpr::getTrunc(C, SrcTy); + break; + case Instruction::SExt: + if (CmpI->isSigned()) + CastedTo = ConstantExpr::getTrunc(C, SrcTy, true); + break; + case Instruction::Trunc: + CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned()); + break; + case Instruction::FPTrunc: + CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true); + break; + case Instruction::FPExt: + CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true); + break; + case Instruction::FPToUI: + CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true); + break; + case Instruction::FPToSI: + CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true); + break; + case Instruction::UIToFP: + CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true); + break; + case Instruction::SIToFP: + CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true); + break; + default: + break; + } if (!CastedTo) return nullptr; - Constant *CastedBack = - ConstantExpr::getCast(CI->getOpcode(), CastedTo, C->getType(), true); // Make sure the cast doesn't lose any information. 
+ Constant *CastedBack = + ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true); if (CastedBack != C) return nullptr; diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 7e598f435ff5..722f17a8067e 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -488,3 +488,88 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) { return Inst; } + +Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF, + unsigned NumVecs) { + SmallVector<Constant *, 16> Mask; + for (unsigned i = 0; i < VF; i++) + for (unsigned j = 0; j < NumVecs; j++) + Mask.push_back(Builder.getInt32(j * VF + i)); + + return ConstantVector::get(Mask); +} + +Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start, + unsigned Stride, unsigned VF) { + SmallVector<Constant *, 16> Mask; + for (unsigned i = 0; i < VF; i++) + Mask.push_back(Builder.getInt32(Start + i * Stride)); + + return ConstantVector::get(Mask); +} + +Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumInts, unsigned NumUndefs) { + SmallVector<Constant *, 16> Mask; + for (unsigned i = 0; i < NumInts; i++) + Mask.push_back(Builder.getInt32(Start + i)); + + Constant *Undef = UndefValue::get(Builder.getInt32Ty()); + for (unsigned i = 0; i < NumUndefs; i++) + Mask.push_back(Undef); + + return ConstantVector::get(Mask); +} + +/// A helper function for concatenating vectors. This function concatenates two +/// vectors having the same element type. If the second vector has fewer +/// elements than the first, it is padded with undefs. +static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1, + Value *V2) { + VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType()); + VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType()); + assert(VecTy1 && VecTy2 && + VecTy1->getScalarType() == VecTy2->getScalarType() && + "Expect two vectors with the same element type"); + + unsigned NumElts1 = VecTy1->getNumElements(); + unsigned NumElts2 = VecTy2->getNumElements(); + assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); + + if (NumElts1 > NumElts2) { + // Extend with UNDEFs. + Constant *ExtMask = + createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2); + V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask); + } + + Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0); + return Builder.CreateShuffleVector(V1, V2, Mask); +} + +Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) { + unsigned NumVecs = Vecs.size(); + assert(NumVecs > 1 && "Should be at least two vectors"); + + SmallVector<Value *, 8> ResList; + ResList.append(Vecs.begin(), Vecs.end()); + do { + SmallVector<Value *, 8> TmpList; + for (unsigned i = 0; i < NumVecs - 1; i += 2) { + Value *V0 = ResList[i], *V1 = ResList[i + 1]; + assert((V0->getType() == V1->getType() || i == NumVecs - 2) && + "Only the last vector may have a different type"); + + TmpList.push_back(concatenateTwoVectors(Builder, V0, V1)); + } + + // Push the last vector if the total number of vectors is odd. + if (NumVecs % 2 != 0) + TmpList.push_back(ResList[NumVecs - 1]); + + ResList = TmpList; + NumVecs = ResList.size(); + } while (NumVecs > 1); + + return ResList[0]; +} |
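A minimal caller-side sketch of the new nonnull-argument reasoning added to isKnownNonNullFromDominatingCondition, assuming the pre-existing public query isKnownNonNullAt declared in llvm/Analysis/ValueTracking.h; the wrapper function and the IR in the comment are illustrative only and not part of this change.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// With this change, a dominating call that passes %p to a parameter marked
// nonnull is enough to prove %p != null at a later instruction, e.g.:
//
//   declare void @use(i8* nonnull)
//   ...
//   call void @use(i8* %p)        ; dominates %ctx
//   %ctx = load i8, i8* %p        ; %p is known non-null here
//
// Hypothetical wrapper around the existing public entry point.
static bool provedNonNullByDominatingCall(const Value *P,
                                          const Instruction *Ctx,
                                          const DominatorTree &DT) {
  return isKnownNonNullAt(P, Ctx, &DT);
}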
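A sketch of what the new clamp recognition in matchMinMax makes visible to clients, assuming the existing public matchSelectPattern declaration in llvm/Analysis/ValueTracking.h; the helper name and the IR in the comment are illustrative only and not part of this change.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// A clamp written as two selects, e.g.
//   %min = select (icmp slt %x, 100), %x, 100   ; smin(%x, 100)
//   %sel = select (icmp slt %x, 10), 10, %min   ; clamp(%x, 10, 100)
// is now reported for %sel as SMAX(SMIN(%x, 100), 10).
static bool isClampMaxPart(SelectInst *Sel) {
  Value *LHS, *RHS;
  SelectPatternResult SPR = matchSelectPattern(Sel, LHS, RHS);
  return SPR.Flavor == SPF_SMAX;
}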
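A usage sketch for the new VectorUtils helpers, with signatures taken from this patch; the header include and the wrapper function are assumptions made for illustration.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Interleave two <4 x i32> vectors A and B into <A0,B0,A1,B1,A2,B2,A3,B3>.
static Value *interleavePair(IRBuilder<> &Builder, Value *A, Value *B) {
  // Widen the pair to a single <8 x i32> value first; concatenateVectors pads
  // a shorter trailing operand with undefs, which is not needed here.
  Value *Wide = concatenateVectors(Builder, {A, B});
  // createInterleaveMask(VF=4, NumVecs=2) yields <0,4,1,5,2,6,3,7>.
  Constant *Mask = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
  return Builder.CreateShuffleVector(Wide, UndefValue::get(Wide->getType()),
                                     Mask);
}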