Diffstat (limited to 'lib/Analysis')
-rw-r--r-- | lib/Analysis/AliasSetTracker.cpp           |  16
-rw-r--r-- | lib/Analysis/BasicAliasAnalysis.cpp        |  92
-rw-r--r-- | lib/Analysis/CFGPrinter.cpp                |   2
-rw-r--r-- | lib/Analysis/CallGraph.cpp                 |   2
-rw-r--r-- | lib/Analysis/CallGraphSCCPass.cpp          |  98
-rw-r--r-- | lib/Analysis/DemandedBits.cpp              |   4
-rw-r--r-- | lib/Analysis/GlobalsModRef.cpp             |  12
-rw-r--r-- | lib/Analysis/InstructionSimplify.cpp       | 325
-rw-r--r-- | lib/Analysis/LazyValueInfo.cpp             |   2
-rw-r--r-- | lib/Analysis/LoopAccessAnalysis.cpp        |  52
-rw-r--r-- | lib/Analysis/MemDepPrinter.cpp             |   2
-rw-r--r-- | lib/Analysis/MemoryDependenceAnalysis.cpp  |  17
-rw-r--r-- | lib/Analysis/MustExecute.cpp               |   6
-rw-r--r-- | lib/Analysis/ScalarEvolution.cpp           |  10
-rw-r--r-- | lib/Analysis/TargetTransformInfo.cpp       |  18
-rw-r--r-- | lib/Analysis/ValueTracking.cpp             |  12
16 files changed, 390 insertions, 280 deletions
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 8aee81b1f1d8..8f903fa4f1e8 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -142,7 +142,7 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, Alias = SetMayAlias; AST.TotalMayAliasSetSize += size(); } else { - // First entry of must alias must have maximum size! + // First entry of must alias must have maximum size! P->updateSizeAndAAInfo(Size, AAInfo); } assert(Result != NoAlias && "Cannot be part of must set!"); @@ -251,9 +251,9 @@ void AliasSetTracker::clear() { for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); I != E; ++I) I->second->eraseFromList(); - + PointerMap.clear(); - + // The alias sets should all be clear now. AliasSets.clear(); } @@ -269,7 +269,7 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr, for (iterator I = begin(), E = end(); I != E;) { iterator Cur = I++; if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue; - + if (!FoundSet) { // If this is the first alias set ptr can go into. FoundSet = &*Cur; // Remember it. } else { // Otherwise, we must merge the sets. @@ -336,13 +336,13 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, // Return the set! return *Entry.getAliasSet(*this)->getForwardedTarget(*this); } - + if (AliasSet *AS = mergeAliasSetsForPointer(Pointer, Size, AAInfo)) { // Add it to the alias set it aliases. AS->addPointer(*this, Entry, Size, AAInfo); return *AS; } - + // Otherwise create a new alias set to hold the loaded pointer. AliasSets.push_back(new AliasSet()); AliasSets.back().addPointer(*this, Entry, Size, AAInfo); @@ -526,10 +526,10 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { AS->SetSize--; TotalMayAliasSetSize--; } - + // Stop using the alias set. AS->dropRef(*this); - + PointerMap.erase(I); } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 96326347b712..1a24ae3dba15 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/PhiValues.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" @@ -93,7 +94,8 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, // depend on them. if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) || (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)) || - (LI && Inv.invalidate<LoopAnalysis>(Fn, PA))) + (LI && Inv.invalidate<LoopAnalysis>(Fn, PA)) || + (PV && Inv.invalidate<PhiValuesAnalysis>(Fn, PA))) return true; // Otherwise this analysis result remains valid. @@ -1527,34 +1529,70 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, return Alias; } - SmallPtrSet<Value *, 4> UniqueSrc; SmallVector<Value *, 4> V1Srcs; bool isRecursive = false; - for (Value *PV1 : PN->incoming_values()) { - if (isa<PHINode>(PV1)) - // If any of the source itself is a PHI, return MayAlias conservatively - // to avoid compile time explosion. The worst possible case is if both - // sides are PHI nodes. In which case, this is O(m x n) time where 'm' - // and 'n' are the number of PHI sources. + if (PV) { + // If we have PhiValues then use it to get the underlying phi values. 
+ const PhiValues::ValueSet &PhiValueSet = PV->getValuesForPhi(PN); + // If we have more phi values than the search depth then return MayAlias + // conservatively to avoid compile time explosion. The worst possible case + // is if both sides are PHI nodes. In which case, this is O(m x n) time + // where 'm' and 'n' are the number of PHI sources. + if (PhiValueSet.size() > MaxLookupSearchDepth) return MayAlias; - - if (EnableRecPhiAnalysis) - if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) { - // Check whether the incoming value is a GEP that advances the pointer - // result of this PHI node (e.g. in a loop). If this is the case, we - // would recurse and always get a MayAlias. Handle this case specially - // below. - if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && - isa<ConstantInt>(PV1GEP->idx_begin())) { - isRecursive = true; - continue; + // Add the values to V1Srcs + for (Value *PV1 : PhiValueSet) { + if (EnableRecPhiAnalysis) { + if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) { + // Check whether the incoming value is a GEP that advances the pointer + // result of this PHI node (e.g. in a loop). If this is the case, we + // would recurse and always get a MayAlias. Handle this case specially + // below. + if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && + isa<ConstantInt>(PV1GEP->idx_begin())) { + isRecursive = true; + continue; + } } } - - if (UniqueSrc.insert(PV1).second) V1Srcs.push_back(PV1); + } + } else { + // If we don't have PhiInfo then just look at the operands of the phi itself + // FIXME: Remove this once we can guarantee that we have PhiInfo always + SmallPtrSet<Value *, 4> UniqueSrc; + for (Value *PV1 : PN->incoming_values()) { + if (isa<PHINode>(PV1)) + // If any of the source itself is a PHI, return MayAlias conservatively + // to avoid compile time explosion. The worst possible case is if both + // sides are PHI nodes. In which case, this is O(m x n) time where 'm' + // and 'n' are the number of PHI sources. + return MayAlias; + + if (EnableRecPhiAnalysis) + if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) { + // Check whether the incoming value is a GEP that advances the pointer + // result of this PHI node (e.g. in a loop). If this is the case, we + // would recurse and always get a MayAlias. Handle this case specially + // below. + if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && + isa<ConstantInt>(PV1GEP->idx_begin())) { + isRecursive = true; + continue; + } + } + + if (UniqueSrc.insert(PV1).second) + V1Srcs.push_back(PV1); + } } + // If V1Srcs is empty then that means that the phi has no underlying non-phi + // value. This should only be possible in blocks unreachable from the entry + // block, but return MayAlias just in case. + if (V1Srcs.empty()) + return MayAlias; + // If this PHI node is recursive, set the size of the accessed memory to // unknown to represent all the possible values the GEP could advance the // pointer to. 
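The shape of the new branch above: ask PhiValues for every non-PHI value transitively reachable through the PHI, give up (MayAlias) when that set exceeds MaxLookupSearchDepth, and fall back to scanning the PHI's own operands only when no PhiValues result is available. The V1Srcs.empty() guard matters because a PHI reachable only through itself (possible in blocks unreachable from the entry) has no underlying value at all. A standalone toy of the transitive-collection idea follows; the Node type and the limit are invented for illustration, and this is not the PhiValues implementation:

#include <cstdio>
#include <set>
#include <vector>

// Toy value graph: a node is either a plain value or a "phi" over other nodes.
struct Node {
  bool IsPhi = false;
  std::vector<const Node *> Incoming; // used only when IsPhi
};

// Collect the non-phi values transitively reachable through a phi, giving up
// (returning false, i.e. "treat as MayAlias") once the set grows past Limit.
// This mirrors the shape of the PhiValues-based path above, not its internals.
static bool collectUnderlying(const Node *N, std::set<const Node *> &Out,
                              std::set<const Node *> &Seen, unsigned Limit) {
  if (!Seen.insert(N).second)
    return true; // already visited; cycles (loop phis) end here
  if (!N->IsPhi) {
    Out.insert(N);
    return Out.size() <= Limit;
  }
  for (const Node *In : N->Incoming)
    if (!collectUnderlying(In, Out, Seen, Limit))
      return false;
  return true;
}

int main() {
  Node A, B;              // two plain values
  Node P;                 // phi over A, B, and itself (a loop phi)
  P.IsPhi = true;
  P.Incoming = {&A, &B, &P};
  std::set<const Node *> Out, Seen;
  bool OK = collectUnderlying(&P, Out, Seen, /*Limit=*/6);
  std::printf("underlying values: %zu (within limit: %s)\n", Out.size(),
              OK ? "yes" : "no"); // 2 underlying values: A and B
}

In the toy, a phi whose only incoming value is itself produces an empty set, which is exactly the unreachable-block case the MayAlias fallback above covers.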
@@ -1879,7 +1917,8 @@ BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { AM.getResult<TargetLibraryAnalysis>(F), AM.getResult<AssumptionAnalysis>(F), &AM.getResult<DominatorTreeAnalysis>(F), - AM.getCachedResult<LoopAnalysis>(F)); + AM.getCachedResult<LoopAnalysis>(F), + AM.getCachedResult<PhiValuesAnalysis>(F)); } BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { @@ -1891,12 +1930,12 @@ char BasicAAWrapperPass::ID = 0; void BasicAAWrapperPass::anchor() {} INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", - "Basic Alias Analysis (stateless AA impl)", true, true) + "Basic Alias Analysis (stateless AA impl)", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", - "Basic Alias Analysis (stateless AA impl)", true, true) + "Basic Alias Analysis (stateless AA impl)", false, true) FunctionPass *llvm::createBasicAAWrapperPass() { return new BasicAAWrapperPass(); @@ -1907,10 +1946,12 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); auto &DTWP = getAnalysis<DominatorTreeWrapperPass>(); auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *PVWP = getAnalysisIfAvailable<PhiValuesWrapperPass>(); Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(), ACT.getAssumptionCache(F), &DTWP.getDomTree(), - LIWP ? &LIWP->getLoopInfo() : nullptr)); + LIWP ? &LIWP->getLoopInfo() : nullptr, + PVWP ? &PVWP->getResult() : nullptr)); return false; } @@ -1920,6 +1961,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addUsedIfAvailable<PhiValuesWrapperPass>(); } BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index fc25cef8ddca..5b170dfa7903 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -124,7 +124,7 @@ namespace { } char CFGPrinterLegacyPass::ID = 0; -INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file", +INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file", false, true) PreservedAnalyses CFGPrinterPass::run(Function &F, diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index 7d5d2d2e4496..cbdf5f63c557 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -166,7 +166,7 @@ void CallGraphNode::print(raw_ostream &OS) const { OS << "Call graph node for function: '" << F->getName() << "'"; else OS << "Call graph node <<null function>>"; - + OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n'; for (const auto &I : *this) { diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp index f2211edba216..4c33c420b65d 100644 --- a/lib/Analysis/CallGraphSCCPass.cpp +++ b/lib/Analysis/CallGraphSCCPass.cpp @@ -41,7 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "cgscc-passmgr" -static cl::opt<unsigned> +static cl::opt<unsigned> MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4)); STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC"); @@ -97,13 +97,13 @@ public: } PassManagerType getPassManagerType() const override { - return PMT_CallGraphPassManager; + 
return PMT_CallGraphPassManager; } - + private: bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, bool &DevirtualizedCall); - + bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, CallGraph &CG, bool &CallGraphUpToDate, bool &DevirtualizedCall); @@ -142,21 +142,21 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, if (EmitICRemark) emitInstrCountChangedRemark(P, M, InstrCount); } - + // After the CGSCCPass is done, when assertions are enabled, use // RefreshCallGraph to verify that the callgraph was correctly updated. #ifndef NDEBUG if (Changed) RefreshCallGraph(CurSCC, CG, true); #endif - + return Changed; } - + assert(PM->getPassManagerType() == PMT_FunctionPassManager && "Invalid CGPassManager member"); FPPassManager *FPP = (FPPassManager*)P; - + // Run pass P on all functions in the current SCC. for (CallGraphNode *CGN : CurSCC) { if (Function *F = CGN->getFunction()) { @@ -168,7 +168,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, F->getContext().yield(); } } - + // The function pass(es) modified the IR, they may have clobbered the // callgraph. if (Changed && CallGraphUpToDate) { @@ -199,7 +199,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, bool MadeChange = false; bool DevirtualizedCall = false; - + // Scan all functions in the SCC. unsigned FunctionNo = 0; for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end(); @@ -207,14 +207,14 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CallGraphNode *CGN = *SCCIdx; Function *F = CGN->getFunction(); if (!F || F->isDeclaration()) continue; - + // Walk the function body looking for call sites. Sync up the call sites in // CGN with those actually in the function. // Keep track of the number of direct and indirect calls that were // invalidated and removed. unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; - + // Get the set of call sites currently in the function. for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { // If this call site is null, then the function pass deleted the call @@ -226,7 +226,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CallSites.count(I->first) || // If the call edge is not from a call or invoke, or it is a - // instrinsic call, then the function pass RAUW'd a call with + // instrinsic call, then the function pass RAUW'd a call with // another value. This can happen when constant folding happens // of well known functions etc. !CallSite(I->first) || @@ -236,18 +236,18 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CallSite(I->first).getCalledFunction()->getIntrinsicID()))) { assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); - + // If this was an indirect call site, count it. if (!I->second->getFunction()) ++NumIndirectRemoved; - else + else ++NumDirectRemoved; - + // Just remove the edge from the set of callees, keep track of whether // I points to the last element of the vector. bool WasLast = I + 1 == E; CGN->removeCallEdge(I); - + // If I pointed to the last element of the vector, we have to bail out: // iterator checking rejects comparisons of the resultant pointer with // end. 
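The WasLast dance above exists because removing an edge invalidates the cached end iterator, and checked iterators reject comparing a live iterator against a stale end. A minimal standalone illustration of the same hazard and the bail-or-refetch fix, using a plain std::vector in place of the call-graph edge list (the even-number filter is a hypothetical stand-in for "stale call site"):

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Edges = {1, 2, 3, 4, 5};
  auto I = Edges.begin(), E = Edges.end();
  while (I != E) {
    if (*I % 2 == 0) {             // hypothetical "stale edge" condition
      bool WasLast = (I + 1 == E); // must check before mutating
      I = Edges.erase(I);          // invalidates E (and the old I)
      if (WasLast)
        break;                     // can't compare against the stale E
      E = Edges.end();             // refetch end after the mutation
      continue;
    }
    ++I;
  }
  for (int V : Edges)
    std::printf("%d ", V);         // prints: 1 3 5
  std::printf("\n");
}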
@@ -256,10 +256,10 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, E = CGN->end(); continue; } - + assert(!CallSites.count(I->first) && "Call site occurs in node multiple times"); - + CallSite CS(I->first); if (CS) { Function *Callee = CS.getCalledFunction(); @@ -269,7 +269,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, } ++I; } - + // Loop over all of the instructions in the function, getting the callsites. // Keep track of the number of direct/indirect calls added. unsigned NumDirectAdded = 0, NumIndirectAdded = 0; @@ -280,7 +280,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, if (!CS) continue; Function *Callee = CS.getCalledFunction(); if (Callee && Callee->isIntrinsic()) continue; - + // If this call site already existed in the callgraph, just verify it // matches up to expectations and remove it from CallSites. DenseMap<Value*, CallGraphNode*>::iterator ExistingIt = @@ -290,11 +290,11 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // Remove from CallSites since we have now seen it. CallSites.erase(ExistingIt); - + // Verify that the callee is right. if (ExistingNode->getFunction() == CS.getCalledFunction()) continue; - + // If we are in checking mode, we are not allowed to actually mutate // the callgraph. If this is a case where we can infer that the // callgraph is less precise than it could be (e.g. an indirect call @@ -303,10 +303,10 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, if (CheckingMode && CS.getCalledFunction() && ExistingNode->getFunction() == nullptr) continue; - + assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); - + // If not, we either went from a direct call to indirect, indirect to // direct, or direct to different direct. CallGraphNode *CalleeNode; @@ -328,7 +328,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, MadeChange = true; continue; } - + assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); @@ -341,11 +341,11 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, CalleeNode = CG.getCallsExternalNode(); ++NumIndirectAdded; } - + CGN->addCalledFunction(CS, CalleeNode); MadeChange = true; } - + // We scanned the old callgraph node, removing invalidated call sites and // then added back newly found call sites. One thing that can happen is // that an old indirect call site was deleted and replaced with a new direct @@ -359,13 +359,13 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, if (NumIndirectRemoved > NumIndirectAdded && NumDirectRemoved < NumDirectAdded) DevirtualizedCall = true; - + // After scanning this function, if we still have entries in callsites, then // they are dangling pointers. WeakTrackingVH should save us for this, so // abort if // this happens. assert(CallSites.empty() && "Dangling pointers found in call sites map"); - + // Periodically do an explicit clear to remove tombstones when processing // large scc's. if ((FunctionNo & 15) == 15) @@ -392,7 +392,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, bool &DevirtualizedCall) { bool Changed = false; - + // Keep track of whether the callgraph is known to be up-to-date or not. 
// The CGSSC pass manager runs two types of passes: // CallGraphSCC Passes and other random function passes. Because other @@ -406,7 +406,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, for (unsigned PassNo = 0, e = getNumContainedPasses(); PassNo != e; ++PassNo) { Pass *P = getContainedPass(PassNo); - + // If we're in -debug-pass=Executions mode, construct the SCC node list, // otherwise avoid constructing this string as it is expensive. if (isPassDebuggingExecutionsOrMore()) { @@ -423,23 +423,23 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); } dumpRequiredSet(P); - + initializeAnalysisImpl(P); - + // Actually run this pass on the current SCC. Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate, DevirtualizedCall); - + if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); dumpPreservedSet(P); - - verifyPreservedAnalysis(P); + + verifyPreservedAnalysis(P); removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); removeDeadPasses(P, "", ON_CG_MSG); } - + // If the callgraph was left out of date (because the last pass run was a // functionpass), refresh it before we move on to the next SCC. if (!CallGraphUpToDate) @@ -452,7 +452,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, bool CGPassManager::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); bool Changed = doInitialization(CG); - + // Walk the callgraph in bottom-up SCC order. scc_iterator<CallGraph*> CGI = scc_begin(&CG); @@ -485,7 +485,7 @@ bool CGPassManager::runOnModule(Module &M) { DevirtualizedCall = false; Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); } while (Iteration++ < MaxIterations && DevirtualizedCall); - + if (DevirtualizedCall) LLVM_DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration @@ -500,7 +500,7 @@ bool CGPassManager::runOnModule(Module &M) { /// Initialize CG bool CGPassManager::doInitialization(CallGraph &CG) { bool Changed = false; - for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { assert(PM->getPassManagerType() == PMT_FunctionPassManager && "Invalid CGPassManager member"); @@ -515,7 +515,7 @@ bool CGPassManager::doInitialization(CallGraph &CG) { /// Finalize CG bool CGPassManager::doFinalization(CallGraph &CG) { bool Changed = false; - for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { assert(PM->getPassManagerType() == PMT_FunctionPassManager && "Invalid CGPassManager member"); @@ -541,7 +541,7 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { Nodes[i] = New; break; } - + // Update the active scc_iterator so that it doesn't contain dangling // pointers to the old CallGraphNode. scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context; @@ -555,18 +555,18 @@ void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { /// Assign pass manager to manage this pass. 
void CallGraphSCCPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { - // Find CGPassManager + // Find CGPassManager while (!PMS.empty() && PMS.top()->getPassManagerType() > PMT_CallGraphPassManager) PMS.pop(); assert(!PMS.empty() && "Unable to handle Call Graph Pass"); CGPassManager *CGP; - + if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager) CGP = (CGPassManager*)PMS.top(); else { - // Create new Call Graph SCC Pass Manager if it does not exist. + // Create new Call Graph SCC Pass Manager if it does not exist. assert(!PMS.empty() && "Unable to create Call Graph Pass Manager"); PMDataManager *PMD = PMS.top(); @@ -608,7 +608,7 @@ namespace { class PrintCallGraphPass : public CallGraphSCCPass { std::string Banner; raw_ostream &OS; // raw_ostream to print on. - + public: static char ID; @@ -640,10 +640,10 @@ namespace { } return false; } - + StringRef getPassName() const override { return "Print CallGraph IR"; } }; - + } // end anonymous namespace. char PrintCallGraphPass::ID = 0; diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 58c5bccff65d..e7637cd88327 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -272,7 +272,7 @@ void DemandedBits::performAnalysis() { // Analysis already completed for this function. return; Analyzed = true; - + Visited.clear(); AliveBits.clear(); @@ -367,7 +367,7 @@ void DemandedBits::performAnalysis() { APInt DemandedBits::getDemandedBits(Instruction *I) { performAnalysis(); - + const DataLayout &DL = I->getModule()->getDataLayout(); auto Found = AliveBits.find(I); if (Found != AliveBits.end()) diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp index 197aee9dacb7..2c503609d96b 100644 --- a/lib/Analysis/GlobalsModRef.cpp +++ b/lib/Analysis/GlobalsModRef.cpp @@ -409,7 +409,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { if (Constant *C = GV->getInitializer()) if (!C->isNullValue()) return false; - + // Walk the user list of the global. If we find anything other than a direct // load or store, bail out. for (User *U : GV->users()) { @@ -464,7 +464,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { return true; } -void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) { +void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) { // We do a bottom-up SCC traversal of the call graph. In other words, we // visit all callees before callers (leaf-first). unsigned SCCID = 0; @@ -633,7 +633,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, Inputs.push_back(V); do { const Value *Input = Inputs.pop_back_val(); - + if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) || isa<InvokeInst>(Input)) // Arguments to functions or returns from functions are inherently @@ -654,7 +654,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, if (auto *LI = dyn_cast<LoadInst>(Input)) { Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL)); continue; - } + } if (auto *SI = dyn_cast<SelectInst>(Input)) { const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL); const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL); @@ -672,7 +672,7 @@ static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, } continue; } - + return false; } while (!Inputs.empty()); @@ -754,7 +754,7 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, // non-addr-taken globals. 
continue; } - + // Recurse through a limited number of selects, loads and PHIs. This is an // arbitrary depth of 4, lower numbers could be used to fix compile time // issues if needed, but this is generally expected to be only be important diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 519d6d67be51..7fc7c15a0c25 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -65,6 +65,48 @@ static Value *SimplifyCastInst(unsigned, Value *, Type *, static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, const SimplifyQuery &, unsigned); +static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal, + Value *FalseVal) { + BinaryOperator::BinaryOps BinOpCode; + if (auto *BO = dyn_cast<BinaryOperator>(Cond)) + BinOpCode = BO->getOpcode(); + else + return nullptr; + + CmpInst::Predicate ExpectedPred, Pred1, Pred2; + if (BinOpCode == BinaryOperator::Or) { + ExpectedPred = ICmpInst::ICMP_NE; + } else if (BinOpCode == BinaryOperator::And) { + ExpectedPred = ICmpInst::ICMP_EQ; + } else + return nullptr; + + // %A = icmp eq %TV, %FV + // %B = icmp eq %X, %Y (and one of these is a select operand) + // %C = and %A, %B + // %D = select %C, %TV, %FV + // --> + // %FV + + // %A = icmp ne %TV, %FV + // %B = icmp ne %X, %Y (and one of these is a select operand) + // %C = or %A, %B + // %D = select %C, %TV, %FV + // --> + // %TV + Value *X, *Y; + if (!match(Cond, m_c_BinOp(m_c_ICmp(Pred1, m_Specific(TrueVal), + m_Specific(FalseVal)), + m_ICmp(Pred2, m_Value(X), m_Value(Y)))) || + Pred1 != Pred2 || Pred1 != ExpectedPred) + return nullptr; + + if (X == TrueVal || X == FalseVal || Y == TrueVal || Y == FalseVal) + return BinOpCode == BinaryOperator::Or ? TrueVal : FalseVal; + + return nullptr; +} + /// For a boolean type or a vector of boolean type, return false or a vector /// with every element false. static Constant *getFalse(Type *Ty) { @@ -1283,6 +1325,23 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1)))) return X; + // ((X << A) | Y) >> A -> X if effective width of Y is not larger than A. + // We can return X as we do in the above case since OR alters no bits in X. + // SimplifyDemandedBits in InstCombine can do more general optimization for + // bit manipulation. This pattern aims to provide opportunities for other + // optimizers by supporting a simple but common case in InstSimplify. 
+ Value *Y; + const APInt *ShRAmt, *ShLAmt; + if (match(Op1, m_APInt(ShRAmt)) && + match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) && + *ShRAmt == *ShLAmt) { + const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + const unsigned Width = Op0->getType()->getScalarSizeInBits(); + const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros(); + if (EffWidthY <= ShRAmt->getZExtValue()) + return X; + } + return nullptr; } @@ -3752,6 +3811,9 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse)) return V; + if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal)) + return V; + return nullptr; } @@ -4604,149 +4666,131 @@ static bool maskIsAllZeroOrUndef(Value *Mask) { return true; } -template <typename IterTy> -static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, - const SimplifyQuery &Q, unsigned MaxRecurse) { +static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, + const SimplifyQuery &Q) { + // Idempotent functions return the same result when called repeatedly. Intrinsic::ID IID = F->getIntrinsicID(); - unsigned NumOperands = std::distance(ArgBegin, ArgEnd); - - // Unary Ops - if (NumOperands == 1) { - // Perform idempotent optimizations - if (IsIdempotent(IID)) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) { - if (II->getIntrinsicID() == IID) - return II; - } - } + if (IsIdempotent(IID)) + if (auto *II = dyn_cast<IntrinsicInst>(Op0)) + if (II->getIntrinsicID() == IID) + return II; - Value *IIOperand = *ArgBegin; - Value *X; - switch (IID) { - case Intrinsic::fabs: { - if (SignBitMustBeZero(IIOperand, Q.TLI)) - return IIOperand; - return nullptr; - } - case Intrinsic::bswap: { - // bswap(bswap(x)) -> x - if (match(IIOperand, m_BSwap(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::bitreverse: { - // bitreverse(bitreverse(x)) -> x - if (match(IIOperand, m_BitReverse(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::exp: { - // exp(log(x)) -> x - if (Q.CxtI->hasAllowReassoc() && - match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::exp2: { - // exp2(log2(x)) -> x - if (Q.CxtI->hasAllowReassoc() && - match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::log: { - // log(exp(x)) -> x - if (Q.CxtI->hasAllowReassoc() && - match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) - return X; - return nullptr; - } - case Intrinsic::log2: { - // log2(exp2(x)) -> x - if (Q.CxtI->hasAllowReassoc() && - match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) { - return X; - } - return nullptr; - } - default: - return nullptr; - } + Value *X; + switch (IID) { + case Intrinsic::fabs: + if (SignBitMustBeZero(Op0, Q.TLI)) return Op0; + break; + case Intrinsic::bswap: + // bswap(bswap(x)) -> x + if (match(Op0, m_BSwap(m_Value(X)))) return X; + break; + case Intrinsic::bitreverse: + // bitreverse(bitreverse(x)) -> x + if (match(Op0, m_BitReverse(m_Value(X)))) return X; + break; + case Intrinsic::exp: + // exp(log(x)) -> x + if (Q.CxtI->hasAllowReassoc() && + match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X; + break; + case Intrinsic::exp2: + // exp2(log2(x)) -> x + if (Q.CxtI->hasAllowReassoc() && + match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X; + break; + case Intrinsic::log: + // log(exp(x)) -> x + if (Q.CxtI->hasAllowReassoc() && + 
match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X; + break; + case Intrinsic::log2: + // log2(exp2(x)) -> x + if (Q.CxtI->hasAllowReassoc() && + match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) return X; + break; + default: + break; } - // Binary Ops - if (NumOperands == 2) { - Value *LHS = *ArgBegin; - Value *RHS = *(ArgBegin + 1); - Type *ReturnType = F->getReturnType(); + return nullptr; +} - switch (IID) { - case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: { - // X - X -> { 0, false } - if (LHS == RHS) - return Constant::getNullValue(ReturnType); +static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, + const SimplifyQuery &Q) { + Intrinsic::ID IID = F->getIntrinsicID(); + Type *ReturnType = F->getReturnType(); + switch (IID) { + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + // X - X -> { 0, false } + if (Op0 == Op1) + return Constant::getNullValue(ReturnType); + // X - undef -> undef + // undef - X -> undef + if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) + return UndefValue::get(ReturnType); + break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + // X + undef -> undef + if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) + return UndefValue::get(ReturnType); + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + // 0 * X -> { 0, false } + // X * 0 -> { 0, false } + if (match(Op0, m_Zero()) || match(Op1, m_Zero())) + return Constant::getNullValue(ReturnType); + // undef * X -> { 0, false } + // X * undef -> { 0, false } + if (match(Op0, m_Undef()) || match(Op1, m_Undef())) + return Constant::getNullValue(ReturnType); + break; + case Intrinsic::load_relative: + if (auto *C0 = dyn_cast<Constant>(Op0)) + if (auto *C1 = dyn_cast<Constant>(Op1)) + return SimplifyRelativeLoad(C0, C1, Q.DL); + break; + case Intrinsic::powi: + if (auto *Power = dyn_cast<ConstantInt>(Op1)) { + // powi(x, 0) -> 1.0 + if (Power->isZero()) + return ConstantFP::get(Op0->getType(), 1.0); + // powi(x, 1) -> x + if (Power->isOne()) + return Op0; + } + break; + case Intrinsic::maxnum: + case Intrinsic::minnum: + // If one argument is NaN, return the other argument. + if (match(Op0, m_NaN())) return Op1; + if (match(Op1, m_NaN())) return Op0; + break; + default: + break; + } - // X - undef -> undef - // undef - X -> undef - if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) - return UndefValue::get(ReturnType); + return nullptr; +} - return nullptr; - } - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: { - // X + undef -> undef - if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) - return UndefValue::get(ReturnType); +template <typename IterTy> +static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, + const SimplifyQuery &Q) { + // Intrinsics with no operands have some kind of side effect. Don't simplify. 
+ unsigned NumOperands = std::distance(ArgBegin, ArgEnd); + if (NumOperands == 0) + return nullptr; - return nullptr; - } - case Intrinsic::umul_with_overflow: - case Intrinsic::smul_with_overflow: { - // 0 * X -> { 0, false } - // X * 0 -> { 0, false } - if (match(LHS, m_Zero()) || match(RHS, m_Zero())) - return Constant::getNullValue(ReturnType); - - // undef * X -> { 0, false } - // X * undef -> { 0, false } - if (match(LHS, m_Undef()) || match(RHS, m_Undef())) - return Constant::getNullValue(ReturnType); + Intrinsic::ID IID = F->getIntrinsicID(); + if (NumOperands == 1) + return simplifyUnaryIntrinsic(F, ArgBegin[0], Q); - return nullptr; - } - case Intrinsic::load_relative: { - Constant *C0 = dyn_cast<Constant>(LHS); - Constant *C1 = dyn_cast<Constant>(RHS); - if (C0 && C1) - return SimplifyRelativeLoad(C0, C1, Q.DL); - return nullptr; - } - case Intrinsic::powi: - if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) { - // powi(x, 0) -> 1.0 - if (Power->isZero()) - return ConstantFP::get(LHS->getType(), 1.0); - // powi(x, 1) -> x - if (Power->isOne()) - return LHS; - } - return nullptr; - case Intrinsic::maxnum: - case Intrinsic::minnum: - // If one argument is NaN, return the other argument. - if (match(LHS, m_NaN())) - return RHS; - if (match(RHS, m_NaN())) - return LHS; - return nullptr; - default: - return nullptr; - } - } + if (NumOperands == 2) + return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q); - // Simplify calls to llvm.masked.load.* + // Handle intrinsics with 3 or more arguments. switch (IID) { case Intrinsic::masked_load: { Value *MaskArg = ArgBegin[2]; @@ -4756,6 +4800,19 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, return PassthruArg; return nullptr; } + case Intrinsic::fshl: + case Intrinsic::fshr: { + Value *ShAmtArg = ArgBegin[2]; + const APInt *ShAmtC; + if (match(ShAmtArg, m_APInt(ShAmtC))) { + // If there's effectively no shift, return the 1st arg or 2nd arg. + // TODO: For vectors, we could check each element of a non-splat constant. + APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth()); + if (ShAmtC->urem(BitWidth).isNullValue()) + return ArgBegin[IID == Intrinsic::fshl ? 0 : 1]; + } + return nullptr; + } default: return nullptr; } @@ -4780,7 +4837,7 @@ static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin, return nullptr; if (F->isIntrinsic()) - if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse)) + if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q)) return Ret; if (!canConstantFoldCallTo(CS, F)) diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 435b6f205199..ee0148e0d795 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -725,7 +725,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(ValueLatticeElement &BBLV, // frequently arranged such that dominating ones come first and we quickly // find a path to function entry. TODO: We should consider explicitly // canonicalizing to make this true rather than relying on this happy - // accident. + // accident. 
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { ValueLatticeElement EdgeResult; if (!getEdgeValue(Val, *PI, BB, EdgeResult)) diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index c6175bf9bee9..a24d66011b8d 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -176,8 +176,8 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, /// Calculate Start and End points of memory access. /// Let's assume A is the first access and B is a memory access on N-th loop -/// iteration. Then B is calculated as: -/// B = A + Step*N . +/// iteration. Then B is calculated as: +/// B = A + Step*N . /// Step value may be positive or negative. /// N is a calculated back-edge taken count: /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0 @@ -1317,7 +1317,7 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, return false; } -/// Given a non-constant (unknown) dependence-distance \p Dist between two +/// Given a non-constant (unknown) dependence-distance \p Dist between two /// memory accesses, that have the same stride whose absolute value is given /// in \p Stride, and that have the same type size \p TypeByteSize, /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is @@ -1336,19 +1336,19 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, // If we can prove that // (**) |Dist| > BackedgeTakenCount * Step - // where Step is the absolute stride of the memory accesses in bytes, + // where Step is the absolute stride of the memory accesses in bytes, // then there is no dependence. // - // Ratioanle: - // We basically want to check if the absolute distance (|Dist/Step|) - // is >= the loop iteration count (or > BackedgeTakenCount). - // This is equivalent to the Strong SIV Test (Practical Dependence Testing, - // Section 4.2.1); Note, that for vectorization it is sufficient to prove + // Ratioanle: + // We basically want to check if the absolute distance (|Dist/Step|) + // is >= the loop iteration count (or > BackedgeTakenCount). + // This is equivalent to the Strong SIV Test (Practical Dependence Testing, + // Section 4.2.1); Note, that for vectorization it is sufficient to prove // that the dependence distance is >= VF; This is checked elsewhere. - // But in some cases we can prune unknown dependence distances early, and - // even before selecting the VF, and without a runtime test, by comparing - // the distance against the loop iteration count. Since the vectorized code - // will be executed only if LoopCount >= VF, proving distance >= LoopCount + // But in some cases we can prune unknown dependence distances early, and + // even before selecting the VF, and without a runtime test, by comparing + // the distance against the loop iteration count. Since the vectorized code + // will be executed only if LoopCount >= VF, proving distance >= LoopCount // also guarantees that distance >= VF. 
// const uint64_t ByteStride = Stride * TypeByteSize; @@ -1360,8 +1360,8 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType()); uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType()); - // The dependence distance can be positive/negative, so we sign extend Dist; - // The multiplication of the absolute stride in bytes and the + // The dependence distance can be positive/negative, so we sign extend Dist; + // The multiplication of the absolute stride in bytes and the // backdgeTakenCount is non-negative, so we zero extend Product. if (DistTypeSize > ProductTypeSize) CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType()); @@ -2212,24 +2212,24 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { "versioning:"); LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); - // Avoid adding the "Stride == 1" predicate when we know that + // Avoid adding the "Stride == 1" predicate when we know that // Stride >= Trip-Count. Such a predicate will effectively optimize a single // or zero iteration loop, as Trip-Count <= Stride == 1. - // + // // TODO: We are currently not making a very informed decision on when it is // beneficial to apply stride versioning. It might make more sense that the - // users of this analysis (such as the vectorizer) will trigger it, based on - // their specific cost considerations; For example, in cases where stride + // users of this analysis (such as the vectorizer) will trigger it, based on + // their specific cost considerations; For example, in cases where stride // versioning does not help resolving memory accesses/dependences, the - // vectorizer should evaluate the cost of the runtime test, and the benefit - // of various possible stride specializations, considering the alternatives - // of using gather/scatters (if available). - + // vectorizer should evaluate the cost of the runtime test, and the benefit + // of various possible stride specializations, considering the alternatives + // of using gather/scatters (if available). + const SCEV *StrideExpr = PSE->getSCEV(Stride); - const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); + const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); // Match the types so we can compare the stride and the BETakenCount. - // The Stride can be positive/negative, so we sign extend Stride; + // The Stride can be positive/negative, so we sign extend Stride; // The backdgeTakenCount is non-negative, so we zero extend BETakenCount. 
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); @@ -2243,7 +2243,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); // Since TripCount == BackEdgeTakenCount + 1, checking: - // "Stride >= TripCount" is equivalent to checking: + // "Stride >= TripCount" is equivalent to checking: // Stride - BETakenCount > 0 if (SE->isKnownPositive(StrideMinusBETaken)) { LLVM_DEBUG( diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 5c0cbb26484c..5a6bbd7b2ac6 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -118,7 +118,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { } else { SmallVector<NonLocalDepResult, 4> NLDI; assert( (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) || - isa<VAArgInst>(Inst)) && "Unknown memory instruction!"); + isa<VAArgInst>(Inst)) && "Unknown memory instruction!"); MDA.getNonLocalPointerDependency(Inst, NLDI); DepSet &InstDeps = Deps[Inst]; diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 7eeefd54f007..feae53c54ecb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" @@ -1513,6 +1514,8 @@ void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); // Flush load info for the pointer. RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); + // Invalidate phis that use the pointer. + PV.invalidateValue(Ptr); } void MemoryDependenceResults::invalidateCachedPredecessors() { @@ -1671,6 +1674,9 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) { } } + // Invalidate phis that use the removed instruction. 
+ PV.invalidateValue(RemInst); + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); LLVM_DEBUG(verifyRemoved(RemInst)); } @@ -1730,7 +1736,8 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AC = AM.getResult<AssumptionAnalysis>(F); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - return MemoryDependenceResults(AA, AC, TLI, DT); + auto &PV = AM.getResult<PhiValuesAnalysis>(F); + return MemoryDependenceResults(AA, AC, TLI, DT, PV); } char MemoryDependenceWrapperPass::ID = 0; @@ -1741,6 +1748,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass) INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep", "Memory Dependence Analysis", false, true) @@ -1758,6 +1766,7 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PhiValuesWrapperPass>(); AU.addRequiredTransitive<AAResultsWrapperPass>(); AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); } @@ -1773,7 +1782,8 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P // Check whether the analyses we depend on became invalid for any reason. if (Inv.invalidate<AAManager>(F, PA) || Inv.invalidate<AssumptionAnalysis>(F, PA) || - Inv.invalidate<DominatorTreeAnalysis>(F, PA)) + Inv.invalidate<DominatorTreeAnalysis>(F, PA) || + Inv.invalidate<PhiValuesAnalysis>(F, PA)) return true; // Otherwise this analysis result remains valid. 
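The thread running through these MemoryDependenceAnalysis hunks: the result now holds a PhiValues reference (PV), flushes it whenever a cached pointer or instruction is invalidated or removed, and reports itself invalid when any upstream analysis, now including PhiValuesAnalysis, is invalidated. A toy cache sketch of that invalidation discipline follows; every name is invented, and this is not the MemoryDependenceResults API:

#include <cstdio>
#include <map>
#include <set>
#include <string>

// A derived cache must be flushed for V, and for everything whose cached
// entry was computed from V, whenever V changes or is deleted.
class DerivedCache {
  std::map<std::string, int> Cache;                   // value -> cached fact
  std::map<std::string, std::set<std::string>> Users; // value -> dependents
public:
  void record(const std::string &V, int Fact,
              const std::set<std::string> &Inputs) {
    Cache[V] = Fact;
    for (const auto &In : Inputs)
      Users[In].insert(V);
  }
  // Mirrors the invalidateValue idea: drop V's entry and, transitively,
  // every entry that was derived from it.
  void invalidateValue(const std::string &V) {
    Cache.erase(V);
    auto It = Users.find(V);
    if (It == Users.end())
      return;
    auto Dependents = It->second; // copy: recursion mutates Users
    Users.erase(It);
    for (const auto &D : Dependents)
      invalidateValue(D);
  }
  bool has(const std::string &V) const { return Cache.count(V) != 0; }
};

int main() {
  DerivedCache C;
  C.record("p", 1, {});
  C.record("phi", 2, {"p"}); // phi's cached fact was computed from p
  C.invalidateValue("p");
  std::printf("p cached: %d, phi cached: %d\n", C.has("p"), C.has("phi"));
  // prints: p cached: 0, phi cached: 0
}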
@@ -1789,6 +1799,7 @@ bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - MemDep.emplace(AA, AC, TLI, DT); + auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult(); + MemDep.emplace(AA, AC, TLI, DT, PV); return false; } diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp index fc4049874622..8e85366b4618 100644 --- a/lib/Analysis/MustExecute.cpp +++ b/lib/Analysis/MustExecute.cpp @@ -235,7 +235,7 @@ public: } - void printInfoComment(const Value &V, formatted_raw_ostream &OS) override { + void printInfoComment(const Value &V, formatted_raw_ostream &OS) override { if (!MustExec.count(&V)) return; @@ -245,7 +245,7 @@ public: OS << " ; (mustexec in " << NumLoops << " loops: "; else OS << " ; (mustexec in: "; - + bool first = true; for (const Loop *L : Loops) { if (!first) @@ -264,6 +264,6 @@ bool MustExecutePrinter::runOnFunction(Function &F) { MustExecuteAnnotatedWriter Writer(F, DT, LI); F.print(dbgs(), &Writer); - + return false; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index aa95ace93014..0e715b8814ff 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4839,7 +4839,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy) // for each of StartVal and Accum - auto getExtendedExpr = [&](const SCEV *Expr, + auto getExtendedExpr = [&](const SCEV *Expr, bool CreateSignExtend) -> const SCEV * { assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); @@ -4935,11 +4935,11 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { return Rewrite; } -// FIXME: This utility is currently required because the Rewriter currently -// does not rewrite this expression: -// {0, +, (sext ix (trunc iy to ix) to iy)} +// FIXME: This utility is currently required because the Rewriter currently +// does not rewrite this expression: +// {0, +, (sext ix (trunc iy to ix) to iy)} // into {0, +, %step}, -// even when the following Equal predicate exists: +// even when the following Equal predicate exists: // "%step == (sext ix (trunc iy to ix) to iy)". 
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds( const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const { diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 9de2f789c89c..7233a86e5daf 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -721,7 +721,7 @@ struct ReductionData { static Optional<ReductionData> getReductionData(Instruction *I) { Value *L, *R; if (m_BinOp(m_Value(L), m_Value(R)).match(I)) - return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); + return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); if (auto *SI = dyn_cast<SelectInst>(I)) { if (m_SMin(m_Value(L), m_Value(R)).match(SI) || m_SMax(m_Value(L), m_Value(R)).match(SI) || @@ -730,8 +730,8 @@ static Optional<ReductionData> getReductionData(Instruction *I) { m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast<CmpInst>(SI->getCondition()); - return ReductionData(RK_MinMax, CI->getOpcode(), L, R); - } + return ReductionData(RK_MinMax, CI->getOpcode(), L, R); + } if (m_UMin(m_Value(L), m_Value(R)).match(SI) || m_UMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast<CmpInst>(SI->getCondition()); @@ -851,11 +851,11 @@ static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, // We look for a sequence of shuffle,shuffle,add triples like the following // that builds a pairwise reduction tree. - // + // // (X0, X1, X2, X3) // (X0 + X1, X2 + X3, undef, undef) // ((X0 + X1) + (X2 + X3), undef, undef, undef) - // + // // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, @@ -916,7 +916,7 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, // We look for a sequence of shuffles and adds like the following matching one // fadd, shuffle vector pair at a time. - // + // // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf @@ -927,7 +927,7 @@ matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, unsigned MaskStart = 1; Instruction *RdxOp = RdxStart; - SmallVector<int, 32> ShuffleMask(NumVecElems, 0); + SmallVector<int, 32> ShuffleMask(NumVecElems, 0); unsigned NumVecElemsRemain = NumVecElems; while (NumVecElemsRemain - 1) { // Check for the right reduction operation. @@ -1093,7 +1093,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { case Instruction::InsertElement: { const InsertElementInst * IE = cast<InsertElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); - unsigned Idx = -1; + unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return getVectorInstrCost(I->getOpcode(), @@ -1104,7 +1104,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { // TODO: Identify and add costs for insert/extract subvector, etc. 
if (Shuffle->changesLength()) return -1; - + if (Shuffle->isIdentity()) return 0; diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 04a7b73c22bf..0ef39163bda3 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -71,7 +71,7 @@ #include <cassert> #include <cstdint> #include <iterator> -#include <utility> +#include <utility> using namespace llvm; using namespace llvm::PatternMatch; @@ -3828,7 +3828,7 @@ static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, // If either of the values is known to be non-negative, adding them can only // overflow if the second is also non-negative, so we can assume that. - // Two non-negative numbers will only overflow if there is a carry to the + // Two non-negative numbers will only overflow if there is a carry to the // sign bit, so we can check if even when the values are as big as possible // there is no overflow to the sign bit. if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { @@ -3855,7 +3855,7 @@ static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, } // If we reached here it means that we know nothing about the sign bits. - // In this case we can't know if there will be an overflow, since by + // In this case we can't know if there will be an overflow, since by // changing the sign bits any two values can be made to overflow. return false; } @@ -3905,7 +3905,7 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, // operands. bool LHSOrRHSKnownNonNegative = (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()); - bool LHSOrRHSKnownNegative = + bool LHSOrRHSKnownNegative = (LHSKnown.isNegative() || RHSKnown.isNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); @@ -4454,7 +4454,7 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) return SPR; - + if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4630,7 +4630,7 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; } } - + if (isKnownNegation(TrueVal, FalseVal)) { // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can // match against either LHS or sext(LHS). |
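A closing illustration of the select-pattern recognition that the final ValueTracking hunks touch: a compare whose operands reappear as the select's arms is a min or max in disguise, with swapped arms flipping the flavor. The sketch below works over a toy expression type; the real matchMinMax operates on llvm::Value, handles unsigned and floating-point predicates, and all types here are invented:

#include <cstdio>

enum class Pred { SLT, SGT };
struct Cmp { Pred P; int LHS, RHS; };           // toy icmp
struct Select { Cmp Cond; int TrueV, FalseV; }; // toy select

enum class Flavor { SMin, SMax, Unknown };

// select (slt L, R), L, R -> smin(L, R)
// select (sgt L, R), L, R -> smax(L, R); swapped arms flip the flavor.
static Flavor matchMinMax(const Select &S) {
  const Cmp &C = S.Cond;
  bool ArmsMatch   = S.TrueV == C.LHS && S.FalseV == C.RHS;
  bool ArmsSwapped = S.TrueV == C.RHS && S.FalseV == C.LHS;
  if (!ArmsMatch && !ArmsSwapped)
    return Flavor::Unknown;
  bool PicksSmaller = (C.P == Pred::SLT) == ArmsMatch;
  return PicksSmaller ? Flavor::SMin : Flavor::SMax;
}

int main() {
  Select S{{Pred::SLT, 3, 7}, 3, 7}; // (3 < 7) ? 3 : 7
  std::printf("%s\n", matchMinMax(S) == Flavor::SMin ? "smin" : "other");
}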