From 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Mon, 8 Apr 2013 18:41:23 +0000
Subject: Vendor import of llvm trunk r178860:
 http://llvm.org/svn/llvm-project/llvm/trunk@178860
---
 lib/Analysis/AliasAnalysis.cpp                |   70 +-
 lib/Analysis/AliasAnalysisCounter.cpp         |    2 +-
 lib/Analysis/AliasAnalysisEvaluator.cpp       |   86 +-
 lib/Analysis/AliasDebugger.cpp                |   10 +-
 lib/Analysis/AliasSetTracker.cpp              |   12 +-
 lib/Analysis/Analysis.cpp                     |    4 +-
 lib/Analysis/BasicAliasAnalysis.cpp           |   95 +-
 lib/Analysis/BlockFrequencyInfo.cpp           |    6 +-
 lib/Analysis/BranchProbabilityInfo.cpp        |   12 +-
 lib/Analysis/CFGPrinter.cpp                   |    1 -
 lib/Analysis/CMakeLists.txt                   |    4 +-
 lib/Analysis/CaptureTracking.cpp              |    5 +
 lib/Analysis/CodeMetrics.cpp                  |  144 +--
 lib/Analysis/ConstantFolding.cpp              |  256 +++--
 lib/Analysis/CostModel.cpp                    |  122 ++-
 lib/Analysis/DbgInfoPrinter.cpp               |  224 -----
 lib/Analysis/DependenceAnalysis.cpp           |  302 +++---
 lib/Analysis/DominanceFrontier.cpp            |    2 +-
 lib/Analysis/IPA/CMakeLists.txt               |    2 +
 lib/Analysis/IPA/CallGraph.cpp                |    6 +-
 lib/Analysis/IPA/CallGraphSCCPass.cpp         |   13 +-
 lib/Analysis/IPA/CallPrinter.cpp              |   87 ++
 lib/Analysis/IPA/FindUsedTypes.cpp            |    6 +-
 lib/Analysis/IPA/GlobalsModRef.cpp            |   16 +-
 lib/Analysis/IPA/IPA.cpp                      |    2 +
 lib/Analysis/IPA/InlineCost.cpp               | 1239 +++++++++++++++++++++++++
 lib/Analysis/IVUsers.cpp                      |   12 +-
 lib/Analysis/InlineCost.cpp                   | 1067 ---------------------
 lib/Analysis/InstCount.cpp                    |   10 +-
 lib/Analysis/InstructionSimplify.cpp          |  506 +++++++---
 lib/Analysis/Interval.cpp                     |    2 +-
 lib/Analysis/LazyValueInfo.cpp                |   19 +-
 lib/Analysis/LibCallAliasAnalysis.cpp         |    4 +-
 lib/Analysis/LibCallSemantics.cpp             |    2 +-
 lib/Analysis/Lint.cpp                         |  102 +-
 lib/Analysis/Loads.cpp                        |   57 +-
 lib/Analysis/LoopInfo.cpp                     |   74 +-
 lib/Analysis/MemDepPrinter.cpp                |    8 +-
 lib/Analysis/MemoryBuiltins.cpp               |  131 ++-
 lib/Analysis/MemoryDependenceAnalysis.cpp     |  324 ++++---
 lib/Analysis/ModuleDebugInfoPrinter.cpp       |    4 +-
 lib/Analysis/NoAliasAnalysis.cpp              |    4 +-
 lib/Analysis/PHITransAddr.cpp                 |    6 +-
 lib/Analysis/PathNumbering.cpp                |   15 +-
 lib/Analysis/PathProfileInfo.cpp              |    7 +-
 lib/Analysis/PathProfileVerifier.cpp          |   13 +-
 lib/Analysis/PostDominators.cpp               |    8 +-
 lib/Analysis/ProfileDataLoader.cpp            |    6 +-
 lib/Analysis/ProfileDataLoaderPass.cpp        |   22 +-
 lib/Analysis/ProfileEstimatorPass.cpp         |    6 +-
 lib/Analysis/ProfileInfo.cpp                  |   16 +-
 lib/Analysis/ProfileInfoLoader.cpp            |    4 +-
 lib/Analysis/ProfileInfoLoaderPass.cpp        |   16 +-
 lib/Analysis/ProfileVerifierPass.cpp          |   15 +-
 lib/Analysis/PtrUseVisitor.cpp                |   36 +
 lib/Analysis/RegionInfo.cpp                   |    7 +-
 lib/Analysis/RegionPrinter.cpp                |   12 +-
 lib/Analysis/ScalarEvolution.cpp              |   61 +-
 lib/Analysis/ScalarEvolutionAliasAnalysis.cpp |    2 +-
 lib/Analysis/ScalarEvolutionExpander.cpp      |   23 +-
 lib/Analysis/SparsePropagation.cpp            |    6 +-
 lib/Analysis/TargetTransformInfo.cpp          |  558 +++++++++++
 lib/Analysis/Trace.cpp                        |    2 +-
 lib/Analysis/TypeBasedAliasAnalysis.cpp       |   10 +-
 lib/Analysis/ValueTracking.cpp                |  390 +++++---
 65 files changed, 3777 insertions(+), 2518 deletions(-)
 delete mode 100644 lib/Analysis/DbgInfoPrinter.cpp
 create mode 100644 lib/Analysis/IPA/CallPrinter.cpp
 create mode 100644 lib/Analysis/IPA/InlineCost.cpp
 delete mode 100644 lib/Analysis/InlineCost.cpp
 create mode 100644 lib/Analysis/PtrUseVisitor.cpp
 create mode 100644 lib/Analysis/TargetTransformInfo.cpp
(limited to 'lib/Analysis')
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 752edd52b454f..210b80ab63ef7 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -28,14 +28,14
@@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/BasicBlock.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; @@ -361,8 +361,28 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { } namespace { + // Conservatively return true. Return false, if there is a single path + // starting from "From" and the path does not reach "To". + static bool hasPath(const BasicBlock *From, const BasicBlock *To) { + const unsigned MaxCheck = 5; + const BasicBlock *Current = From; + for (unsigned I = 0; I < MaxCheck; I++) { + unsigned NumSuccs = Current->getTerminator()->getNumSuccessors(); + if (NumSuccs > 1) + return true; + if (NumSuccs == 0) + return false; + Current = Current->getTerminator()->getSuccessor(0); + if (Current == To) + return true; + } + return true; + } + /// Only find pointer captures which happen before the given instruction. Uses /// the dominator tree to determine whether one instruction is before another. + /// Only support the case where the Value is defined in the same basic block + /// as the given instruction and the use. struct CapturesBefore : public CaptureTracker { CapturesBefore(const Instruction *I, DominatorTree *DT) : BeforeHere(I), DT(DT), Captured(false) {} @@ -372,8 +392,15 @@ namespace { bool shouldExplore(Use *U) { Instruction *I = cast(U->getUser()); BasicBlock *BB = I->getParent(); - if (BeforeHere != I && - (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + // We explore this usage only if the usage can reach "BeforeHere". + // If use is not reachable from entry, there is no need to explore. + if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + // If the value is defined in the same basic block as use and BeforeHere, + // there is no need to explore the use if BeforeHere dominates use. + // Check whether there is a path from I to BeforeHere. + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !hasPath(BB, BeforeHere->getParent())) return false; return true; } @@ -381,8 +408,11 @@ namespace { bool captured(Use *U) { Instruction *I = cast(U->getUser()); BasicBlock *BB = I->getParent(); - if (BeforeHere != I && - (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + // Same logic as in shouldExplore. + if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !hasPath(BB, BeforeHere->getParent())) return false; Captured = true; return true; @@ -503,7 +533,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1, bool llvm::isNoAliasCall(const Value *V) { if (isa(V) || isa(V)) return ImmutableCallSite(cast(V)) - .paramHasAttr(0, Attributes::NoAlias); + .paramHasAttr(0, Attribute::NoAlias); return false; } @@ -525,19 +555,3 @@ bool llvm::isIdentifiedObject(const Value *V) { return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } - -/// isKnownNonNull - Return true if we know that the specified value is never -/// null. 
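// A minimal sketch of how a CaptureTracker such as the CapturesBefore struct
// above is typically driven, via PointerMayBeCaptured() declared in
// llvm/Analysis/CaptureTracking.h. The wrapper function below and its name
// are illustrative assumptions, not part of this change:

static bool pointerMayBeCapturedBefore(const Value *Ptr, const Instruction *I,
                                       DominatorTree *DT) {
  CapturesBefore CB(I, DT);       // the tracker defined above
  PointerMayBeCaptured(Ptr, &CB); // walks Ptr's uses, gated by shouldExplore()
  return CB.Captured;             // set by captured() on the first capturing use
}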
-bool llvm::isKnownNonNull(const Value *V) { - // Alloca never returns null, malloc might. - if (isa(V)) return true; - - // A byval argument is never null. - if (const Argument *A = dyn_cast(V)) - return A->hasByValAttr(); - - // Global values are not null unless extern weak. - if (const GlobalValue *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); - return false; -} diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 9f219f563739a..9f4a47c77e035 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" -#include "llvm/Pass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index ac72983a8d7b3..a571463dfe127 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -17,19 +17,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; static cl::opt PrintAll("print-all-alias-modref-info", cl::ReallyHidden); @@ -44,6 +44,8 @@ static cl::opt PrintMod("print-mod", cl::ReallyHidden); static cl::opt PrintRef("print-ref", cl::ReallyHidden); static cl::opt PrintModRef("print-modref", cl::ReallyHidden); +static cl::opt EvalTBAA("evaluate-tbaa", cl::ReallyHidden); + namespace { class AAEval : public FunctionPass { unsigned NoAlias, MayAlias, PartialAlias, MustAlias; @@ -123,6 +125,15 @@ PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, } } +static inline void +PrintLoadStoreResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { + if (P) { + errs() << " " << Msg << ": " << *V1 + << " <-> " << *V2 << '\n'; + } +} + static inline bool isInterestingPointer(Value *V) { return V->getType()->isPointerTy() && !isa(V); @@ -133,6 +144,8 @@ bool AAEval::runOnFunction(Function &F) { SetVector Pointers; SetVector CallSites; + SetVector Loads; + SetVector Stores; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) // Add all pointer arguments. @@ -141,6 +154,10 @@ bool AAEval::runOnFunction(Function &F) { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (I->getType()->isPointerTy()) // Add all pointer instructions. 
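// With the new -evaluate-tbaa flag, loads and stores are collected alongside
// the pointer values gathered here, and queried pairwise further below. A
// typical driver invocation (pass names assumed from the usual 3.x opt
// pipeline, shown here only as an illustration) might be:
//
//   opt -basicaa -tbaa -aa-eval -evaluate-tbaa \
//       -print-all-alias-modref-info -disable-output input.ll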
Pointers.insert(&*I); + if (EvalTBAA && isa(&*I)) + Loads.insert(&*I); + if (EvalTBAA && isa(&*I)) + Stores.insert(&*I); Instruction &Inst = *I; if (CallSite CS = cast(&Inst)) { Value *Callee = CS.getCalledValue(); @@ -197,6 +214,61 @@ bool AAEval::runOnFunction(Function &F) { } } + if (EvalTBAA) { + // iterate over all pairs of load, store + for (SetVector::iterator I1 = Loads.begin(), E = Loads.end(); + I1 != E; ++I1) { + for (SetVector::iterator I2 = Stores.begin(), E2 = Stores.end(); + I2 != E2; ++I2) { + switch (AA.alias(AA.getLocation(cast(*I1)), + AA.getLocation(cast(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + + // iterate over all pairs of store, store + for (SetVector::iterator I1 = Stores.begin(), E = Stores.end(); + I1 != E; ++I1) { + for (SetVector::iterator I2 = Stores.begin(); I2 != I1; ++I2) { + switch (AA.alias(AA.getLocation(cast(*I1)), + AA.getLocation(cast(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + } + // Mod/ref alias analysis: compare all pairs of calls and values for (SetVector::iterator C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index f15c05153e106..f6178e36f0a95 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -17,12 +17,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include using namespace llvm; diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 388c755cbd317..591052671d6ee 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -13,13 +13,13 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" #include 
"llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 9dc81a6a630f0..66e416cd140c9 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -9,8 +9,8 @@ #include "llvm-c/Analysis.h" #include "llvm-c/Initialization.h" -#include "llvm/InitializePasses.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/InitializePasses.h" #include using namespace llvm; @@ -31,7 +31,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); - initializePrintDbgInfoPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); @@ -70,6 +69,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeRegionOnlyPrinterPass(Registry); initializeScalarEvolutionPass(Registry); initializeScalarEvolutionAliasAnalysisPass(Registry); + initializeTargetTransformInfoAnalysisGroup(Registry); initializeTypeBasedAliasAnalysisPass(Registry); } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 4bb93ee88a49e..ae6da1af0c4f5 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -13,28 +13,28 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Operator.h" -#include "llvm/Pass.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" #include using namespace llvm; @@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) + if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } @@ -631,7 +631,7 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) { // For intrinsics, we can check the table. 
if (unsigned iid = F->getIntrinsicID()) { #define GET_INTRINSIC_MODREF_BEHAVIOR -#include "llvm/Intrinsics.gen" +#include "llvm/IR/Intrinsics.gen" #undef GET_INTRINSIC_MODREF_BEHAVIOR } @@ -851,9 +851,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // pointers, figure out if the indexes to the GEP tell us anything about the // derived pointer. if (const GEPOperator *GEP2 = dyn_cast(V2)) { + // Do the base pointers alias? + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, + UnderlyingV2, UnknownSize, 0); + // Check for geps of non-aliasing underlying pointers where the offsets are // identical. - if (V1Size == V2Size) { + if ((BaseAlias == MayAlias) && V1Size == V2Size) { // Do the base pointers alias assuming type and size. AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1TBAAInfo, UnderlyingV2, @@ -881,10 +885,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, GEP1VariableIndices.clear(); } } - - // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, - UnderlyingV2, UnknownSize, 0); // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. @@ -1064,39 +1064,20 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, Location(V2, V2Size, V2TBAAInfo)); if (PN > V2) std::swap(Locs.first, Locs.second); - - AliasResult Alias = - aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo, - PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)), - V2Size, V2TBAAInfo); - if (Alias == MayAlias) - return MayAlias; - - // If the first source of the PHI nodes NoAlias and the other inputs are - // the PHI node itself through some amount of recursion this does not add - // any new information so just return NoAlias. - // bb: - // ptr = ptr2 + 1 - // loop: - // ptr_phi = phi [bb, ptr], [loop, ptr_plus_one] - // ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one] - // ... - // ptr_plus_one = gep ptr_phi, 1 - // ptr2_plus_one = gep ptr2_phi, 1 - // We assume for the recursion that the the phis (ptr_phi, ptr2_phi) do - // not alias each other. - bool ArePhisAssumedNoAlias = false; - AliasResult OrigAliasResult = NoAlias; - if (Alias == NoAlias) { - // Pretend the phis do not alias. - assert(AliasCache.count(Locs) && - "There must exist an entry for the phi node"); - OrigAliasResult = AliasCache[Locs]; - AliasCache[Locs] = NoAlias; - ArePhisAssumedNoAlias = true; - } - - for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + // Analyse the PHIs' inputs under the assumption that the PHIs are + // NoAlias. + // If the PHIs are May/MustAlias there must be (recursively) an input + // operand from outside the PHIs' cycle that is MayAlias/MustAlias or + // there must be an operation on the PHIs within the PHIs' value cycle + // that causes a MayAlias. + // Pretend the phis do not alias. + AliasResult Alias = NoAlias; + assert(AliasCache.count(Locs) && + "There must exist an entry for the phi node"); + AliasResult OrigAliasResult = AliasCache[Locs]; + AliasCache[Locs] = NoAlias; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { AliasResult ThisAlias = aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), @@ -1107,7 +1088,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, } // Reset if speculation failed. 
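// The speculative scheme above can be illustrated with the PHI cycle that the
// deleted special case spelled out:
//
//   bb:
//     ptr = ptr2 + 1
//   loop:
//     ptr_phi  = phi [bb, ptr],  [loop, ptr_plus_one]
//     ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one]
//     ...
//     ptr_plus_one  = gep ptr_phi, 1
//     ptr2_plus_one = gep ptr2_phi, 1
//
// Both PHIs are first assumed NoAlias in the cache; every incoming pair is
// then checked, and only if some input pair (necessarily fed from outside the
// cycle, or produced by an operation inside it) disagrees is the cached
// assumption rolled back below.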
- if (ArePhisAssumedNoAlias && Alias != NoAlias) + if (Alias != NoAlias) AliasCache[Locs] = OrigAliasResult; return Alias; diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 8a660f737c9b2..100e5c8ae7dd6 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 04a6560262cb3..6c5885601fa33 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -11,14 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 76854000bd233..9b6879a42ed40 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -18,7 +18,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFGPrinter.h" - #include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index b3a40bee4211a..597c767a8e04b 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -13,12 +13,10 @@ add_llvm_library(LLVMAnalysis CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp - DbgInfoPrinter.cpp DependenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp - InlineCost.cpp InstCount.cpp InstructionSimplify.cpp Interval.cpp @@ -47,6 +45,7 @@ add_llvm_library(LLVMAnalysis ProfileVerifierPass.cpp ProfileDataLoader.cpp ProfileDataLoaderPass.cpp + PtrUseVisitor.cpp RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp @@ -55,6 +54,7 @@ add_llvm_library(LLVMAnalysis ScalarEvolutionExpander.cpp ScalarEvolutionNormalization.cpp SparsePropagation.cpp + TargetTransformInfo.cpp Trace.cpp TypeBasedAliasAnalysis.cpp ValueTracking.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index d9c02990a8012..a7292706dfa88 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -18,7 +18,12 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CallSite.h" + using namespace llvm; CaptureTracker::~CaptureTracker() {} diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 
651a54be1b9ee..8cda01a24c0d4 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -12,121 +12,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Function.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CallSite.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/DataLayout.h" using namespace llvm; -/// callIsSmall - If a call is likely to lower to a single target instruction, -/// or is otherwise deemed small return true. -/// TODO: Perhaps calls like memcpy, strcpy, etc? -bool llvm::callIsSmall(ImmutableCallSite CS) { - if (isa(CS.getInstruction())) - return true; - - const Function *F = CS.getCalledFunction(); - if (!F) return false; - - if (F->hasLocalLinkage()) return false; - - if (!F->hasName()) return false; - - StringRef Name = F->getName(); - - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || - Name == "sin" || Name == "sinf" || Name == "sinl" || - Name == "cos" || Name == "cosf" || Name == "cosl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) - return true; - - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || - Name == "exp2" || Name == "exp2l" || Name == "exp2f" || - Name == "floor" || Name == "floorf" || Name == "ceil" || - Name == "round" || Name == "ffs" || Name == "ffsl" || - Name == "abs" || Name == "labs" || Name == "llabs") - return true; - - return false; -} - -bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) { - if (isa(I)) - return true; - - // If a GEP has all constant indices, it will probably be folded with - // a load/store. - if (const GetElementPtrInst *GEP = dyn_cast(I)) - return GEP->hasAllConstantIndices(); - - if (const IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't count as size. - return true; - } - } - - if (const CastInst *CI = dyn_cast(I)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast()) - return true; - - Value *Op = CI->getOperand(0); - // An inttoptr cast is free so long as the input is a legal integer type - // which doesn't contain values outside the range of a pointer. - if (isa(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits()) - return true; - - // A ptrtoint cast is free so long as the result is large enough to store - // the pointer, and a legal integer type. - if (isa(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits()) - return true; - - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). 
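// The hand-written free-instruction heuristics removed from this file are now
// answered by TargetTransformInfo. A rough sketch of how a caller accumulates
// size under the new interface (assuming a TTI reference in scope, as
// analyzeBasicBlock() now takes; illustrative only):
//
//   unsigned Cost = 0;
//   for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
//     Cost += TTI.getUserCost(&*I); // free casts/GEPs report TCC_Free (0)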
- if (TD && isa(CI) && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - return true; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa(CI->getOperand(0))) - return true; - } - - return false; -} - /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const DataLayout *TD) { + const TargetTransformInfo &TTI) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (isInstructionFree(II, TD)) - continue; - // Special handling for calls. if (isa(II) || isa(II)) { ImmutableCallSite CS(cast(II)); @@ -144,12 +45,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // for that case. if (F == BB->getParent()) isRecursive = true; - } - - if (!callIsSmall(CS)) { - // Each argument to a call takes on average one instruction to set up. - NumInsts += CS.arg_size(); + if (TTI.isLoweredToCall(F)) + ++NumCalls; + } else { // We don't want inline asm to count as a call - that would prevent loop // unrolling. The argument setup cost is still real, though. if (!isa(CS.getCalledValue())) @@ -165,7 +64,15 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (isa(II) || II->getType()->isVectorTy()) ++NumVectorInsts; - ++NumInsts; + if (const CallInst *CI = dyn_cast(II)) + if (CI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + if (const InvokeInst *InvI = dyn_cast(II)) + if (InvI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + NumInsts += TTI.getUserCost(&*II); } if (isa(BB->getTerminator())) @@ -182,23 +89,8 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // if someone is using a blockaddress without an indirectbr, and that // reference somehow ends up in another function or global, we probably // don't want to inline this function. - if (isa(BB->getTerminator())) - containsIndirectBr = true; + notDuplicatable |= isa(BB->getTerminator()); // Remember NumInsts for this BB. NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; } - -void CodeMetrics::analyzeFunction(Function *F, const DataLayout *TD) { - // If this function contains a call that "returns twice" (e.g., setjmp or - // _setjmp) and it isn't marked with "returns twice" itself, never inline it. - // This is a hack because we depend on the user marking their local variables - // as volatile if they are live across a setjmp call, and they probably - // won't do this in callers. - exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->getFnAttributes().hasAttribute(Attributes::ReturnsTwice); - - // Look at the size of the callee. - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB, TD); -} diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 91a5b84e8a635..09d7608c51da7 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -9,30 +9,30 @@ // // This file defines routines for folding instructions into constants. // -// Also, to supplement the basic VMCore ConstantExpr simplifications, +// Also, to supplement the basic IR ConstantExpr simplifications, // this file defines some additional folding routines that can make use of -// DataLayout information. 
These functions cannot go in VMCore due to library +// DataLayout information. These functions cannot go in IR due to library // dependency issues. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Operator.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FEnv.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/FEnv.h" +#include "llvm/Target/TargetLibraryInfo.h" #include #include using namespace llvm; @@ -54,13 +54,12 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle a vector->integer cast. if (IntegerType *IT = dyn_cast(DestTy)) { - ConstantDataVector *CDV = dyn_cast(C); - if (CDV == 0) + VectorType *VTy = dyn_cast(C->getType()); + if (VTy == 0) return ConstantExpr::getBitCast(C, DestTy); - unsigned NumSrcElts = CDV->getType()->getNumElements(); - - Type *SrcEltTy = CDV->getType()->getElementType(); + unsigned NumSrcElts = VTy->getNumElements(); + Type *SrcEltTy = VTy->getElementType(); // If the vector is a vector of floating point, convert it to vector of int // to simplify things. @@ -68,11 +67,14 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); - // Ask VMCore to do the conversion now that #elts line up. + // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - CDV = cast(C); } + ConstantDataVector *CDV = dyn_cast(C); + if (CDV == 0) + return ConstantExpr::getBitCast(C, DestTy); + // Now that we know that the input value is a vector of integers, just shift // and insert them into our result. unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); @@ -104,7 +106,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, if (!isa(C) && !isa(C)) return ConstantExpr::getBitCast(C, DestTy); - // If the element types match, VMCore can fold it. + // If the element types match, IR can fold it. unsigned NumDstElt = DestVTy->getNumElements(); unsigned NumSrcElt = C->getType()->getVectorNumElements(); if (NumDstElt == NumSrcElt) @@ -131,7 +133,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); - // Finally, VMCore can handle this now that #elts line up. + // Finally, IR can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); } @@ -141,9 +143,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); - // Ask VMCore to do the conversion now that #elts line up. 
+ // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - // If VMCore wasn't able to fold it, bail out. + // If IR wasn't able to fold it, bail out. if (!isa(C) && // FIXME: Remove ConstantVector. !isa(C)) return C; @@ -218,10 +220,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, /// from a global, return the global and the constant. Because of /// constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, - int64_t &Offset, const DataLayout &TD) { + APInt &Offset, const DataLayout &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast(C))) { - Offset = 0; + Offset.clearAllBits(); return true; } @@ -235,34 +237,13 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) - if (CE->getOpcode() == Instruction::GetElementPtr) { - // Cannot compute this if the element type of the pointer is missing size - // info. - if (!cast(CE->getOperand(0)->getType()) - ->getElementType()->isSized()) - return false; - + if (GEPOperator *GEP = dyn_cast(CE)) { // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) return false; // Otherwise, add any offset that our operands provide. - gep_type_iterator GTI = gep_type_begin(CE); - for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); - i != e; ++i, ++GTI) { - ConstantInt *CI = dyn_cast(*i); - if (!CI) return false; // Index isn't a simple constant? - if (CI->isZero()) continue; // Not adding anything. - - if (StructType *ST = dyn_cast(*GTI)) { - // N = N + Offset - Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); - } else { - SequentialType *SQT = cast(*GTI); - Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); - } - } - return true; + return GEP->accumulateConstantOffset(TD, Offset); } return false; @@ -310,6 +291,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); } + if (CFP->getType()->isHalfTy()){ + C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } return false; } @@ -402,7 +387,9 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // that address spaces don't matter here since we're not going to result in // an actual new load. Type *MapTy; - if (LoadTy->isFloatTy()) + if (LoadTy->isHalfTy()) + MapTy = Type::getInt16PtrTy(C->getContext()); + else if (LoadTy->isFloatTy()) MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) MapTy = Type::getInt64PtrTy(C->getContext()); @@ -423,7 +410,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, if (BytesLoaded > 32 || BytesLoaded == 0) return 0; GlobalValue *GVal; - int64_t Offset; + APInt Offset(TD.getPointerSizeInBits(), 0); if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; @@ -434,14 +421,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. - if (Offset < 0) return 0; + if (Offset.isNegative()) return 0; // If we're not accessing anything in this constant, the result is undefined. 
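// IsConstantOffsetFromGlobal() now reports the offset as an APInt sized to
// the pointer width, accumulated through nested constant GEPs by
// GEPOperator::accumulateConstantOffset(). A minimal sketch of the calling
// protocol (variable names illustrative):
//
//   GlobalValue *GV;
//   APInt Off(TD.getPointerSizeInBits(), 0); // width must match the pointer
//   if (IsConstantOffsetFromGlobal(C, GV, Off, TD))
//     ; // Off is now the byte offset of C from GV, possibly negative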
- if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + if (Offset.getZExtValue() >= + TD.getTypeAllocSize(GV->getInitializer()->getType())) return UndefValue::get(IntType); unsigned char RawBytes[32] = {0}; - if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, BytesLoaded, TD)) return 0; @@ -550,10 +538,10 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging -/// these together. If target data info is available, it is provided as TD, -/// otherwise TD is null. +/// these together. If target data info is available, it is provided as DL, +/// otherwise DL is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const DataLayout *TD){ + Constant *Op1, const DataLayout *DL){ // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. @@ -561,17 +549,44 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // bits. + if (Opc == Instruction::And && DL) { + unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()); + APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); + APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); + ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); + ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + // All the bits of Op0 that the 'and' could be masking are already zero. + return Op0; + } + if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + // All the bits of Op1 that the 'and' could be masking are already zero. + return Op1; + } + + APInt KnownZero = KnownZero0 | KnownZero1; + APInt KnownOne = KnownOne0 & KnownOne1; + if ((KnownZero | KnownOne).isAllOnesValue()) { + return ConstantInt::get(Op0->getType(), KnownOne); + } + } + // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. - if (Opc == Instruction::Sub && TD) { + if (Opc == Instruction::Sub && DL) { GlobalValue *GV1, *GV2; - int64_t Offs1, Offs2; + unsigned PtrSize = DL->getPointerSizeInBits(); + unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); + APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0); - if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) - if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && GV1 == GV2) { // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. - return ConstantInt::get(Op0->getType(), Offs1-Offs2); + // PtrToInt may change the bitwidth so we have convert to the right size + // first. 
+ return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) - + Offs2.zextOrTrunc(OpSize)); } } @@ -1104,6 +1119,13 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { + case Intrinsic::fabs: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::floor: case Intrinsic::sqrt: case Intrinsic::pow: case Intrinsic::powi: @@ -1142,8 +1164,7 @@ llvm::canConstantFoldCallTo(const Function *F) { switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || - Name == "atan" || Name == "atan2"; + return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': @@ -1171,11 +1192,17 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), @@ -1187,11 +1214,17 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer @@ -1243,7 +1276,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (!TLI) return 0; - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1256,8 +1289,46 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V = Ty->isFloatTy() ? 
(double)Op->getValueAPF().convertToFloat() : - Op->getValueAPF().convertToDouble(); + double V; + if (Ty->isFloatTy()) + V = Op->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + V = Op->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + V = APF.convertToDouble(); + } + + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: + return ConstantFoldFP(fabs, V, Ty); +#if HAVE_LOG2 + case Intrinsic::log2: + return ConstantFoldFP(log2, V, Ty); +#endif +#if HAVE_LOG + case Intrinsic::log: + return ConstantFoldFP(log, V, Ty); +#endif +#if HAVE_LOG10 + case Intrinsic::log10: + return ConstantFoldFP(log10, V, Ty); +#endif +#if HAVE_EXP + case Intrinsic::exp: + return ConstantFoldFP(exp, V, Ty); +#endif +#if HAVE_EXP2 + case Intrinsic::exp2: + return ConstantFoldFP(exp2, V, Ty); +#endif + case Intrinsic::floor: + return ConstantFoldFP(floor, V, Ty); + } + switch (Name[0]) { case 'a': if (Name == "acos" && TLI->has(LibFunc::acos)) @@ -1299,7 +1370,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) return ConstantFoldFP(log10, V, Ty); else if (F->getIntrinsicID() == Intrinsic::sqrt && - (Ty->isFloatTy() || Ty->isDoubleTy())) { + (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { if (V >= -0.0) return ConstantFoldFP(sqrt, V, Ty); else // Undefined @@ -1337,7 +1408,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().countPopulation()); case Intrinsic::convert_from_fp16: { - APFloat Val(Op->getValue()); + APFloat Val(APFloat::IEEEhalf, Op->getValue()); bool lost = false; APFloat::opStatus status = @@ -1391,18 +1462,35 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast(Operands[0])) { - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty->isFloatTy() ? - (double)Op1->getValueAPF().convertToFloat() : - Op1->getValueAPF().convertToDouble(); + double Op1V; + if (Ty->isFloatTy()) + Op1V = Op1->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op1V = Op1->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op1->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op1V = APF.convertToDouble(); + } + if (ConstantFP *Op2 = dyn_cast(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - double Op2V = Ty->isFloatTy() ? 
- (double)Op2->getValueAPF().convertToFloat(): - Op2->getValueAPF().convertToDouble(); + double Op2V; + if (Ty->isFloatTy()) + Op2V = Op2->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op2V = Op2->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op2->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op2V = APF.convertToDouble(); + } if (F->getIntrinsicID() == Intrinsic::pow) { return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); @@ -1416,6 +1504,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (Name == "atan2" && TLI->has(LibFunc::atan2)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast(Operands[1])) { + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy()) + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) return ConstantFP::get(F->getContext(), APFloat((float)std::pow((float)Op1V, @@ -1468,12 +1560,12 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, return ConstantStruct::get(cast(F->getReturnType()), Ops); } case Intrinsic::cttz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros()); case Intrinsic::ctlz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 5adbf458104e1..98a7780ad9a6b 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -8,20 +8,24 @@ //===----------------------------------------------------------------------===// // // This file defines the cost model analysis. It provides a very basic cost -// estimation for LLVM-IR. The cost result can be thought of as cycles, but it -// is really unit-less. The estimated cost is ment to be used for comparing -// alternatives. +// estimation for LLVM-IR. This analysis uses the services of the codegen +// to approximate the cost of any IR instruction when lowered to machine +// instructions. The cost results are unit-less and the cost number represents +// the throughput of the machine assuming that all loads hit the cache, all +// branches are predicted, etc. The cost numbers can be added in order to +// compare two or more transformation alternatives. 
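// A sketch of how the per-instruction costs reported by this analysis can be
// consumed (illustrative only; CM stands for an instance of the analysis):
//
//   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
//     unsigned Cost = CM.getInstructionCost(&*I); // (unsigned)-1 when unknown
//     Total += Cost; // unit-less; meaningful only when comparing alternatives
//   }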
// //===----------------------------------------------------------------------===// #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME #include "llvm/Analysis/Passes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/TargetTransformInfo.h" -#include "llvm/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -31,7 +35,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - CostModelAnalysis() : FunctionPass(ID), F(0), VTTI(0) { + CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -40,7 +44,7 @@ namespace { /// Returns -1 if the cost is unknown. /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. - unsigned getInstructionCost(Instruction *I) const; + unsigned getInstructionCost(const Instruction *I) const; private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -49,8 +53,8 @@ namespace { /// The function that we analyze. Function *F; - /// Vector target information. - const VectorTargetTransformInfo *VTTI; + /// Target information. + const TargetTransformInfo *TTI; }; } // End of anonymous namespace @@ -72,25 +76,49 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool CostModelAnalysis::runOnFunction(Function &F) { this->F = &F; - - // Target information. - TargetTransformInfo *TTI; TTI = getAnalysisIfAvailable(); - if (TTI) - VTTI = TTI->getVectorTargetTransformInfo(); return false; } -unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { - if (!VTTI) +static bool isReverseVectorMask(SmallVector &Mask) { + for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) + if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) + return false; + return true; +} + +static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { + TargetTransformInfo::OperandValueKind OpInfo = + TargetTransformInfo::OK_AnyValue; + + // Check for a splat of a constant. 
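// For example, the splat constant <4 x i32> <i32 7, i32 7, i32 7, i32 7> is
// classified as OK_UniformConstantValue below, which lets targets price
// operations with a uniform constant operand (e.g. vector shifts by the same
// amount in every lane) more cheaply than the general case.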
+ ConstantDataVector *CDV = 0; + if ((CDV = dyn_cast(V))) + if (CDV->getSplatValue() != NULL) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + ConstantVector *CV = 0; + if ((CV = dyn_cast(V))) + if (CV->getSplatValue() != NULL) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + + return OpInfo; +} + +unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { + if (!TTI) return -1; switch (I->getOpcode()) { + case Instruction::GetElementPtr:{ + Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); + return TTI->getAddressComputationCost(ValTy); + } + case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { - return VTTI->getCFInstrCost(I->getOpcode()); + return TTI->getCFInstrCost(I->getOpcode()); } case Instruction::Add: case Instruction::FAdd: @@ -110,28 +138,33 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); + TargetTransformInfo::OperandValueKind Op1VK = + getOperandInfo(I->getOperand(0)); + TargetTransformInfo::OperandValueKind Op2VK = + getOperandInfo(I->getOperand(1)); + return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, + Op2VK); } case Instruction::Select: { - SelectInst *SI = cast(I); + const SelectInst *SI = cast(I); Type *CondTy = SI->getCondition()->getType(); - return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); - return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); } case Instruction::Store: { - StoreInst *SI = cast(I); + const StoreInst *SI = cast(I); Type *ValTy = SI->getValueOperand()->getType(); - return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + return TTI->getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { - LoadInst *LI = cast(I); - return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), + const LoadInst *LI = cast(I); + return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(), LI->getPointerAddressSpace()); } @@ -148,26 +181,47 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); } case Instruction::ExtractElement: { - ExtractElementInst * EEI = cast(I); + const ExtractElementInst * EEI = cast(I); ConstantInt *CI = dyn_cast(I->getOperand(1)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); - return VTTI->getVectorInstrCost(I->getOpcode(), - EEI->getOperand(0)->getType(), Idx); + return TTI->getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { - InsertElementInst * IE = cast(I); + const InsertElementInst * IE = cast(I); ConstantInt *CI = dyn_cast(IE->getOperand(2)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); - return VTTI->getVectorInstrCost(I->getOpcode(), - IE->getType(), Idx); + return TTI->getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast(I); + Type *VecTypOp0 = 
Shuffle->getOperand(0)->getType(); + unsigned NumVecElems = VecTypOp0->getVectorNumElements(); + SmallVector Mask = Shuffle->getShuffleMask(); + + if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, + 0); + return -1; + } + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast(I)) { + SmallVector Tys; + for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) + Tys.push_back(II->getArgOperand(J)->getType()); + + return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), + Tys); + } + return -1; default: // We don't have any information on this instruction. return -1; diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp deleted file mode 100644 index 41cd34c07be0c..0000000000000 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ /dev/null @@ -1,224 +0,0 @@ -//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that prints instructions, and associated debug -// info: -// -// - source/line/col information -// - original variable name -// - original type name -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static cl::opt -PrintDirectory("print-fullpath", - cl::desc("Print fullpath when printing debug info"), - cl::Hidden); - -namespace { - class PrintDbgInfo : public FunctionPass { - raw_ostream &Out; - void printVariableDeclaration(const Value *V); - public: - static char ID; // Pass identification - PrintDbgInfo() : FunctionPass(ID), Out(errs()) { - initializePrintDbgInfoPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - }; - char PrintDbgInfo::ID = 0; -} - -INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo", - "Print debug info in human readable form", false, false) - -FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); } - -/// Find the debug info descriptor corresponding to this global variable. -static Value *findDbgGlobalDeclare(GlobalVariable *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast(NMD->getOperand(i))); - if (!DIG.isGlobalVariable()) - continue; - if (DIGlobalVariable(DIG).getGlobal() == V) - return DIG; - } - return 0; -} - -/// Find the debug info descriptor corresponding to this function. 
-static Value *findDbgSubprogramDeclare(Function *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast(NMD->getOperand(i))); - if (!DIG.isSubprogram()) - continue; - if (DISubprogram(DIG).getFunction() == V) - return DIG; - } - return 0; -} - -/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any. -/// It looks through pointer casts too. -static const DbgDeclareInst *findDbgDeclare(const Value *V) { - V = V->stripPointerCasts(); - - if (!isa(V) && !isa(V)) - return 0; - - const Function *F = NULL; - if (const Instruction *I = dyn_cast(V)) - F = I->getParent()->getParent(); - else if (const Argument *A = dyn_cast(V)) - F = A->getParent(); - - for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); - BI != BE; ++BI) - if (const DbgDeclareInst *DDI = dyn_cast(BI)) - if (DDI->getAddress() == V) - return DDI; - - return 0; -} - -static bool getLocationInfo(const Value *V, std::string &DisplayName, - std::string &Type, unsigned &LineNo, - std::string &File, std::string &Dir) { - DICompileUnit Unit; - DIType TypeD; - - if (GlobalVariable *GV = dyn_cast(const_cast(V))) { - Value *DIGV = findDbgGlobalDeclare(GV); - if (!DIGV) return false; - DIGlobalVariable Var(cast(DIGV)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else if (Function *F = dyn_cast(const_cast(V))){ - Value *DIF = findDbgSubprogramDeclare(F); - if (!DIF) return false; - DISubprogram Var(cast(DIF)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else { - const DbgDeclareInst *DDI = findDbgDeclare(V); - if (!DDI) return false; - DIVariable Var(cast(DDI->getVariable())); - - StringRef D = Var.getName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } - - StringRef T = TypeD.getName(); - if (!T.empty()) - Type = T; - StringRef F = Unit.getFilename(); - if (!F.empty()) - File = F; - StringRef D = Unit.getDirectory(); - if (!D.empty()) - Dir = D; - return true; -} - -void PrintDbgInfo::printVariableDeclaration(const Value *V) { - std::string DisplayName, File, Directory, Type; - unsigned LineNo = 0; - - if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory)) - return; - - Out << "; "; - WriteAsOperand(Out, V, false, 0); - if (isa(V)) - Out << " is function " << DisplayName - << " of type " << Type << " declared at "; - else - Out << " is variable " << DisplayName - << " of type " << Type << " declared at "; - - if (PrintDirectory) - Out << Directory << "/"; - - Out << File << ":" << LineNo << "\n"; -} - -bool PrintDbgInfo::runOnFunction(Function &F) { - if (F.isDeclaration()) - return false; - - Out << "function " << F.getName() << "\n\n"; - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *BB = I; - - if (I != F.begin() && (pred_begin(BB) == pred_end(BB))) - // Skip dead blocks. 
- continue; - - Out << BB->getName(); - Out << ":"; - - Out << "\n"; - - for (BasicBlock::const_iterator i = BB->begin(), e = BB->end(); - i != e; ++i) { - - printVariableDeclaration(i); - - if (const User *U = dyn_cast(i)) { - for(unsigned i=0;igetNumOperands();i++) - printVariableDeclaration(U->getOperand(i)); - } - } - } - return false; -} diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 95ac5ea233b12..cbc71bd6e7394 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -55,12 +55,12 @@ #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" @@ -145,22 +145,20 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { // Used to test the dependence analyzer. -// Looks through the function, noting the first store instruction -// and the first load instruction -// (which always follows the first load in our tests). -// Calls depends() and prints out the result. +// Looks through the function, noting loads and stores. +// Calls depends() on every possible pair and prints out the result. // Ignores all other instructions. static void dumpExampleDependence(raw_ostream &OS, Function *F, DependenceAnalysis *DA) { for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; ++SrcI) { - if (const StoreInst *Src = dyn_cast(&*SrcI)) { + if (isa(*SrcI) || isa(*SrcI)) { for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE; ++DstI) { - if (const LoadInst *Dst = dyn_cast(&*DstI)) { + if (isa(*DstI) || isa(*DstI)) { OS << "da analyze - "; - if (Dependence *D = DA->depends(Src, Dst, true)) { + if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) { D->dump(OS); for (unsigned Level = 1; Level <= D->getLevels(); Level++) { if (D->isSplitable(Level)) { @@ -173,7 +171,6 @@ void dumpExampleDependence(raw_ostream &OS, Function *F, } else OS << "none!\n"; - return; } } } @@ -224,8 +221,8 @@ bool Dependence::isScalar(unsigned level) const { //===----------------------------------------------------------------------===// // FullDependence methods -FullDependence::FullDependence(const Instruction *Source, - const Instruction *Destination, +FullDependence::FullDependence(Instruction *Source, + Instruction *Destination, bool PossiblyLoopIndependent, unsigned CommonLevels) : Dependence(Source, Destination), @@ -586,42 +583,40 @@ void Dependence::dump(raw_ostream &OS) const { else if (isInput()) OS << "input"; unsigned Levels = getLevels(); - if (Levels) { - OS << " ["; - for (unsigned II = 1; II <= Levels; ++II) { - if (isSplitable(II)) - Splitable = true; - if (isPeelFirst(II)) - OS << 'p'; - const SCEV *Distance = getDistance(II); - if (Distance) - OS << *Distance; - else if (isScalar(II)) - OS << "S"; + OS << " ["; + for (unsigned II = 1; II <= Levels; ++II) { + if (isSplitable(II)) + Splitable = true; + if (isPeelFirst(II)) + OS << 'p'; + const SCEV *Distance = getDistance(II); + if (Distance) + OS << *Distance; + else if (isScalar(II)) + OS << "S"; + else { + unsigned Direction = getDirection(II); + if (Direction == DVEntry::ALL) + OS << "*"; else { 
- unsigned Direction = getDirection(II); - if (Direction == DVEntry::ALL) - OS << "*"; - else { - if (Direction & DVEntry::LT) - OS << "<"; - if (Direction & DVEntry::EQ) - OS << "="; - if (Direction & DVEntry::GT) - OS << ">"; - } + if (Direction & DVEntry::LT) + OS << "<"; + if (Direction & DVEntry::EQ) + OS << "="; + if (Direction & DVEntry::GT) + OS << ">"; } - if (isPeelLast(II)) - OS << 'p'; - if (II < Levels) - OS << " "; } - if (isLoopIndependent()) - OS << "|<"; - OS << "]"; - if (Splitable) - OS << " splitable"; + if (isPeelLast(II)) + OS << 'p'; + if (II < Levels) + OS << " "; } + if (isLoopIndependent()) + OS << "|<"; + OS << "]"; + if (Splitable) + OS << " splitable"; } OS << "!\n"; } @@ -652,10 +647,10 @@ bool isLoadOrStore(const Instruction *I) { static -const Value *getPointerOperand(const Instruction *I) { - if (const LoadInst *LI = dyn_cast(I)) +Value *getPointerOperand(Instruction *I) { + if (LoadInst *LI = dyn_cast(I)) return LI->getPointerOperand(); - if (const StoreInst *SI = dyn_cast(I)) + if (StoreInst *SI = dyn_cast(I)) return SI->getPointerOperand(); llvm_unreachable("Value is not load or store instruction"); return 0; @@ -2215,13 +2210,13 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { // // It occurs to me that the presence of loop-invariant variables // changes the nature of the test from "greatest common divisor" -// to "a common divisor!" +// to "a common divisor". bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, const SCEV *Dst, FullDependence &Result) const { DEBUG(dbgs() << "starting gcd\n"); ++GCDapplications; - unsigned BitWidth = Src->getType()->getIntegerBitWidth(); + unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); APInt RunningGCD = APInt::getNullValue(BitWidth); // Examine Src coefficients. @@ -3197,42 +3192,42 @@ static void dumpSmallBitVector(SmallBitVector &BV) { // Goff, Kennedy, Tseng // PLDI 1991 // -// Care is required to keep the code below up to date w.r.t. this routine. -Dependence *DependenceAnalysis::depends(const Instruction *Src, - const Instruction *Dst, +// Care is required to keep the routine below, getSplitIteration(), +// up to date with respect to this routine. +Dependence *DependenceAnalysis::depends(Instruction *Src, + Instruction *Dst, bool PossiblyLoopIndependent) { + if (Src == Dst) + PossiblyLoopIndependent = false; + if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) // if both instructions don't reference memory, there's no dependence return NULL; - if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) + if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. + DEBUG(dbgs() << "can only handle simple loads and stores\n"); return new Dependence(Src, Dst); + } - const Value *SrcPtr = getPointerOperand(Src); - const Value *DstPtr = getPointerOperand(Dst); + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { case AliasAnalysis::MayAlias: case AliasAnalysis::PartialAlias: // cannot analyse objects if we don't understand their aliasing. + DEBUG(dbgs() << "can't analyze may or partial alias\n"); return new Dependence(Src, Dst); case AliasAnalysis::NoAlias: // If the objects noalias, they are distinct, accesses are independent. + DEBUG(dbgs() << "no alias\n"); return NULL; case AliasAnalysis::MustAlias: break; // The underlying objects alias; test accesses for dependence. 
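[Aside on gcdMIVtest() above: as a worked instance, A[2*i] and A[2*i + 1] have coefficient gcd 2, which does not divide the constant difference 1, so the two accesses can never touch the same element. A self-contained sketch of that divisibility check, separate from the SCEV-based routine in this file:]

// Subscripts A*i + C1 and B*j + C2 can only collide if gcd(A, B)
// divides C2 - C1 (the classic gcd dependence test).
static long gcd(long a, long b) { return b ? gcd(b, a % b) : (a < 0 ? -a : a); }

static bool provablyIndependent(long A, long B, long C1, long C2) {
  long G = gcd(A, B);
  return G != 0 && (C2 - C1) % G != 0;
}
// provablyIndependent(2, 2, 0, 1) == true: A[2*i] vs. A[2*i+1].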
} - const GEPOperator *SrcGEP = dyn_cast(SrcPtr); - const GEPOperator *DstGEP = dyn_cast(DstPtr); - if (!SrcGEP || !DstGEP) - return new Dependence(Src, Dst); // missing GEP, assume dependence - - if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType()) - return new Dependence(Src, Dst); // different types, assume dependence - // establish loop nesting levels establishNestingLevels(Src, Dst); DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); @@ -3241,36 +3236,62 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); ++TotalArrayPairs; - // classify subscript pairs - unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + // See if there are GEPs we can use. + bool UsefulGEP = false; + GEPOperator *SrcGEP = dyn_cast(SrcPtr); + GEPOperator *DstGEP = dyn_cast(DstPtr); + if (SrcGEP && DstGEP && + SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { + const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); + const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); + DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); + DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); + + UsefulGEP = + isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + } + unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector Pair(Pairs); - for (unsigned SI = 0; SI < Pairs; ++SI) { - Pair[SI].Loops.resize(MaxLevels + 1); - Pair[SI].GroupLoops.resize(MaxLevels + 1); - Pair[SI].Group.resize(Pairs); - } - Pairs = 0; - for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), - SrcEnd = SrcGEP->idx_end(), - DstIdx = DstGEP->idx_begin(), - DstEnd = DstGEP->idx_end(); - SrcIdx != SrcEnd && DstIdx != DstEnd; - ++SrcIdx, ++DstIdx, ++Pairs) { - Pair[Pairs].Src = SE->getSCEV(*SrcIdx); - Pair[Pairs].Dst = SE->getSCEV(*DstIdx); - removeMatchingExtensions(&Pair[Pairs]); - Pair[Pairs].Classification = - classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), - Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), - Pair[Pairs].Loops); - Pair[Pairs].GroupLoops = Pair[Pairs].Loops; - Pair[Pairs].Group.set(Pairs); - DEBUG(dbgs() << " subscript " << Pairs << "\n"); - DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n"); - DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n"); - DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n"); + if (UsefulGEP) { + DEBUG(dbgs() << " using GEPs\n"); + unsigned P = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(); + SrcIdx != SrcEnd; + ++SrcIdx, ++DstIdx, ++P) { + Pair[P].Src = SE->getSCEV(*SrcIdx); + Pair[P].Dst = SE->getSCEV(*DstIdx); + } + } + else { + DEBUG(dbgs() << " ignoring GEPs\n"); + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); + DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; + } + + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Pair[P].GroupLoops.resize(MaxLevels + 1); + Pair[P].Group.resize(Pairs); + removeMatchingExtensions(&Pair[P]); + Pair[P].Classification = + classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), + Pair[P].Dst, LI->getLoopFor(Dst->getParent()), + Pair[P].Loops); + Pair[P].GroupLoops = 
Pair[P].Loops; + Pair[P].Group.set(P); + DEBUG(dbgs() << " subscript " << P << "\n"); + DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); + DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); + DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); DEBUG(dbgs() << "\tloops = "); - DEBUG(dumpSmallBitVector(Pair[Pairs].Loops)); + DEBUG(dumpSmallBitVector(Pair[P].Loops)); } SmallBitVector Separable(Pairs); @@ -3532,7 +3553,7 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, } } - // make sure Scalar flags are set correctly + // Make sure the Scalar flags are set correctly. SmallBitVector CompleteLoops(MaxLevels + 1); for (unsigned SI = 0; SI < Pairs; ++SI) CompleteLoops |= Pair[SI].Loops; @@ -3540,8 +3561,10 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, if (CompleteLoops[II]) Result.DV[II - 1].Scalar = false; - // make sure loopIndepent flag is set correctly if (PossiblyLoopIndependent) { + // Make sure the LoopIndependent flag is set correctly. + // All directions must include equal, otherwise no + // loop-independent dependence is possible. for (unsigned II = 1; II <= CommonLevels; ++II) { if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { Result.LoopIndependent = false; @@ -3549,6 +3572,19 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, } } } + else { + // On the other hand, if all directions are equal and there's no + // loop-independent dependence possible, then no dependence exists. + bool AllEqual = true; + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (Result.getDirection(II) != Dependence::DVEntry::EQ) { + AllEqual = false; + break; + } + } + if (AllEqual) + return NULL; + } FullDependence *Final = new FullDependence(Result); Result.DV = NULL; @@ -3565,7 +3601,8 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, // though simplified since we know that the dependence exists. // It's tedious, since we must go through all propagations, etc. // -// Care is required to keep this code up to date w.r.t. the code above. +// Care is required to keep this code up to date with respect to the routine +// above, depends(). 
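[Aside: a hedged sketch of the client pattern implied by this contract, mirroring dumpExampleDependence() earlier in this file; DA, Src and Dst stand in for a DependenceAnalysis pass and a pair of memory instructions:]

if (Dependence *D = DA->depends(Src, Dst, /*PossiblyLoopIndependent=*/true)) {
  for (unsigned Level = 1; Level <= D->getLevels(); ++Level)
    if (D->isSplitable(Level)) {
      // The returned SCEV names the iteration at which the Level-th
      // enclosing loop can be split to break this dependence.
      const SCEV *SplitIter = DA->getSplitIteration(D, Level);
      (void)SplitIter;
    }
  delete D; // depends() allocates; the caller owns the result.
}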
// // Generally, the dependence analyzer will be used to build // a dependence graph for a function (basically a map from instructions @@ -3608,50 +3645,65 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, assert(Dep && "expected a pointer to a Dependence"); assert(Dep->isSplitable(SplitLevel) && "Dep should be splitable at SplitLevel"); - const Instruction *Src = Dep->getSrc(); - const Instruction *Dst = Dep->getDst(); + Instruction *Src = Dep->getSrc(); + Instruction *Dst = Dep->getDst(); assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); assert(isLoadOrStore(Src)); assert(isLoadOrStore(Dst)); - const Value *SrcPtr = getPointerOperand(Src); - const Value *DstPtr = getPointerOperand(Dst); + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == AliasAnalysis::MustAlias); - const GEPOperator *SrcGEP = dyn_cast(SrcPtr); - const GEPOperator *DstGEP = dyn_cast(DstPtr); - assert(SrcGEP); - assert(DstGEP); - assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()); // establish loop nesting levels establishNestingLevels(Src, Dst); FullDependence Result(Src, Dst, false, CommonLevels); - // classify subscript pairs - unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + // See if there are GEPs we can use. + bool UsefulGEP = false; + GEPOperator *SrcGEP = dyn_cast(SrcPtr); + GEPOperator *DstGEP = dyn_cast(DstPtr); + if (SrcGEP && DstGEP && + SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { + const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); + const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); + UsefulGEP = + isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + } + unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector Pair(Pairs); - for (unsigned SI = 0; SI < Pairs; ++SI) { - Pair[SI].Loops.resize(MaxLevels + 1); - Pair[SI].GroupLoops.resize(MaxLevels + 1); - Pair[SI].Group.resize(Pairs); - } - Pairs = 0; - for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), - SrcEnd = SrcGEP->idx_end(), - DstIdx = DstGEP->idx_begin(), - DstEnd = DstGEP->idx_end(); - SrcIdx != SrcEnd && DstIdx != DstEnd; - ++SrcIdx, ++DstIdx, ++Pairs) { - Pair[Pairs].Src = SE->getSCEV(*SrcIdx); - Pair[Pairs].Dst = SE->getSCEV(*DstIdx); - Pair[Pairs].Classification = - classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), - Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), - Pair[Pairs].Loops); - Pair[Pairs].GroupLoops = Pair[Pairs].Loops; - Pair[Pairs].Group.set(Pairs); + if (UsefulGEP) { + unsigned P = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(); + SrcIdx != SrcEnd; + ++SrcIdx, ++DstIdx, ++P) { + Pair[P].Src = SE->getSCEV(*SrcIdx); + Pair[P].Dst = SE->getSCEV(*DstIdx); + } + } + else { + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; + } + + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Pair[P].GroupLoops.resize(MaxLevels + 1); + Pair[P].Group.resize(Pairs); + removeMatchingExtensions(&Pair[P]); + Pair[P].Classification = + classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), + Pair[P].Dst, LI->getLoopFor(Dst->getParent()), + Pair[P].Loops); + Pair[P].GroupLoops = Pair[P].Loops; + Pair[P].Group.set(P); } SmallBitVector Separable(Pairs); diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index 3e537e9f1a36a..7e4a89f1bd578 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/Support/Debug.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index 34d6d1bdd4215..67b4135779809 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -1,9 +1,11 @@ add_llvm_library(LLVMipa CallGraph.cpp CallGraphSCCPass.cpp + CallPrinter.cpp FindUsedTypes.cpp GlobalsModRef.cpp IPA.cpp + InlineCost.cpp ) add_dependencies(LLVMipa intrinsics_gen) diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index dec0eced27863..7620fd9842cce 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CallGraph.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 449b7ee87b1c4..a0d788f34a3c6 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -16,13 +16,13 @@ 
//===----------------------------------------------------------------------===// #define DEBUG_TYPE "cgscc-passmgr" -#include "llvm/CallGraphSCCPass.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/PassManagers.h" -#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/PassManagers.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" @@ -51,6 +51,9 @@ public: /// whether any of the passes modifies the module, and if so, return true. bool runOnModule(Module &M); + using ModulePass::doInitialization; + using ModulePass::doFinalization; + bool doInitialization(CallGraph &CG); bool doFinalization(CallGraph &CG); diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/IPA/CallPrinter.cpp new file mode 100644 index 0000000000000..306ae7a4dbfbf --- /dev/null +++ b/lib/Analysis/IPA/CallPrinter.cpp @@ -0,0 +1,87 @@ +//===- CallPrinter.cpp - DOT printer for call graph -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines '-dot-callgraph', which emit a callgraph..dot +// containing the call graph of a module. +// +// There is also a pass available to directly call dotty ('-view-callgraph'). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" + +using namespace llvm; + +namespace llvm { + +template<> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(CallGraph *Graph) { + return "Call graph"; + } + + std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) { + if (Function *Func = Node->getFunction()) + return Func->getName(); + + return "external node"; + } +}; + +} // end llvm namespace + +namespace { + +struct CallGraphViewer + : public DOTGraphTraitsModuleViewer { + static char ID; + + CallGraphViewer() + : DOTGraphTraitsModuleViewer("callgraph", ID) { + initializeCallGraphViewerPass(*PassRegistry::getPassRegistry()); + } +}; + +struct CallGraphPrinter + : public DOTGraphTraitsModulePrinter { + static char ID; + + CallGraphPrinter() + : DOTGraphTraitsModulePrinter("callgraph", ID) { + initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +} // end anonymous namespace + +char CallGraphViewer::ID = 0; +INITIALIZE_PASS(CallGraphViewer, "view-callgraph", + "View call graph", + false, false) + +char CallGraphPrinter::ID = 0; +INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph", + "Print call graph to 'dot' file", + false, false) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. 
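[Aside: a minimal sketch of scheduling the printer from C++ with the legacy PassManager of this era, instead of running opt -dot-callgraph; it uses the create function defined just below:]

#include "llvm/Analysis/CallPrinter.h"
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"

void emitCallGraphDot(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(llvm::createCallGraphPrinterPass()); // writes callgraph.dot
  PM.run(M);
}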
+ +ModulePass *llvm::createCallGraphViewerPass() { + return new CallGraphViewer(); +} + +ModulePass *llvm::createCallGraphPrinterPass() { + return new CallGraphPrinter(); +} diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index e9df3ca010224..1c4f17d3819ae 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -14,10 +14,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 990caa80c8d24..92d0d2318e0d2 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -16,20 +16,20 @@ #define DEBUG_TYPE "globalsmodref-aa" #include "llvm/Analysis/Passes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InstIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SCCIterator.h" #include using namespace llvm; diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp index 0ba2e04c63024..aa5164e9e79bc 100644 --- a/lib/Analysis/IPA/IPA.cpp +++ b/lib/Analysis/IPA/IPA.cpp @@ -20,6 +20,8 @@ using namespace llvm; void llvm::initializeIPA(PassRegistry &Registry) { initializeBasicCallGraphPass(Registry); initializeCallGraphAnalysisGroup(Registry); + initializeCallGraphPrinterPass(Registry); + initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); initializeGlobalsModRefPass(Registry); } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp new file mode 100644 index 0000000000000..35c45e61808b0 --- /dev/null +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -0,0 +1,1239 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline-cost" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); + +namespace { + +class CallAnalyzer : public InstVisitor { + typedef InstVisitor Base; + friend class InstVisitor; + + // DataLayout if available, or null. + const DataLayout *const TD; + + /// The TargetTransformInfo available for this compilation. + const TargetTransformInfo &TTI; + + // The called function. + Function &F; + + int Threshold; + int Cost; + + bool IsCallerRecursive; + bool IsRecursiveCall; + bool ExposesReturnsTwice; + bool HasDynamicAlloca; + bool ContainsNoDuplicateCall; + + /// Number of bytes allocated statically by the callee. + uint64_t AllocatedSize; + unsigned NumInstructions, NumVectorInstructions; + int FiftyPercentVectorBonus, TenPercentVectorBonus; + int VectorBonus; + + // While we walk the potentially-inlined instructions, we build up and + // maintain a mapping of simplified values specific to this callsite. The + // idea is to propagate any special information we have about arguments to + // this call through the inlinable section of the function, and account for + // likely simplifications post-inlining. The most important aspect we track + // is CFG altering simplifications -- when we prove a basic block dead, that + // can cause dramatic shifts in the cost of inlining a function. + DenseMap SimplifiedValues; + + // Keep track of the values which map back (through function arguments) to + // allocas on the caller stack which could be simplified through SROA. + DenseMap SROAArgValues; + + // The mapping of caller Alloca values to their accumulated cost savings. If + // we have to disable SROA for one of the allocas, this tells us how much + // cost must be added. + DenseMap SROAArgCosts; + + // Keep track of values which map to a pointer base and constant offset. + DenseMap > ConstantOffsetPtrs; + + // Custom simplification helper routines. + bool isAllocaDerivedArg(Value *V); + bool lookupSROAArgAndCost(Value *V, Value *&Arg, + DenseMap::iterator &CostIt); + void disableSROA(DenseMap::iterator CostIt); + void disableSROA(Value *V); + void accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost); + bool handleSROACandidate(bool IsSROAValid, + DenseMap::iterator CostIt, + int InstructionCost); + bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + bool simplifyCallSite(Function *F, CallSite CS); + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + + // Custom analysis routines. + bool analyzeBlock(BasicBlock *BB); + + // Disable several entry points to the visitor so we don't accidentally use + // them by declaring but not defining them here. 
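[Aside: a hedged sketch of how this analyzer is driven; its public interface (the constructor, analyzeCall, getCost, getThreshold) is declared just below, and the TD/TTI/Threshold setup is assumed to come from the inliner:]

// TD may be null; TTI, Callee, Threshold and CS come from the caller.
CallAnalyzer CA(TD, TTI, *Callee, Threshold);
if (CA.analyzeCall(CS)) {
  // Viable: CA.getCost() stayed within CA.getThreshold() plus bonuses.
} else {
  // Not viable: either the cost crossed the threshold, or an aborting
  // construct (returns_twice, dynamic alloca, recursion) was found.
}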
+ void visit(Module *); void visit(Module &); + void visit(Function *); void visit(Function &); + void visit(BasicBlock *); void visit(BasicBlock &); + + // Provide base case for our instruction visit. + bool visitInstruction(Instruction &I); + + // Our visit overrides. + bool visitAlloca(AllocaInst &I); + bool visitPHI(PHINode &I); + bool visitGetElementPtr(GetElementPtrInst &I); + bool visitBitCast(BitCastInst &I); + bool visitPtrToInt(PtrToIntInst &I); + bool visitIntToPtr(IntToPtrInst &I); + bool visitCastInst(CastInst &I); + bool visitUnaryInstruction(UnaryInstruction &I); + bool visitICmp(ICmpInst &I); + bool visitSub(BinaryOperator &I); + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitStore(StoreInst &I); + bool visitExtractValue(ExtractValueInst &I); + bool visitInsertValue(InsertValueInst &I); + bool visitCallSite(CallSite CS); + +public: + CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, + Function &Callee, int Threshold) + : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} + + bool analyzeCall(CallSite CS); + + int getThreshold() { return Threshold; } + int getCost() { return Cost; } + + // Keep a bunch of stats about the cost savings found so we can print them + // out when debugging. + unsigned NumConstantArgs; + unsigned NumConstantOffsetPtrArgs; + unsigned NumAllocaArgs; + unsigned NumConstantPtrCmps; + unsigned NumConstantPtrDiffs; + unsigned NumInstructionsSimplified; + unsigned SROACostSavings; + unsigned SROACostSavingsLost; + + void dump(); +}; + +} // namespace + +/// \brief Test whether the given value is an Alloca-derived function argument. +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { + return SROAArgValues.count(V); +} + +/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. +/// Returns false if V does not map to a SROA-candidate. +bool CallAnalyzer::lookupSROAArgAndCost( + Value *V, Value *&Arg, DenseMap::iterator &CostIt) { + if (SROAArgValues.empty() || SROAArgCosts.empty()) + return false; + + DenseMap::iterator ArgIt = SROAArgValues.find(V); + if (ArgIt == SROAArgValues.end()) + return false; + + Arg = ArgIt->second; + CostIt = SROAArgCosts.find(Arg); + return CostIt != SROAArgCosts.end(); +} + +/// \brief Disable SROA for the candidate marked by this cost iterator. +/// +/// This marks the candidate as no longer viable for SROA, and adds the cost +/// savings associated with it back into the inline cost measurement. +void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { + // If we're no longer able to perform SROA we need to undo its cost savings + // and prevent subsequent analysis. + Cost += CostIt->second; + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); +} + +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. 
+void CallAnalyzer::disableSROA(Value *V) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) + disableSROA(CostIt); +} + +/// \brief Accumulate the given cost for a particular SROA candidate. +void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost) { + CostIt->second += InstructionCost; + SROACostSavings += InstructionCost; +} + +/// \brief Helper for the common pattern of handling a SROA candidate. +/// Either accumulates the cost savings if the SROA remains valid, or disables +/// SROA for the candidate. +bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, + DenseMap::iterator CostIt, + int InstructionCost) { + if (IsSROAValid) { + accumulateSROACost(CostIt, InstructionCost); + return true; + } + + disableSROA(CostIt); + return false; +} + +/// \brief Check whether a GEP's indices are all constant. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa(*I) && !SimplifiedValues.lookup(*I)) + return false; + + return true; +} + +/// \brief Accumulate a constant GEP offset into an APInt if possible. +/// +/// Returns false if unable to compute the offset for any reason. Respects any +/// simplified values known during the analysis of this callsite. +bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { + if (!TD) + return false; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast(GTI.getOperand()); + if (!OpC) + if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) + OpC = dyn_cast(SimpleOp); + if (!OpC) + return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; + } + return true; +} + +bool CallAnalyzer::visitAlloca(AllocaInst &I) { + // FIXME: Check whether inlining will turn a dynamic alloca into a static + // alloca, and handle that case. + + // Accumulate the allocated size. + if (I.isStaticAlloca()) { + Type *Ty = I.getAllocatedType(); + AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : + Ty->getPrimitiveSizeInBits()); + } + + // We will happily inline static alloca instructions. + if (I.isStaticAlloca()) + return Base::visitAlloca(I); + + // FIXME: This is overly conservative. Dynamic allocas are inefficient for + // a variety of reasons, and so we would like to not inline them into + // functions which don't currently have a dynamic alloca. This simply + // disables inlining altogether in the presence of a dynamic alloca. + HasDynamicAlloca = true; + return false; +} + +bool CallAnalyzer::visitPHI(PHINode &I) { + // FIXME: We should potentially be tracking values through phi nodes, + // especially when they collapse to a single value due to deleted CFG edges + // during inlining. 
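[Aside on accumulateGEPOffset() above: struct indices contribute a field offset taken from StructLayout, while sequential indices contribute index times the element's alloc size. A standalone sketch of that arithmetic, assuming a DataLayout DL is in hand:]

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

// Byte offset of field FieldIdx in STy, plus ArrayIdx steps of ElemTy.
uint64_t exampleGEPOffset(const llvm::DataLayout &DL, llvm::StructType *STy,
                          unsigned FieldIdx, llvm::Type *ElemTy,
                          int64_t ArrayIdx) {
  const llvm::StructLayout *SL = DL.getStructLayout(STy);
  return SL->getElementOffset(FieldIdx) + ArrayIdx * DL.getTypeAllocSize(ElemTy);
}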
+ + // FIXME: We need to propagate SROA *disabling* through phi nodes, even + // though we don't want to propagate it's bonuses. The idea is to disable + // SROA if it *might* be used in an inappropriate manner. + + // Phi nodes are always zero-cost. + return true; +} + +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), + SROAArg, CostIt); + + // Try to fold GEPs of constant-offset call site argument pointers. This + // requires target data and inbounds GEPs. + if (TD && I.isInBounds()) { + // Check if we have a base + offset for the pointer. + Value *Ptr = I.getPointerOperand(); + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); + if (BaseAndOffset.first) { + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { + // Non-constant GEPs aren't folded, and disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; + } + + // Add the result as a new mapping to Base + Offset. + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also handle SROA candidates here, we already know that the GEP is + // all-constant indexed. + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + return true; + } + } + + if (isGEPOffsetConstant(I)) { + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + // Constant GEPs are modeled as free. + return true; + } + + // Variable GEPs will require math and will disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; +} + +bool CallAnalyzer::visitBitCast(BitCastInst &I) { + // Propagate constants through bitcasts. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offsets through casts + std::pair BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + // Casts don't change the offset, just wrap it up. + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also look for SROA candidates here. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + // Bitcasts are always zero cost. + return true; +} + +bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when converted to a plain integer provided the + // integer is large enough to represent the pointer. + unsigned IntegerSize = I.getType()->getScalarSizeInBits(); + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { + std::pair BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // This is really weird. Technically, ptrtoint will disable SROA. However, + // unless that ptrtoint is *used* somewhere in the live basic blocks after + // inlining, it will be nuked, and SROA should proceed. All of the uses which + // would block SROA would also block SROA if applied directly to a pointer, + // and so we can just add the integer in here. 
The only places where SROA is + // preserved either cannot fire on an integer, or won't in-and-of themselves + // disable SROA (ext) w/o some later use that we would see and disable. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when round-tripped through a pointer without + // modifications provided the integer is not too large. + Value *Op = I.getOperand(0); + unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); + if (TD && IntegerSize <= TD->getPointerSizeInBits()) { + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // "Propagate" SROA here in the same manner as we do for ptrtoint above. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitCastInst(CastInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. + disableSROA(I.getOperand(0)); + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { + Value *Operand = I.getOperand(0); + Constant *COp = dyn_cast(Operand); + if (!COp) + COp = SimplifiedValues.lookup(Operand); + if (COp) + if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), + COp, TD)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on the argument to arbitrary unary operators. + disableSROA(Operand); + + return false; +} + +bool CallAnalyzer::visitICmp(ICmpInst &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + // First try to handle simplified comparisons. + if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + if (Constant *CLHS = dyn_cast(LHS)) + if (Constant *CRHS = dyn_cast(RHS)) + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + return true; + } + + // Otherwise look for a comparison between constant offset pointers with + // a common base. + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the icmp to a constant based on the + // offsets. 
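+      // For example, with a common base P, icmp eq (P+4), (P+8) folds to
+      // false and icmp ult (P+4), (P+8) folds to true; only the APInt
+      // offsets matter once the bases are known equal.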
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrCmps; + return true; + } + } + } + + // If the comparison is an equality comparison with null, we can simplify it + // for any alloca-derived argument. + if (I.isEquality() && isa(I.getOperand(1))) + if (isAllocaDerivedArg(I.getOperand(0))) { + // We can actually predict the result of comparisons between an + // alloca-derived value and null. Note that this fires regardless of + // SROA firing. + bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; + SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) + : ConstantInt::getFalse(I.getType()); + return true; + } + + // Finally check for SROA candidates in comparisons. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (isa(I.getOperand(1))) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitSub(BinaryOperator &I) { + // Try to handle a special case: we can fold computing the difference of two + // constant-related pointers. + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the subtract to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrDiffs; + return true; + } + } + } + + // Otherwise, fall back to the generic logic for simplifying and handling + // instructions. + return Base::visitSub(I); +} + +bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); + if (Constant *C = dyn_cast_or_null(SimpleV)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on arguments to arbitrary, unsimplified binary operators. + disableSROA(LHS); + disableSROA(RHS); + + return false; +} + +bool CallAnalyzer::visitLoad(LoadInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitStore(StoreInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { + // Constant folding for extract value is trivial. 
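+  // If the aggregate operand is, or simplifies to, a constant, the element
+  // is read out directly via ConstantExpr and the instruction becomes free.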
+ Constant *C = dyn_cast(I.getAggregateOperand()); + if (!C) + C = SimplifiedValues.lookup(I.getAggregateOperand()); + if (C) { + SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { + // Constant folding for insert value is trivial. + Constant *AggC = dyn_cast(I.getAggregateOperand()); + if (!AggC) + AggC = SimplifiedValues.lookup(I.getAggregateOperand()); + Constant *InsertedC = dyn_cast(I.getInsertedValueOperand()); + if (!InsertedC) + InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); + if (AggC && InsertedC) { + SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, + I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +/// \brief Try to simplify a call site. +/// +/// Takes a concrete function and callsite and tries to actually simplify it by +/// analyzing the arguments and call itself with instsimplify. Returns true if +/// it has simplified the callsite to some other entity (a constant), making it +/// free. +bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { + // FIXME: Using the instsimplify logic directly for this is inefficient + // because we have to continually rebuild the argument list even when no + // simplifications can be performed. Until that is fixed with remapping + // inside of instsimplify, directly constant fold calls here. + if (!canConstantFoldCallTo(F)) + return false; + + // Try to re-map the arguments to constants. + SmallVector ConstantArgs; + ConstantArgs.reserve(CS.arg_size()); + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + Constant *C = dyn_cast(*I); + if (!C) + C = dyn_cast_or_null(SimplifiedValues.lookup(*I)); + if (!C) + return false; // This argument doesn't map to a constant. + + ConstantArgs.push_back(C); + } + if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + SimplifiedValues[CS.getInstruction()] = C; + return true; + } + + return false; +} + +bool CallAnalyzer::visitCallSite(CallSite CS) { + if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && + !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice)) { + // This aborts the entire analysis. + ExposesReturnsTwice = true; + return false; + } + if (CS.isCall() && + cast(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) + ContainsNoDuplicateCall = true; + + if (Function *F = CS.getCalledFunction()) { + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, CS)) + return true; + + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + } + } + + if (F == CS.getInstruction()->getParent()->getParent()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursiveCall = true; + return false; + } + + if (TTI.isLoweredToCall(F)) { + // We account for the average 1 instruction per call argument setup + // here. 
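+      // A lowered call is thus charged one instruction per argument plus,
+      // unless it is inline asm, CallPenalty for the call itself.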
+ Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Everything other than inline ASM will also have a significant cost + // merely from making the call. + if (!isa(CS.getCalledValue())) + Cost += InlineConstants::CallPenalty; + } + + return Base::visitCallSite(CS); + } + + // Otherwise we're in a very special case -- an indirect function call. See + // if we can be particularly clever about this. + Value *Callee = CS.getCalledValue(); + + // First, pay the price of the argument setup. We account for the average + // 1 instruction per call argument setup here. + Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Next, check if this happens to be an indirect function call to a known + // function in this inline context. If not, we've done all we can. + Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); + if (!F) + return Base::visitCallSite(CS); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan + // out. Pretend to inline the function, with a custom threshold. + CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); + if (CA.analyzeCall(CS)) { + // We were able to inline the indirect call! Subtract the cost from the + // bonus we want to apply, but don't go below zero. + Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + } + + return Base::visitCallSite(CS); +} + +bool CallAnalyzer::visitInstruction(Instruction &I) { + // Some instructions are free. All of the free intrinsics can also be + // handled by SROA, etc. + if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) + return true; + + // We found something we don't understand or can't handle. Mark any SROA-able + // values in the operand list as no longer viable. + for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) + disableSROA(*OI); + + return false; +} + + +/// \brief Analyze a basic block for its contribution to the inline cost. +/// +/// This method walks the analyzer over every instruction in the given basic +/// block and accounts for their cost during inlining at this callsite. It +/// aborts early if the threshold has been exceeded or an impossible to inline +/// construct has been detected. It returns false if inlining is no longer +/// viable, and true if inlining remains viable. +bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { + for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); + I != E; ++I) { + ++NumInstructions; + if (isa(I) || I->getType()->isVectorTy()) + ++NumVectorInstructions; + + // If the instruction simplified to a constant, there is no cost to this + // instruction. Visit the instructions using our InstVisitor to account for + // all of the per-instruction logic. The visit tree returns true if we + // consumed the instruction in any way, and false if the instruction's base + // cost should count against inlining. + if (Base::visit(I)) + ++NumInstructionsSimplified; + else + Cost += InlineConstants::InstrCost; + + // If the visit this instruction detected an uninlinable pattern, abort. 
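+    // These flags are set by visitAlloca and visitCallSite; the caller,
+    // analyzeCall, re-checks them to tell a hard abort apart from merely
+    // crossing the cost threshold.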
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + return false; + + if (NumVectorInstructions > NumInstructions/2) + VectorBonus = FiftyPercentVectorBonus; + else if (NumVectorInstructions > NumInstructions/10) + VectorBonus = TenPercentVectorBonus; + else + VectorBonus = 0; + + // Check if we've past the threshold so we don't spin in huge basic + // blocks that will never inline. + if (Cost > (Threshold + VectorBonus)) + return false; + } + + return true; +} + +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. +ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { + if (!TD || !V->getType()->isPointerTy()) + return 0; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + APInt Offset = APInt::getNullValue(IntPtrWidth); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast(V)) { + if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) + return 0; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); + + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + return cast(ConstantInt::get(IntPtrTy, Offset)); +} + +/// \brief Analyze a call site for potential inlining. +/// +/// Returns true if inlining this call is viable, and false if it is not +/// viable. It computes the cost and adjusts the threshold based on numerous +/// factors and heuristics. If this method returns false but the computed cost +/// is below the computed threshold, then inlining was forcibly disabled by +/// some artifact of the routine. +bool CallAnalyzer::analyzeCall(CallSite CS) { + ++NumCallsAnalyzed; + + // Track whether the post-inlining function would have more than one basic + // block. A single basic block is often intended for inlining. Balloon the + // threshold by 50% until we pass the single-BB phase. + bool SingleBB = true; + int SingleBBBonus = Threshold / 2; + Threshold += SingleBBBonus; + + // Perform some tweaks to the cost and threshold based on the direct + // callsite information. + + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + FiftyPercentVectorBonus = Threshold; + TenPercentVectorBonus = Threshold / 2; + + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. 
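+  // For byval arguments the credit models the copy that inlining removes:
+  // roughly one load and one store per pointer-sized word, capped at eight
+  // stores; other arguments get one instruction of credit each.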
+
+/// \brief Analyze a call site for potential inlining.
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+  ++NumCallsAnalyzed;
+
+  // Track whether the post-inlining function would have more than one basic
+  // block. A single basic block is often intended for inlining. Balloon the
+  // threshold by 50% until we pass the single-BB phase.
+  bool SingleBB = true;
+  int SingleBBBonus = Threshold / 2;
+  Threshold += SingleBBBonus;
+
+  // Perform some tweaks to the cost and threshold based on the direct
+  // callsite information.
+
+  // We want to more aggressively inline vector-dense kernels, so up the
+  // threshold, and we'll lower it if the % of vector instructions gets too
+  // low.
+  assert(NumInstructions == 0);
+  assert(NumVectorInstructions == 0);
+  FiftyPercentVectorBonus = Threshold;
+  TenPercentVectorBonus = Threshold / 2;
+
+  // Give out bonuses per argument, as the instructions setting them up will
+  // be gone after inlining.
+  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+    if (TD && CS.isByValArgument(I)) {
+      // We approximate the number of loads and stores needed by dividing the
+      // size of the byval type by the target's pointer size.
+      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+      unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
+      unsigned PointerSize = TD->getPointerSizeInBits();
+      // Ceiling division.
+      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+      // If it generates more than 8 stores it is likely to be expanded as an
+      // inline memcpy so we take that as an upper bound. Otherwise we assume
+      // one load and one store per word copied.
+      // FIXME: The maxStoresPerMemcpy setting from the target should be used
+      // here instead of a magic number of 8, but it's not available via
+      // DataLayout.
+      NumStores = std::min(NumStores, 8U);
+
+      Cost -= 2 * NumStores * InlineConstants::InstrCost;
+    } else {
+      // For non-byval arguments subtract off one instruction per call
+      // argument.
+      Cost -= InlineConstants::InstrCost;
+    }
+  }
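
// Worked form of the byval bonus computed in the loop above, as a
// self-contained sketch. The cost of 5 per instruction mirrors
// InlineConstants::InstrCost but is hard-coded here for illustration.
#include <algorithm>

unsigned byValCopyBonus(unsigned TypeSizeInBits, unsigned PtrSizeInBits) {
  const unsigned InstrCost = 5;
  // Ceiling division: pointer-sized words needed to copy the byval type.
  unsigned NumStores = (TypeSizeInBits + PtrSizeInBits - 1) / PtrSizeInBits;
  // Larger copies are assumed to become an inline memcpy; cap at 8 words.
  NumStores = std::min(NumStores, 8U);
  // One load plus one store per word copied.
  return 2 * NumStores * InstrCost;
}
// E.g. a 192-bit struct on a 64-bit target: 3 words, bonus = 2 * 3 * 5 = 30.
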
+
+  // If there is only one call of the function, and it has internal linkage,
+  // the cost of inlining it drops dramatically.
+  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
+    &F == CS.getCalledFunction();
+  if (OnlyOneCallAndLocalLinkage)
+    Cost += InlineConstants::LastCallToStaticBonus;
+
+  // If the instruction after the call, or the normal destination of the
+  // invoke, is an unreachable instruction, the function is noreturn. As such,
+  // there is little point in inlining this unless there is literally zero
+  // cost.
+  Instruction *Instr = CS.getInstruction();
+  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+      Threshold = 1;
+  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+    Threshold = 1;
+
+  // If this function uses the coldcc calling convention, prefer not to inline
+  // it.
+  if (F.getCallingConv() == CallingConv::Cold)
+    Cost += InlineConstants::ColdccPenalty;
+
+  // Check if we're done. This can happen due to bonuses and penalties.
+  if (Cost > Threshold)
+    return false;
+
+  if (F.empty())
+    return true;
+
+  Function *Caller = CS.getInstruction()->getParent()->getParent();
+  // Check if the caller function is recursive itself.
+  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
+       U != E; ++U) {
+    CallSite Site(cast<Value>(*U));
+    if (!Site)
+      continue;
+    Instruction *I = Site.getInstruction();
+    if (I->getParent()->getParent() == Caller) {
+      IsCallerRecursive = true;
+      break;
+    }
+  }
+
+  // Track whether we've seen a return instruction. The first return
+  // instruction is free, as at least one will usually disappear in inlining.
+  bool HasReturn = false;
+
+  // Populate our simplified values by mapping from function arguments to call
+  // arguments with known important simplifications.
+  CallSite::arg_iterator CAI = CS.arg_begin();
+  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
+       FAI != FAE; ++FAI, ++CAI) {
+    assert(CAI != CS.arg_end());
+    if (Constant *C = dyn_cast<Constant>(CAI))
+      SimplifiedValues[FAI] = C;
+
+    Value *PtrArg = *CAI;
+    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
+      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+
+      // We can SROA any pointer arguments derived from alloca instructions.
+      if (isa<AllocaInst>(PtrArg)) {
+        SROAArgValues[FAI] = PtrArg;
+        SROAArgCosts[PtrArg] = 0;
+      }
+    }
+  }
+  NumConstantArgs = SimplifiedValues.size();
+  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
+  NumAllocaArgs = SROAArgValues.size();
+
+  // The worklist of live basic blocks in the callee *after* inlining. We avoid
+  // adding basic blocks of the callee which can be proven to be dead for this
+  // particular call site in order to get more accurate cost estimates. This
+  // requires a somewhat heavyweight iteration pattern: we need to walk the
+  // basic blocks in a breadth-first order as we insert live successors. To
+  // accomplish this, prioritizing for small iterations because we exit after
+  // crossing our threshold, we use a small-size optimized SetVector.
+  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
+                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+  BBSetVector BBWorklist;
+  BBWorklist.insert(&F.getEntryBlock());
+  // Note that we *must not* cache the size, this loop grows the worklist.
+  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+    // Bail out the moment we cross the threshold. This means we'll under-count
+    // the cost, but only when undercounting doesn't matter.
+    if (Cost > (Threshold + VectorBonus))
+      break;
+
+    BasicBlock *BB = BBWorklist[Idx];
+    if (BB->empty())
+      continue;
+
+    // Handle the terminator cost here where we can track returns and other
+    // function-wide constructs.
+    TerminatorInst *TI = BB->getTerminator();
+
+    // We never want to inline functions that contain an indirectbr. This is
+    // incorrect because all the blockaddresses (in static global initializers
+    // for example) would be referring to the original function, and this
+    // indirect jump would jump from the inlined copy of the function into the
+    // original function which is extremely undefined behavior.
+    // FIXME: This logic isn't really right; we can safely inline functions
+    // with indirectbr's as long as no other function or global references the
+    // blockaddress of a block within the current function. And as a QOI issue,
+    // if someone is using a blockaddress without an indirectbr, and that
+    // reference somehow ends up in another function or global, we probably
+    // don't want to inline this function.
+    if (isa<IndirectBrInst>(TI))
+      return false;
+
+    if (!HasReturn && isa<ReturnInst>(TI))
+      HasReturn = true;
+    else
+      Cost += InlineConstants::InstrCost;
+
+    // Analyze the cost of this block. If we blow through the threshold, this
+    // returns false, and we can bail out.
+    if (!analyzeBlock(BB)) {
+      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+        return false;
+
+      // If the caller is a recursive function then we don't want to inline
+      // functions which allocate a lot of stack space because it would
+      // increase the caller stack usage dramatically.
+      if (IsCallerRecursive &&
+          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+        return false;
+
+      break;
+    }
+
+    // Add in the live successors by first checking whether we have a
+    // terminator that may be simplified based on the values simplified by
+    // this call.
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (BI->isConditional()) {
+        Value *Cond = BI->getCondition();
+        if (ConstantInt *SimpleCond
+              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+          continue;
+        }
+      }
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      Value *Cond = SI->getCondition();
+      if (ConstantInt *SimpleCond
+            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+        continue;
+      }
+    }
+
+    // If we're unable to select a particular successor, just count all of
+    // them.
+    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
+         ++TIdx)
+      BBWorklist.insert(TI->getSuccessor(TIdx));
+
+    // If we had any successors at this point, then post-inlining is likely to
+    // have them as well. Note that we assume any basic blocks which existed
+    // due to branches or switches which folded above will also fold after
+    // inlining.
+    if (SingleBB && TI->getNumSuccessors() > 1) {
+      // Take off the bonus we applied to the threshold.
+      Threshold -= SingleBBBonus;
+      SingleBB = false;
+    }
+  }
+
+  // If this is a noduplicate call, we can still inline as long as
+  // inlining this would cause the removal of the caller (so the instruction
+  // is not actually duplicated, just moved).
+  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
+    return false;
+
+  Threshold += VectorBonus;
+
+  return Cost < Threshold;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// \brief Dump stats about this call's analysis.
+void CallAnalyzer::dump() {
+#define DEBUG_PRINT_STAT(x) llvm::dbgs() << "      " #x ": " << x << "\n"
+  DEBUG_PRINT_STAT(NumConstantArgs);
+  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
+  DEBUG_PRINT_STAT(NumAllocaArgs);
+  DEBUG_PRINT_STAT(NumConstantPtrCmps);
+  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
+  DEBUG_PRINT_STAT(NumInstructionsSimplified);
+  DEBUG_PRINT_STAT(SROACostSavings);
+  DEBUG_PRINT_STAT(SROACostSavingsLost);
+  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
+#undef DEBUG_PRINT_STAT
+}
+#endif
+
+INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
+                      true, true)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
+                    true, true)
+
+char InlineCostAnalysis::ID = 0;
+
+InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {}
+
+InlineCostAnalysis::~InlineCostAnalysis() {}
+
+void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<TargetTransformInfo>();
+  CallGraphSCCPass::getAnalysisUsage(AU);
+}
+
+bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
+  TD = getAnalysisIfAvailable<DataLayout>();
+  TTI = &getAnalysis<TargetTransformInfo>();
+  return false;
+}
+
+InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
+  return getInlineCost(CS, CS.getCalledFunction(), Threshold);
+}
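
// The worklist iteration above depends on SetVector semantics; a toy stand-in
// (not llvm::SetVector) makes the pattern explicit: a deduplicating vector
// iterated by index, whose size is re-read each trip because visiting a block
// may append newly live successors.
#include <unordered_set>
#include <vector>

template <typename T> struct MiniSetVector {
  std::vector<T> Vec;
  std::unordered_set<T> Set;
  void insert(T V) {
    if (Set.insert(V).second) // drop duplicates, keep discovery order
      Vec.push_back(V);
  }
  T operator[](unsigned Idx) const { return Vec[Idx]; }
  unsigned size() const { return Vec.size(); }
};
// Usage mirrors the loop above (Block and successors() are hypothetical):
//   MiniSetVector<Block *> Worklist;
//   Worklist.insert(Entry);
//   for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx)
//     for (Block *Succ : successors(Worklist[Idx]))
//       Worklist.insert(Succ);
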
+
+InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
+                                             int Threshold) {
+  // Cannot inline indirect calls.
+  if (!Callee)
+    return llvm::InlineCost::getNever();
+
+  // Calls to functions with always-inline attributes should be inlined
+  // whenever possible.
+  if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                           Attribute::AlwaysInline)) {
+    if (isInlineViable(*Callee))
+      return llvm::InlineCost::getAlways();
+    return llvm::InlineCost::getNever();
+  }
+
+  // Don't inline functions which can be redefined at link-time to mean
+  // something else. Don't inline functions marked noinline or call sites
+  // marked noinline.
+  if (Callee->mayBeOverridden() ||
+      Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                           Attribute::NoInline) ||
+      CS.isNoInline())
+    return llvm::InlineCost::getNever();
+
+  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
+        << "...\n");
+
+  CallAnalyzer CA(TD, *TTI, *Callee, Threshold);
+  bool ShouldInline = CA.analyzeCall(CS);
+
+  DEBUG(CA.dump());
+
+  // Check if there was a reason to force inlining or no inlining.
+  if (!ShouldInline && CA.getCost() < CA.getThreshold())
+    return InlineCost::getNever();
+  if (ShouldInline && CA.getCost() >= CA.getThreshold())
+    return InlineCost::getAlways();
+
+  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
+}
+
+bool InlineCostAnalysis::isInlineViable(Function &F) {
+  bool ReturnsTwice =
+    F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                   Attribute::ReturnsTwice);
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+    // Disallow inlining of functions which contain an indirect branch.
+    if (isa<IndirectBrInst>(BI->getTerminator()))
+      return false;
+
+    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+         ++II) {
+      CallSite CS(II);
+      if (!CS)
+        continue;
+
+      // Disallow recursive calls.
+      if (&F == CS.getCalledFunction())
+        return false;
+
+      // Disallow calls which expose returns-twice to a function not previously
+      // attributed as such.
+      if (!ReturnsTwice && CS.isCall() &&
+          cast<CallInst>(CS.getInstruction())->canReturnTwice())
+        return false;
+    }
+  }
+
+  return true;
+}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index d4221b89e0f6f..b33e2cb9999e8 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -14,17 +14,17 @@
 #define DEBUG_TYPE "iv-users"
 #include "llvm/Analysis/IVUsers.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
deleted file mode 100644
index 5f51f775f142a..0000000000000
--- a/lib/Analysis/InlineCost.cpp
+++ /dev/null
@@ -1,1067 +0,0 @@
-//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements inline cost analysis.
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "inline-cost" -#include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/CallingConv.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" -#include "llvm/GlobalAlias.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" - -using namespace llvm; - -STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); - -namespace { - -class CallAnalyzer : public InstVisitor { - typedef InstVisitor Base; - friend class InstVisitor; - - // DataLayout if available, or null. - const DataLayout *const TD; - - // The called function. - Function &F; - - int Threshold; - int Cost; - const bool AlwaysInline; - - bool IsCallerRecursive; - bool IsRecursiveCall; - bool ExposesReturnsTwice; - bool HasDynamicAlloca; - /// Number of bytes allocated statically by the callee. - uint64_t AllocatedSize; - unsigned NumInstructions, NumVectorInstructions; - int FiftyPercentVectorBonus, TenPercentVectorBonus; - int VectorBonus; - - // While we walk the potentially-inlined instructions, we build up and - // maintain a mapping of simplified values specific to this callsite. The - // idea is to propagate any special information we have about arguments to - // this call through the inlinable section of the function, and account for - // likely simplifications post-inlining. The most important aspect we track - // is CFG altering simplifications -- when we prove a basic block dead, that - // can cause dramatic shifts in the cost of inlining a function. - DenseMap SimplifiedValues; - - // Keep track of the values which map back (through function arguments) to - // allocas on the caller stack which could be simplified through SROA. - DenseMap SROAArgValues; - - // The mapping of caller Alloca values to their accumulated cost savings. If - // we have to disable SROA for one of the allocas, this tells us how much - // cost must be added. - DenseMap SROAArgCosts; - - // Keep track of values which map to a pointer base and constant offset. - DenseMap > ConstantOffsetPtrs; - - // Custom simplification helper routines. - bool isAllocaDerivedArg(Value *V); - bool lookupSROAArgAndCost(Value *V, Value *&Arg, - DenseMap::iterator &CostIt); - void disableSROA(DenseMap::iterator CostIt); - void disableSROA(Value *V); - void accumulateSROACost(DenseMap::iterator CostIt, - int InstructionCost); - bool handleSROACandidate(bool IsSROAValid, - DenseMap::iterator CostIt, - int InstructionCost); - bool isGEPOffsetConstant(GetElementPtrInst &GEP); - bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); - ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); - - // Custom analysis routines. - bool analyzeBlock(BasicBlock *BB); - - // Disable several entry points to the visitor so we don't accidentally use - // them by declaring but not defining them here. - void visit(Module *); void visit(Module &); - void visit(Function *); void visit(Function &); - void visit(BasicBlock *); void visit(BasicBlock &); - - // Provide base case for our instruction visit. 
- bool visitInstruction(Instruction &I); - - // Our visit overrides. - bool visitAlloca(AllocaInst &I); - bool visitPHI(PHINode &I); - bool visitGetElementPtr(GetElementPtrInst &I); - bool visitBitCast(BitCastInst &I); - bool visitPtrToInt(PtrToIntInst &I); - bool visitIntToPtr(IntToPtrInst &I); - bool visitCastInst(CastInst &I); - bool visitUnaryInstruction(UnaryInstruction &I); - bool visitICmp(ICmpInst &I); - bool visitSub(BinaryOperator &I); - bool visitBinaryOperator(BinaryOperator &I); - bool visitLoad(LoadInst &I); - bool visitStore(StoreInst &I); - bool visitCallSite(CallSite CS); - -public: - CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold) - : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - AlwaysInline(F.getFnAttributes().hasAttribute(Attributes::AlwaysInline)), - IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0), - NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { - } - - bool analyzeCall(CallSite CS); - - int getThreshold() { return Threshold; } - int getCost() { return Cost; } - bool isAlwaysInline() { return AlwaysInline; } - - // Keep a bunch of stats about the cost savings found so we can print them - // out when debugging. - unsigned NumConstantArgs; - unsigned NumConstantOffsetPtrArgs; - unsigned NumAllocaArgs; - unsigned NumConstantPtrCmps; - unsigned NumConstantPtrDiffs; - unsigned NumInstructionsSimplified; - unsigned SROACostSavings; - unsigned SROACostSavingsLost; - - void dump(); -}; - -} // namespace - -/// \brief Test whether the given value is an Alloca-derived function argument. -bool CallAnalyzer::isAllocaDerivedArg(Value *V) { - return SROAArgValues.count(V); -} - -/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. -/// Returns false if V does not map to a SROA-candidate. -bool CallAnalyzer::lookupSROAArgAndCost( - Value *V, Value *&Arg, DenseMap::iterator &CostIt) { - if (SROAArgValues.empty() || SROAArgCosts.empty()) - return false; - - DenseMap::iterator ArgIt = SROAArgValues.find(V); - if (ArgIt == SROAArgValues.end()) - return false; - - Arg = ArgIt->second; - CostIt = SROAArgCosts.find(Arg); - return CostIt != SROAArgCosts.end(); -} - -/// \brief Disable SROA for the candidate marked by this cost iterator. -/// -/// This marks the candidate as no longer viable for SROA, and adds the cost -/// savings associated with it back into the inline cost measurement. -void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { - // If we're no longer able to perform SROA we need to undo its cost savings - // and prevent subsequent analysis. - Cost += CostIt->second; - SROACostSavings -= CostIt->second; - SROACostSavingsLost += CostIt->second; - SROAArgCosts.erase(CostIt); -} - -/// \brief If 'V' maps to a SROA candidate, disable SROA for it. -void CallAnalyzer::disableSROA(Value *V) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(V, SROAArg, CostIt)) - disableSROA(CostIt); -} - -/// \brief Accumulate the given cost for a particular SROA candidate. 
-void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, - int InstructionCost) { - CostIt->second += InstructionCost; - SROACostSavings += InstructionCost; -} - -/// \brief Helper for the common pattern of handling a SROA candidate. -/// Either accumulates the cost savings if the SROA remains valid, or disables -/// SROA for the candidate. -bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, - DenseMap::iterator CostIt, - int InstructionCost) { - if (IsSROAValid) { - accumulateSROACost(CostIt, InstructionCost); - return true; - } - - disableSROA(CostIt); - return false; -} - -/// \brief Check whether a GEP's indices are all constant. -/// -/// Respects any simplified values known during the analysis of this callsite. -bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { - for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) - if (!isa(*I) && !SimplifiedValues.lookup(*I)) - return false; - - return true; -} - -/// \brief Accumulate a constant GEP offset into an APInt if possible. -/// -/// Returns false if unable to compute the offset for any reason. Respects any -/// simplified values known during the analysis of this callsite. -bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - if (!TD) - return false; - - unsigned IntPtrWidth = TD->getPointerSizeInBits(); - assert(IntPtrWidth == Offset.getBitWidth()); - - for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); - GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast(GTI.getOperand()); - if (!OpC) - if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) - OpC = dyn_cast(SimpleOp); - if (!OpC) - return false; - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD->getStructLayout(STy); - Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); - continue; - } - - APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); - Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; - } - return true; -} - -bool CallAnalyzer::visitAlloca(AllocaInst &I) { - // FIXME: Check whether inlining will turn a dynamic alloca into a static - // alloca, and handle that case. - - // Accumulate the allocated size. - if (I.isStaticAlloca()) { - Type *Ty = I.getAllocatedType(); - AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : - Ty->getPrimitiveSizeInBits()); - } - - // We will happily inline static alloca instructions or dynamic alloca - // instructions in always-inline situations. - if (AlwaysInline || I.isStaticAlloca()) - return Base::visitAlloca(I); - - // FIXME: This is overly conservative. Dynamic allocas are inefficient for - // a variety of reasons, and so we would like to not inline them into - // functions which don't currently have a dynamic alloca. This simply - // disables inlining altogether in the presence of a dynamic alloca. - HasDynamicAlloca = true; - return false; -} - -bool CallAnalyzer::visitPHI(PHINode &I) { - // FIXME: We should potentially be tracking values through phi nodes, - // especially when they collapse to a single value due to deleted CFG edges - // during inlining. - - // FIXME: We need to propagate SROA *disabling* through phi nodes, even - // though we don't want to propagate it's bonuses. The idea is to disable - // SROA if it *might* be used in an inappropriate manner. - - // Phi nodes are always zero-cost. 
- return true; -} - -bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), - SROAArg, CostIt); - - // Try to fold GEPs of constant-offset call site argument pointers. This - // requires target data and inbounds GEPs. - if (TD && I.isInBounds()) { - // Check if we have a base + offset for the pointer. - Value *Ptr = I.getPointerOperand(); - std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); - if (BaseAndOffset.first) { - // Check if the offset of this GEP is constant, and if so accumulate it - // into Offset. - if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { - // Non-constant GEPs aren't folded, and disable SROA. - if (SROACandidate) - disableSROA(CostIt); - return false; - } - - // Add the result as a new mapping to Base + Offset. - ConstantOffsetPtrs[&I] = BaseAndOffset; - - // Also handle SROA candidates here, we already know that the GEP is - // all-constant indexed. - if (SROACandidate) - SROAArgValues[&I] = SROAArg; - - return true; - } - } - - if (isGEPOffsetConstant(I)) { - if (SROACandidate) - SROAArgValues[&I] = SROAArg; - - // Constant GEPs are modeled as free. - return true; - } - - // Variable GEPs will require math and will disable SROA. - if (SROACandidate) - disableSROA(CostIt); - return false; -} - -bool CallAnalyzer::visitBitCast(BitCastInst &I) { - // Propagate constants through bitcasts. - if (Constant *COp = dyn_cast(I.getOperand(0))) - if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offsets through casts - std::pair BaseAndOffset - = ConstantOffsetPtrs.lookup(I.getOperand(0)); - // Casts don't change the offset, just wrap it up. - if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - - // Also look for SROA candidates here. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - // Bitcasts are always zero cost. - return true; -} - -bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { - // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast(I.getOperand(0))) - if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offset pairs when converted to a plain integer provided the - // integer is large enough to represent the pointer. - unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - if (TD && IntegerSize >= TD->getPointerSizeInBits()) { - std::pair BaseAndOffset - = ConstantOffsetPtrs.lookup(I.getOperand(0)); - if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - } - - // This is really weird. Technically, ptrtoint will disable SROA. However, - // unless that ptrtoint is *used* somewhere in the live basic blocks after - // inlining, it will be nuked, and SROA should proceed. All of the uses which - // would block SROA would also block SROA if applied directly to a pointer, - // and so we can just add the integer in here. The only places where SROA is - // preserved either cannot fire on an integer, or won't in-and-of themselves - // disable SROA (ext) w/o some later use that we would see and disable. 
- Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - return isInstructionFree(&I, TD); -} - -bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { - // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast(I.getOperand(0))) - if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offset pairs when round-tripped through a pointer without - // modifications provided the integer is not too large. - Value *Op = I.getOperand(0); - unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (TD && IntegerSize <= TD->getPointerSizeInBits()) { - std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); - if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - } - - // "Propagate" SROA here in the same manner as we do for ptrtoint above. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - return isInstructionFree(&I, TD); -} - -bool CallAnalyzer::visitCastInst(CastInst &I) { - // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast(I.getOperand(0))) - if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. - disableSROA(I.getOperand(0)); - - return isInstructionFree(&I, TD); -} - -bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { - Value *Operand = I.getOperand(0); - Constant *Ops[1] = { dyn_cast(Operand) }; - if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) - if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), - Ops, TD)) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable any SROA on the argument to arbitrary unary operators. - disableSROA(Operand); - - return false; -} - -bool CallAnalyzer::visitICmp(ICmpInst &I) { - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - // First try to handle simplified comparisons. - if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - if (Constant *CLHS = dyn_cast(LHS)) - if (Constant *CRHS = dyn_cast(RHS)) - if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - return true; - } - - // Otherwise look for a comparison between constant offset pointers with - // a common base. - Value *LHSBase, *RHSBase; - APInt LHSOffset, RHSOffset; - llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); - if (LHSBase) { - llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); - if (RHSBase && LHSBase == RHSBase) { - // We have common bases, fold the icmp to a constant based on the - // offsets. - Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); - Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); - if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - ++NumConstantPtrCmps; - return true; - } - } - } - - // If the comparison is an equality comparison with null, we can simplify it - // for any alloca-derived argument. - if (I.isEquality() && isa(I.getOperand(1))) - if (isAllocaDerivedArg(I.getOperand(0))) { - // We can actually predict the result of comparisons between an - // alloca-derived value and null. 
Note that this fires regardless of - // SROA firing. - bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; - SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) - : ConstantInt::getFalse(I.getType()); - return true; - } - - // Finally check for SROA candidates in comparisons. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (isa(I.getOperand(1))) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitSub(BinaryOperator &I) { - // Try to handle a special case: we can fold computing the difference of two - // constant-related pointers. - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - Value *LHSBase, *RHSBase; - APInt LHSOffset, RHSOffset; - llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); - if (LHSBase) { - llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); - if (RHSBase && LHSBase == RHSBase) { - // We have common bases, fold the subtract to a constant based on the - // offsets. - Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); - Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); - if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { - SimplifiedValues[&I] = C; - ++NumConstantPtrDiffs; - return true; - } - } - } - - // Otherwise, fall back to the generic logic for simplifying and handling - // instructions. - return Base::visitSub(I); -} - -bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); - if (Constant *C = dyn_cast_or_null(SimpleV)) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable any SROA on arguments to arbitrary, unsimplified binary operators. - disableSROA(LHS); - disableSROA(RHS); - - return false; -} - -bool CallAnalyzer::visitLoad(LoadInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitStore(StoreInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitCallSite(CallSite CS) { - if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && - !F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice)) { - // This aborts the entire analysis. - ExposesReturnsTwice = true; - return false; - } - - if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { - switch (II->getIntrinsicID()) { - default: - return Base::visitCallSite(CS); - - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: - // SROA can usually chew through these intrinsics, but they aren't free. - return false; - } - } - - if (Function *F = CS.getCalledFunction()) { - if (F == CS.getInstruction()->getParent()->getParent()) { - // This flag will fully abort the analysis, so don't bother with anything - // else. 
- IsRecursiveCall = true; - return false; - } - - if (!callIsSmall(CS)) { - // We account for the average 1 instruction per call argument setup - // here. - Cost += CS.arg_size() * InlineConstants::InstrCost; - - // Everything other than inline ASM will also have a significant cost - // merely from making the call. - if (!isa(CS.getCalledValue())) - Cost += InlineConstants::CallPenalty; - } - - return Base::visitCallSite(CS); - } - - // Otherwise we're in a very special case -- an indirect function call. See - // if we can be particularly clever about this. - Value *Callee = CS.getCalledValue(); - - // First, pay the price of the argument setup. We account for the average - // 1 instruction per call argument setup here. - Cost += CS.arg_size() * InlineConstants::InstrCost; - - // Next, check if this happens to be an indirect function call to a known - // function in this inline context. If not, we've done all we can. - Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); - if (!F) - return Base::visitCallSite(CS); - - // If we have a constant that we are calling as a function, we can peer - // through it and see the function target. This happens not infrequently - // during devirtualization and so we want to give it a hefty bonus for - // inlining, but cap that bonus in the event that inlining wouldn't pan - // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold); - if (CA.analyzeCall(CS)) { - // We were able to inline the indirect call! Subtract the cost from the - // bonus we want to apply, but don't go below zero. - Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); - } - - return Base::visitCallSite(CS); -} - -bool CallAnalyzer::visitInstruction(Instruction &I) { - // Some instructions are free. All of the free intrinsics can also be - // handled by SROA, etc. - if (isInstructionFree(&I, TD)) - return true; - - // We found something we don't understand or can't handle. Mark any SROA-able - // values in the operand list as no longer viable. - for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) - disableSROA(*OI); - - return false; -} - - -/// \brief Analyze a basic block for its contribution to the inline cost. -/// -/// This method walks the analyzer over every instruction in the given basic -/// block and accounts for their cost during inlining at this callsite. It -/// aborts early if the threshold has been exceeded or an impossible to inline -/// construct has been detected. It returns false if inlining is no longer -/// viable, and true if inlining remains viable. -bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { - for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); - I != E; ++I) { - ++NumInstructions; - if (isa(I) || I->getType()->isVectorTy()) - ++NumVectorInstructions; - - // If the instruction simplified to a constant, there is no cost to this - // instruction. Visit the instructions using our InstVisitor to account for - // all of the per-instruction logic. The visit tree returns true if we - // consumed the instruction in any way, and false if the instruction's base - // cost should count against inlining. - if (Base::visit(I)) - ++NumInstructionsSimplified; - else - Cost += InlineConstants::InstrCost; - - // If the visit this instruction detected an uninlinable pattern, abort. 
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) - return false; - - // If the caller is a recursive function then we don't want to inline - // functions which allocate a lot of stack space because it would increase - // the caller stack usage dramatically. - if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) - return false; - - if (NumVectorInstructions > NumInstructions/2) - VectorBonus = FiftyPercentVectorBonus; - else if (NumVectorInstructions > NumInstructions/10) - VectorBonus = TenPercentVectorBonus; - else - VectorBonus = 0; - - // Check if we've past the threshold so we don't spin in huge basic - // blocks that will never inline. - if (!AlwaysInline && Cost > (Threshold + VectorBonus)) - return false; - } - - return true; -} - -/// \brief Compute the base pointer and cumulative constant offsets for V. -/// -/// This strips all constant offsets off of V, leaving it the base pointer, and -/// accumulates the total constant offset applied in the returned constant. It -/// returns 0 if V is not a pointer, and returns the constant '0' if there are -/// no constant offsets applied. -ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { - if (!TD || !V->getType()->isPointerTy()) - return 0; - - unsigned IntPtrWidth = TD->getPointerSizeInBits(); - APInt Offset = APInt::getNullValue(IntPtrWidth); - - // Even though we don't look through PHI nodes, we could be called on an - // instruction in an unreachable block, which may be on a cycle. - SmallPtrSet Visited; - Visited.insert(V); - do { - if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) - return 0; - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast(V)) { - if (GA->mayBeOverridden()) - break; - V = GA->getAliasee(); - } else { - break; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } while (Visited.insert(V)); - - Type *IntPtrTy = TD->getIntPtrType(V->getContext()); - return cast(ConstantInt::get(IntPtrTy, Offset)); -} - -/// \brief Analyze a call site for potential inlining. -/// -/// Returns true if inlining this call is viable, and false if it is not -/// viable. It computes the cost and adjusts the threshold based on numerous -/// factors and heuristics. If this method returns false but the computed cost -/// is below the computed threshold, then inlining was forcibly disabled by -/// some artifact of the rountine. -bool CallAnalyzer::analyzeCall(CallSite CS) { - ++NumCallsAnalyzed; - - // Track whether the post-inlining function would have more than one basic - // block. A single basic block is often intended for inlining. Balloon the - // threshold by 50% until we pass the single-BB phase. - bool SingleBB = true; - int SingleBBBonus = Threshold / 2; - Threshold += SingleBBBonus; - - // Unless we are always-inlining, perform some tweaks to the cost and - // threshold based on the direct callsite information. - if (!AlwaysInline) { - // We want to more aggressively inline vector-dense kernels, so up the - // threshold, and we'll lower it if the % of vector instructions gets too - // low. - assert(NumInstructions == 0); - assert(NumVectorInstructions == 0); - FiftyPercentVectorBonus = Threshold; - TenPercentVectorBonus = Threshold / 2; - - // Give out bonuses per argument, as the instructions setting them up will - // be gone after inlining. 
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (TD && CS.isByValArgument(I)) { - // We approximate the number of loads and stores needed by dividing the - // size of the byval type by the target's pointer size. - PointerType *PTy = cast(CS.getArgument(I)->getType()); - unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = TD->getPointerSizeInBits(); - // Ceiling division. - unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; - - // If it generates more than 8 stores it is likely to be expanded as an - // inline memcpy so we take that as an upper bound. Otherwise we assume - // one load and one store per word copied. - // FIXME: The maxStoresPerMemcpy setting from the target should be used - // here instead of a magic number of 8, but it's not available via - // DataLayout. - NumStores = std::min(NumStores, 8U); - - Cost -= 2 * NumStores * InlineConstants::InstrCost; - } else { - // For non-byval arguments subtract off one instruction per call - // argument. - Cost -= InlineConstants::InstrCost; - } - } - - // If there is only one call of the function, and it has internal linkage, - // the cost of inlining it drops dramatically. - if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction()) - Cost += InlineConstants::LastCallToStaticBonus; - - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this unless there is literally zero - // cost. - Instruction *Instr = CS.getInstruction(); - if (InvokeInst *II = dyn_cast(Instr)) { - if (isa(II->getNormalDest()->begin())) - Threshold = 1; - } else if (isa(++BasicBlock::iterator(Instr))) - Threshold = 1; - - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; - - // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) - return false; - } - - if (F.empty()) - return true; - - Function *Caller = CS.getInstruction()->getParent()->getParent(); - // Check if the caller function is recursive itself. - for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); - U != E; ++U) { - CallSite Site(cast(*U)); - if (!Site) - continue; - Instruction *I = Site.getInstruction(); - if (I->getParent()->getParent() == Caller) { - IsCallerRecursive = true; - break; - } - } - - // Track whether we've seen a return instruction. The first return - // instruction is free, as at least one will usually disappear in inlining. - bool HasReturn = false; - - // Populate our simplified values by mapping from function arguments to call - // arguments with known important simplifications. - CallSite::arg_iterator CAI = CS.arg_begin(); - for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); - FAI != FAE; ++FAI, ++CAI) { - assert(CAI != CS.arg_end()); - if (Constant *C = dyn_cast(CAI)) - SimplifiedValues[FAI] = C; - - Value *PtrArg = *CAI; - if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { - ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); - - // We can SROA any pointer arguments derived from alloca instructions. 
- if (isa(PtrArg)) { - SROAArgValues[FAI] = PtrArg; - SROAArgCosts[PtrArg] = 0; - } - } - } - NumConstantArgs = SimplifiedValues.size(); - NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); - NumAllocaArgs = SROAArgValues.size(); - - // The worklist of live basic blocks in the callee *after* inlining. We avoid - // adding basic blocks of the callee which can be proven to be dead for this - // particular call site in order to get more accurate cost estimates. This - // requires a somewhat heavyweight iteration pattern: we need to walk the - // basic blocks in a breadth-first order as we insert live successors. To - // accomplish this, prioritizing for small iterations because we exit after - // crossing our threshold, we use a small-size optimized SetVector. - typedef SetVector, - SmallPtrSet > BBSetVector; - BBSetVector BBWorklist; - BBWorklist.insert(&F.getEntryBlock()); - // Note that we *must not* cache the size, this loop grows the worklist. - for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { - // Bail out the moment we cross the threshold. This means we'll under-count - // the cost, but only when undercounting doesn't matter. - if (!AlwaysInline && Cost > (Threshold + VectorBonus)) - break; - - BasicBlock *BB = BBWorklist[Idx]; - if (BB->empty()) - continue; - - // Handle the terminator cost here where we can track returns and other - // function-wide constructs. - TerminatorInst *TI = BB->getTerminator(); - - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this - // indirect jump would jump from the inlined copy of the function into the - // original function which is extremely undefined behavior. - // FIXME: This logic isn't really right; we can safely inline functions - // with indirectbr's as long as no other function or global references the - // blockaddress of a block within the current function. And as a QOI issue, - // if someone is using a blockaddress without an indirectbr, and that - // reference somehow ends up in another function or global, we probably - // don't want to inline this function. - if (isa(TI)) - return false; - - if (!HasReturn && isa(TI)) - HasReturn = true; - else - Cost += InlineConstants::InstrCost; - - // Analyze the cost of this block. If we blow through the threshold, this - // returns false, and we can bail on out. - if (!analyzeBlock(BB)) { - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) - return false; - - // If the caller is a recursive function then we don't want to inline - // functions which allocate a lot of stack space because it would increase - // the caller stack usage dramatically. - if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) - return false; - - break; - } - - // Add in the live successors by first checking whether we have terminator - // that may be simplified based on the values simplified by this call. - if (BranchInst *BI = dyn_cast(TI)) { - if (BI->isConditional()) { - Value *Cond = BI->getCondition(); - if (ConstantInt *SimpleCond - = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 
1 : 0)); - continue; - } - } - } else if (SwitchInst *SI = dyn_cast(TI)) { - Value *Cond = SI->getCondition(); - if (ConstantInt *SimpleCond - = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); - continue; - } - } - - // If we're unable to select a particular successor, just count all of - // them. - for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; - ++TIdx) - BBWorklist.insert(TI->getSuccessor(TIdx)); - - // If we had any successors at this point, than post-inlining is likely to - // have them as well. Note that we assume any basic blocks which existed - // due to branches or switches which folded above will also fold after - // inlining. - if (SingleBB && TI->getNumSuccessors() > 1) { - // Take off the bonus we applied to the threshold. - Threshold -= SingleBBBonus; - SingleBB = false; - } - } - - Threshold += VectorBonus; - - return AlwaysInline || Cost < Threshold; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -/// \brief Dump stats about this call's analysis. -void CallAnalyzer::dump() { -#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" - DEBUG_PRINT_STAT(NumConstantArgs); - DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); - DEBUG_PRINT_STAT(NumAllocaArgs); - DEBUG_PRINT_STAT(NumConstantPtrCmps); - DEBUG_PRINT_STAT(NumConstantPtrDiffs); - DEBUG_PRINT_STAT(NumInstructionsSimplified); - DEBUG_PRINT_STAT(SROACostSavings); - DEBUG_PRINT_STAT(SROACostSavingsLost); -#undef DEBUG_PRINT_STAT -} -#endif - -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { - return getInlineCost(CS, CS.getCalledFunction(), Threshold); -} - -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, - int Threshold) { - // Don't inline functions which can be redefined at link-time to mean - // something else. Don't inline functions marked noinline or call sites - // marked noinline. - if (!Callee || Callee->mayBeOverridden() || - Callee->getFnAttributes().hasAttribute(Attributes::NoInline) || - CS.isNoInline()) - return llvm::InlineCost::getNever(); - - DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "...\n"); - - CallAnalyzer CA(TD, *Callee, Threshold); - bool ShouldInline = CA.analyzeCall(CS); - - DEBUG(CA.dump()); - - // Check if there was a reason to force inlining or no inlining. 
- if (!ShouldInline && CA.getCost() < CA.getThreshold()) - return InlineCost::getNever(); - if (ShouldInline && (CA.isAlwaysInline() || - CA.getCost() >= CA.getThreshold())) - return InlineCost::getAlways(); - - return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); -} diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 3b385d26ba3c0..75a49eb90a88f 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -13,13 +13,13 @@ #define DEBUG_TYPE "instcount" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Function.h" +#include "llvm/InstVisitor.h" #include "llvm/Pass.h" -#include "llvm/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(TotalInsts , "Number of instructions (of all types)"); @@ -30,7 +30,7 @@ STATISTIC(TotalMemInst, "Number of memory instructions"); #define HANDLE_INST(N, OPCODE, CLASS) \ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts"); -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" namespace { @@ -43,7 +43,7 @@ namespace { #define HANDLE_INST(N, OPCODE, CLASS) \ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; } -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" void visitInstruction(Instruction &I) { errs() << "Instruction Count does not know about " << I; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index a76e5ad1b8f88..4a3c74e9db356 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -18,20 +18,20 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" -#include "llvm/GlobalAlias.h" -#include "llvm/Operator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/DataLayout.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -657,51 +657,26 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, RecursionLimit); } -/// \brief Accumulate the constant integer offset a GEP represents. -/// -/// Given a getelementptr instruction/constantexpr, accumulate the constant -/// offset from the base pointer into the provided APInt 'Offset'. Returns true -/// if the GEP has all-constant indices. Returns false if any non-constant -/// index is encountered leaving the 'Offset' in an undefined state. The -/// 'Offset' APInt must be the bitwidth of the target's pointer size. 
-static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP,
-                                APInt &Offset) {
-  unsigned IntPtrWidth = TD.getPointerSizeInBits();
-  assert(IntPtrWidth == Offset.getBitWidth());
-
-  gep_type_iterator GTI = gep_type_begin(GEP);
-  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
-       ++I, ++GTI) {
-    ConstantInt *OpC = dyn_cast<ConstantInt>(*I);
-    if (!OpC) return false;
-    if (OpC->isZero()) continue;
-
-    // Handle a struct index, which adds its field offset to the pointer.
-    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
-      unsigned ElementIdx = OpC->getZExtValue();
-      const StructLayout *SL = TD.getStructLayout(STy);
-      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
-      continue;
-    }
-
-    APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType()));
-    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
-  }
-  return true;
-}
-
 /// \brief Compute the base pointer and cumulative constant offsets for V.
 ///
 /// This strips all constant offsets off of V, leaving it the base pointer, and
 /// accumulates the total constant offset applied in the returned constant. It
 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
 /// no constant offsets applied.
-static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
+///
+/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
+/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
+/// folding.
+static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
                                                 Value *&V) {
-  if (!V->getType()->isPointerTy())
-    return 0;
+  assert(V->getType()->getScalarType()->isPointerTy());
+
+  // Without DataLayout, just be conservative for now. Theoretically, more
+  // could be done in this case.
+  if (!TD)
+    return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
 
-  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  unsigned IntPtrWidth = TD->getPointerSizeInBits();
   APInt Offset = APInt::getNullValue(IntPtrWidth);
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -710,7 +685,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
   Visited.insert(V);
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-      if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset))
+      if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset))
         break;
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -722,23 +697,24 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
     } else {
       break;
     }
-    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+    assert(V->getType()->getScalarType()->isPointerTy() &&
+           "Unexpected operand type!");
   } while (Visited.insert(V));
 
-  Type *IntPtrTy = TD.getIntPtrType(V->getContext());
-  return ConstantInt::get(IntPtrTy, Offset);
+  Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+  Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
+  if (V->getType()->isVectorTy())
+    return ConstantVector::getSplat(V->getType()->getVectorNumElements(),
+                                    OffsetIntPtr);
+  return OffsetIntPtr;
 }
 
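
// The new vector-typed return path above replicates one scalar offset across
// every lane, since a vector-of-pointers GEP applies the same constant offset
// lane-wise. Toy sketch of the idea (not ConstantVector::getSplat itself):
#include <cstdint>
#include <vector>

std::vector<int64_t> splatOffset(unsigned NumLanes, int64_t Offset) {
  return std::vector<int64_t>(NumLanes, Offset); // one copy per lane
}
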
 /// \brief Compute the constant difference between two pointer values.
 /// If the difference is not a constant, returns zero.
-static Constant *computePointerDifference(const DataLayout &TD,
+static Constant *computePointerDifference(const DataLayout *TD,
                                           Value *LHS, Value *RHS) {
   Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
-  if (!LHSOffset)
-    return 0;
   Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
-  if (!RHSOffset)
-    return 0;
 
   // If LHS and RHS are not related via constant offsets to the same base
   // value, there is nothing we can do here.
@@ -852,9 +828,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
     return W;
 
   // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
-  if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) &&
+  if (match(Op0, m_PtrToInt(m_Value(X))) &&
       match(Op1, m_PtrToInt(m_Value(Y))))
-    if (Constant *Result = computePointerDifference(*Q.TD, X, Y))
+    if (Constant *Result = computePointerDifference(Q.TD, X, Y))
       return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
 
   // Mul distributes over Sub. Try some generic simplifications based on this.
@@ -886,6 +862,112 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              RecursionLimit);
 }
 
+/// Given operands for an FAdd, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                               const Query &Q, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(),
+                                      Ops, Q.TD, Q.TLI);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // fadd X, -0 ==> X
+  if (match(Op1, m_NegZero()))
+    return Op0;
+
+  // fadd X, 0 ==> X, when we know X is not -0
+  if (match(Op1, m_Zero()) &&
+      (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+    return Op0;
+
+  // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+  //   where nnan and ninf have to occur at least once somewhere in this
+  //   expression
+  Value *SubOp = 0;
+  if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
+    SubOp = Op1;
+  else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
+    SubOp = Op0;
+  if (SubOp) {
+    Instruction *FSub = cast<Instruction>(SubOp);
+    if ((FMF.noNaNs() || FSub->hasNoNaNs()) &&
+        (FMF.noInfs() || FSub->hasNoInfs()))
+      return Constant::getNullValue(Op0->getType());
+  }
+
+  return 0;
+}
+
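
// The sign-of-zero distinction that drives the two fadd folds above, checked
// directly with IEEE-754 doubles (self-contained illustration; the asserts
// mirror the comments in the code, not the LLVM API):
#include <cassert>
#include <cmath>

int main() {
  // fadd X, -0.0 ==> X is unconditional: -0.0 is the additive identity for
  // every value, including +0.0 and -0.0 themselves.
  assert(!std::signbit(0.0 + -0.0) && std::signbit(-0.0 + -0.0));
  // fadd X, +0.0 ==> X is *not* sign-safe at X == -0.0, which is why the fold
  // above also requires nsz or a CannotBeNegativeZero proof.
  assert(!std::signbit(-0.0 + 0.0)); // result is +0.0, not X == -0.0
  return 0;
}
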
+
+/// Given operands for an FSub, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                               const Query &Q, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(),
+                                      Ops, Q.TD, Q.TLI);
+    }
+  }
+
+  // fsub X, 0 ==> X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // fsub X, -0 ==> X, when we know X is not -0
+  if (match(Op1, m_NegZero()) &&
+      (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+    return Op0;
+
+  // fsub 0, (fsub -0.0, X) ==> X
+  Value *X;
+  if (match(Op0, m_AnyZero())) {
+    if (match(Op1, m_FSub(m_NegZero(), m_Value(X))))
+      return X;
+    if (FMF.noSignedZeros() && match(Op1, m_FSub(m_AnyZero(), m_Value(X))))
+      return X;
+  }
+
+  // fsub nnan ninf x, x ==> 0.0
+  if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1)
+    return Constant::getNullValue(Op0->getType());
+
+  return 0;
+}
+
+/// Given the operands for an FMul, see if we can fold the result
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
+                               FastMathFlags FMF,
+                               const Query &Q,
+                               unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(),
+                                      Ops, Q.TD, Q.TLI);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // fmul X, 1.0 ==> X
+  if (match(Op1, m_FPOne()))
+    return Op0;
+
+  // fmul nnan nsz X, 0 ==> 0
+  if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
+    return Op1;
+
+  return 0;
+}
+
 /// SimplifyMulInst - Given operands for a Mul, see if we can
 /// fold the result. If not, this returns null.
 static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
@@ -951,6 +1033,26 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
+Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT) {
+  return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
+Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT) {
+  return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
+                              FastMathFlags FMF,
+                              const DataLayout *TD,
+                              const TargetLibraryInfo *TLI,
+                              const DominatorTree *DT) {
+  return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
 Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
@@ -1364,9 +1466,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
 
   // A & (-A) = A if A is a power of two or zero.
   if (match(Op0, m_Neg(m_Specific(Op1))) ||
       match(Op1, m_Neg(m_Specific(Op0)))) {
-    if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true))
+    if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true))
       return Op0;
-    if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true))
+    if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true))
       return Op1;
   }
 
@@ -1591,9 +1693,48 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
   return 0;
 }
 
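The wrappers added above export the floating-point folds to clients. A hypothetical caller, as a sketch only (foldFMuls and its traversal are not part of the patch; it assumes the TLI and DT parameters default to null as for the other entry points, and TD may be null since these folds don't consult DataLayout):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Operator.h"

    static void foldFMuls(llvm::BasicBlock &BB, const llvm::DataLayout *TD) {
      using namespace llvm;
      for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
        BinaryOperator *BO = dyn_cast<BinaryOperator>(&*I);
        if (!BO || BO->getOpcode() != Instruction::FMul)
          continue;
        // The instruction's own fast-math flags gate the nnan/nsz folds.
        if (Value *V = SimplifyFMulInst(BO->getOperand(0), BO->getOperand(1),
                                        BO->getFastMathFlags(), TD))
          BO->replaceAllUsesWith(V);
      }
    }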
-static Constant *computePointerICmp(const DataLayout &TD,
+// A significant optimization not implemented here is assuming that alloca
+// addresses are not equal to incoming argument values. They don't *alias*,
+// as we say, but that doesn't mean they aren't equal, so we take a
+// conservative approach.
+//
+// This is inspired in part by C++11 5.10p1:
+//   "Two pointers of the same type compare equal if and only if they are both
+//   null, both point to the same function, or both represent the same
+//   address."
+//
+// This is pretty permissive.
+//
+// It's also partly due to C11 6.5.9p6:
+//   "Two pointers compare equal if and only if both are null pointers, both are
+//   pointers to the same object (including a pointer to an object and a
+//   subobject at its beginning) or function, both are pointers to one past the
+//   last element of the same array object, or one is a pointer to one past the
+//   end of one array object and the other is a pointer to the start of a
+//   different array object that happens to immediately follow the first array
+//   object in the address space."
+//
+// C11's version is more restrictive; however, there's no reason why an argument
+// couldn't be a one-past-the-end value for a stack object in the caller and be
+// equal to the beginning of a stack object in the callee.
+//
+// If the C and C++ standards are ever made sufficiently restrictive in this
+// area, it may be possible to update LLVM's semantics accordingly and reinstate
+// this optimization.
+static Constant *computePointerICmp(const DataLayout *TD,
+                                    const TargetLibraryInfo *TLI,
                                     CmpInst::Predicate Pred,
                                     Value *LHS, Value *RHS) {
+  // First, skip past any trivial no-ops.
+  LHS = LHS->stripPointerCasts();
+  RHS = RHS->stripPointerCasts();
+
+  // A non-null pointer is not equal to a null pointer.
+  if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) &&
+      (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
+    return ConstantInt::get(GetCompareTy(LHS),
+                            !CmpInst::isTrueWhenEqual(Pred));
+
   // We can only fold certain predicates on pointer comparisons.
   switch (Pred) {
   default:
@@ -1616,19 +1757,83 @@ static Constant *computePointerICmp(const DataLayout &TD,
     break;
   }
 
+  // Strip off any constant offsets so that we can reason about them.
+  // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets
+  // here and compare base addresses like AliasAnalysis does; however, there are
+  // numerous hazards. AliasAnalysis and its utilities rely on special rules
+  // governing loads and stores which don't apply to icmps. Also, AliasAnalysis
+  // doesn't need to guarantee pointer inequality when it says NoAlias.
   Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
-  if (!LHSOffset)
-    return 0;
   Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
-  if (!RHSOffset)
-    return 0;
 
-  // If LHS and RHS are not related via constant offsets to the same base
-  // value, there is nothing we can do here.
-  if (LHS != RHS)
-    return 0;
+  // If LHS and RHS are related via constant offsets to the same base
+  // value, we can replace it with an icmp which just compares the offsets.
+  if (LHS == RHS)
+    return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+
+  // Various optimizations for (in)equality comparisons.
+  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
+    // Different non-empty allocations that exist at the same time have
+    // different addresses (if the program can tell). Global variables always
+    // exist, so they always exist during the lifetime of each other and all
+    // allocas. Two different allocas usually have different addresses...
+    //
+    // However, if there's an @llvm.stackrestore dynamically in between two
+    // allocas, they may have the same address. It's tempting to reduce the
+    // scope of the problem by only looking at *static* allocas here. That would
+    // cover the majority of allocas while significantly reducing the likelihood
+    // of having an @llvm.stackrestore pop up in the middle. However, it's not
+    // actually impossible for an @llvm.stackrestore to pop up in the middle of
+    // an entry block. Also, if we have a block that's not attached to a
+    // function, we can't tell if it's "static" under the current definition.
+    // Theoretically, this problem could be fixed by creating a new kind of
+    // instruction specifically for static allocas. Such a new instruction
+    // could be required to be at the top of the entry block, thus preventing it
+    // from being subject to an @llvm.stackrestore. Instcombine could even
+    // convert regular allocas into these special allocas. It'd be nifty.
+    // However, until then, this problem remains open.
+    //
+    // So, we'll assume that two non-empty allocas have different addresses
+    // for now.
+    //
+    // With all that, if the offsets are within the bounds of their allocations
+    // (and not one-past-the-end! so we can't use inbounds!), and their
+    // allocations aren't the same, the pointers are not equal.
+    //
+    // Note that it's not necessary to check for LHS being a global variable
+    // address, due to canonicalization and constant folding.
+    if (isa<AllocaInst>(LHS) &&
+        (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) {
+      ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
+      ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
+      uint64_t LHSSize, RHSSize;
+      if (LHSOffsetCI && RHSOffsetCI &&
+          getObjectSize(LHS, LHSSize, TD, TLI) &&
+          getObjectSize(RHS, RHSSize, TD, TLI)) {
+        const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
+        const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
+        if (!LHSOffsetValue.isNegative() &&
+            !RHSOffsetValue.isNegative() &&
+            LHSOffsetValue.ult(LHSSize) &&
+            RHSOffsetValue.ult(RHSSize)) {
+          return ConstantInt::get(GetCompareTy(LHS),
+                                  !CmpInst::isTrueWhenEqual(Pred));
+        }
+      }
 
-  return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+      // Repeat the above check but this time without depending on DataLayout
+      // or being able to compute a precise size.
+      if (!cast<PointerType>(LHS->getType())->isEmptyTy() &&
+          !cast<PointerType>(RHS->getType())->isEmptyTy() &&
+          LHSOffset->isNullValue() &&
+          RHSOffset->isNullValue())
+        return ConstantInt::get(GetCompareTy(LHS),
+                                !CmpInst::isTrueWhenEqual(Pred));
+    }
+  }
+
+  // Otherwise, fail.
+  return 0;
 }
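As a source-level illustration of what the new path proves (my own example, not a test from the patch):

    // Two distinct, non-empty stack objects with zero offsets satisfy the
    // checks above, so the icmp eq can fold to false, even without
    // DataLayout, via the isEmptyTy branch.
    static bool addressesDiffer() {
      int X, Y;
      return &X == &Y; // computePointerICmp folds this to false
    }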
 
 /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
@@ -1693,62 +1898,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     }
   }
 
-  // icmp <object*>, <object*/null> - Different identified objects have
-  // different addresses (unless null), and what's more the address of an
-  // identified local is never equal to another argument (again, barring null).
-  // Note that generalizing to the case where LHS is a global variable address
-  // or null is pointless, since if both LHS and RHS are constants then we
-  // already constant folded the compare, and if only one of them is then we
-  // moved it to RHS already.
-  Value *LHSPtr = LHS->stripPointerCasts();
-  Value *RHSPtr = RHS->stripPointerCasts();
-  if (LHSPtr == RHSPtr)
-    return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
-  // Be more aggressive about stripping pointer adjustments when checking a
-  // comparison of an alloca address to another object.  We can rip off all
-  // inbounds GEP operations, even if they are variable.
-  LHSPtr = LHSPtr->stripInBoundsOffsets();
-  if (llvm::isIdentifiedObject(LHSPtr)) {
-    RHSPtr = RHSPtr->stripInBoundsOffsets();
-    if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) {
-      // If both sides are different identified objects, they aren't equal
-      // unless they're null.
-      if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) &&
-          Pred == CmpInst::ICMP_EQ)
-        return ConstantInt::get(ITy, false);
-
-      // A local identified object (alloca or noalias call) can't equal any
-      // incoming argument, unless they're both null or they belong to
-      // different functions. The latter happens during inlining.
-      if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr))
-        if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr))
-          if (LHSInst->getParent()->getParent() == RHSArg->getParent() &&
-              Pred == CmpInst::ICMP_EQ)
-            return ConstantInt::get(ITy, false);
-    }
-
-    // Assume that the constant null is on the right.
-    if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) {
-      if (Pred == CmpInst::ICMP_EQ)
-        return ConstantInt::get(ITy, false);
-      else if (Pred == CmpInst::ICMP_NE)
-        return ConstantInt::get(ITy, true);
-    }
-  } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) {
-    RHSPtr = RHSPtr->stripInBoundsOffsets();
-    // An alloca can't be equal to an argument unless they come from separate
-    // functions via inlining.
-    if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) {
-      if (LHSArg->getParent() == RHSInst->getParent()->getParent()) {
-        if (Pred == CmpInst::ICMP_EQ)
-          return ConstantInt::get(ITy, false);
-        else if (Pred == CmpInst::ICMP_NE)
-          return ConstantInt::get(ITy, true);
-      }
-    }
-  }
-
   // If we are comparing with zero then try hard since this is a common case.
   if (match(RHS, m_Zero())) {
     bool LHSKnownNonNegative, LHSKnownNegative;
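The deleted block assumed a local identified object can never equal an incoming argument. A sketch of the hazard behind the removal (hypothetical code; the parameter is imagined to be one past the end of an array in the caller):

    // C11 allows a one-past-the-end pointer to equal the address of an
    // unrelated object that happens to follow it in memory, so folding this
    // comparison to false from "argument vs. alloca" reasoning alone is
    // unsound.
    static bool mayBeEqual(int *OnePastEnd) {
      int Local;
      return OnePastEnd == &Local; // must not be constant-folded
    }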
@@ -2375,8 +2524,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 
   // Simplify comparisons of related pointers using a powerful, recursive
   // GEP-walk when we have target data available.
-  if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy())
-    if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS))
+  if (LHS->getType()->isPointerTy())
+    if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS))
       return C;
 
   if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
@@ -2697,10 +2846,18 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
   case Instruction::Add:
     return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
                            Q, MaxRecurse);
+  case Instruction::FAdd:
+    return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+
   case Instruction::Sub:
     return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
                            Q, MaxRecurse);
+  case Instruction::FSub:
+    return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+
   case Instruction::Mul:  return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+  case Instruction::FMul:
+    return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
   case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
@@ -2768,14 +2925,88 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                              RecursionLimit);
 }
 
-static Value *SimplifyCallInst(CallInst *CI, const Query &) {
-  // call undef -> undef
-  if (isa<UndefValue>(CI->getCalledValue()))
-    return UndefValue::get(CI->getType());
+static bool IsIdempotent(Intrinsic::ID ID) {
+  switch (ID) {
+  default: return false;
+
+  // Unary idempotent: f(f(x)) = f(x)
+  case Intrinsic::fabs:
+  case Intrinsic::floor:
+  case Intrinsic::ceil:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
+    return true;
+  }
+}
+
+template <typename IterTy>
+static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd,
+                                const Query &Q, unsigned MaxRecurse) {
+  // Perform idempotent optimizations
+  if (!IsIdempotent(IID))
+    return 0;
+
+  // Unary Ops
+  if (std::distance(ArgBegin, ArgEnd) == 1)
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+      if (II->getIntrinsicID() == IID)
+        return II;
 
   return 0;
 }
 
+template <typename IterTy>
+static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
+                           const Query &Q, unsigned MaxRecurse) {
+  Type *Ty = V->getType();
+  if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+    Ty = PTy->getElementType();
+  FunctionType *FTy = cast<FunctionType>(Ty);
+
+  // call undef -> undef
+  if (isa<UndefValue>(V))
+    return UndefValue::get(FTy->getReturnType());
+
+  Function *F = dyn_cast<Function>(V);
+  if (!F)
+    return 0;
+
+  if (unsigned IID = F->getIntrinsicID())
+    if (Value *Ret =
+        SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse))
+      return Ret;
+
+  if (!canConstantFoldCallTo(F))
+    return 0;
+
+  SmallVector<Constant *, 4> ConstantArgs;
+  ConstantArgs.reserve(ArgEnd - ArgBegin);
+  for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
+    Constant *C = dyn_cast<Constant>(*I);
+    if (!C)
+      return 0;
+    ConstantArgs.push_back(C);
+  }
+
+  return ConstantFoldCall(F, ConstantArgs, Q.TLI);
+}
+
+Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
+                          User::op_iterator ArgEnd, const DataLayout *TD,
+                          const TargetLibraryInfo *TLI,
+                          const DominatorTree *DT) {
+  return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(TD, TLI, DT),
+                        RecursionLimit);
+}
+
+Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
+                          const DataLayout *TD, const TargetLibraryInfo *TLI,
+                          const DominatorTree *DT) {
+  return ::SimplifyCall(V, Args.begin(), Args.end(), Query(TD, TLI, DT),
+                        RecursionLimit);
+}
+
 /// SimplifyInstruction - See if we can compute
a simplified version of this /// instruction. If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, @@ -2787,18 +3018,30 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, default: Result = ConstantFoldInstruction(I, TD, TLI); break; + case Instruction::FAdd: + Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), TD, TLI, DT); break; + case Instruction::FSub: + Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), TD, TLI, DT); break; + case Instruction::FMul: + Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Mul: Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; @@ -2872,9 +3115,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, case Instruction::PHI: Result = SimplifyPHINode(cast(I), Query (TD, TLI, DT)); break; - case Instruction::Call: - Result = SimplifyCallInst(cast(I), Query (TD, TLI, DT)); + case Instruction::Call: { + CallSite CS(cast(I)); + Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + TD, TLI, DT); break; + } case Instruction::Trunc: Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT); break; diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp index ca9cdcaf24648..26a0322407ec5 100644 --- a/lib/Analysis/Interval.cpp +++ b/lib/Analysis/Interval.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Interval.h" -#include "llvm/BasicBlock.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/Support/CFG.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 2b87d80d3732a..66b5e852c02f5 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -13,23 +13,22 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lazy-value-info" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" #include #include using namespace llvm; diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index 
efb722bb97c46..fefa51660f92a 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LibCallAliasAnalysis.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Function.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 81b0f46f3740e..0592ccb26c128 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -15,7 +15,7 @@ #include "llvm/Analysis/LibCallSemantics.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" using namespace llvm; /// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 6d6d580ed19ae..9393508a9e67e 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -34,26 +34,26 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/Lint.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InstVisitor.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; namespace { @@ -412,51 +412,49 @@ void Lint::visitMemoryReference(Instruction &I, } // Check for buffer overflows and misalignment. - if (TD) { - // Only handles memory references that read/write something simple like an - // alloca instruction or a global variable. - int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) { - // OK, so the access is to a constant offset from Ptr. Check that Ptr is - // something we can handle and if so extract the size of this base object - // along with its alignment. - uint64_t BaseSize = AliasAnalysis::UnknownSize; - unsigned BaseAlign = 0; - - if (AllocaInst *AI = dyn_cast(Base)) { - Type *ATy = AI->getAllocatedType(); - if (!AI->isArrayAllocation() && ATy->isSized()) - BaseSize = TD->getTypeAllocSize(ATy); - BaseAlign = AI->getAlignment(); - if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = TD->getABITypeAlignment(ATy); - } else if (GlobalVariable *GV = dyn_cast(Base)) { - // If the global may be defined differently in another compilation unit - // then don't warn about funky memory accesses. 
- if (GV->hasDefinitiveInitializer()) { - Type *GTy = GV->getType()->getElementType(); - if (GTy->isSized()) - BaseSize = TD->getTypeAllocSize(GTy); - BaseAlign = GV->getAlignment(); - if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = TD->getABITypeAlignment(GTy); - } + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. + uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast(Base)) { + Type *ATy = AI->getAllocatedType(); + if (TD && !AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (TD && BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. + if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (TD && GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (TD && BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); } - - // Accesses from before the start or after the end of the object are not - // defined. - Assert1(Size == AliasAnalysis::UnknownSize || - BaseSize == AliasAnalysis::UnknownSize || - (Offset >= 0 && Offset + Size <= BaseSize), - "Undefined behavior: Buffer overflow", &I); - - // Accesses that say that the memory is more aligned than it is are not - // defined. - if (Align == 0 && Ty && Ty->isSized()) - Align = TD->getABITypeAlignment(Ty); - Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); } + + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. 
+ if (TD && Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 73aa8b49cda58..0902a39a9f819 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -13,12 +13,13 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/DataLayout.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Operator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" using namespace llvm; /// AreEquivalentAddressValues - Test if A and B will obviously have the same @@ -48,48 +49,18 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } -/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and -/// bitcasts to get back to the underlying object being addressed, keeping -/// track of the offset in bytes from the GEPs relative to the result. -/// This is closely related to GetUnderlyingObject but is located -/// here to avoid making VMCore depend on DataLayout. -static Value *getUnderlyingObjectWithOffset(Value *V, const DataLayout *TD, - uint64_t &ByteOffset, - unsigned MaxLookup = 6) { - if (!V->getType()->isPointerTy()) - return V; - for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { - if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->hasAllConstantIndices()) - return V; - SmallVector Indices(GEP->op_begin() + 1, GEP->op_end()); - ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), - Indices); - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast(V)) { - if (GA->mayBeOverridden()) - return V; - V = GA->getAliasee(); - } else { - return V; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } - return V; -} - /// isSafeToLoadUnconditionally - Return true if we know that executing a load /// from this value cannot trap. If it is not obviously safe to load from the /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align, const DataLayout *TD) { - uint64_t ByteOffset = 0; + int64_t ByteOffset = 0; Value *Base = V; - if (TD) - Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); + Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD); + + if (ByteOffset < 0) // out of bounds + return false; Type *BaseType = 0; unsigned BaseAlign = 0; @@ -97,10 +68,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, // An alloca is safe to load from as load as it is suitably aligned. BaseType = AI->getAllocatedType(); BaseAlign = AI->getAlignment(); - } else if (const GlobalValue *GV = dyn_cast(Base)) { + } else if (const GlobalVariable *GV = dyn_cast(Base)) { // Global variables are safe to load from but their size cannot be // guaranteed if they are overridden. 
- if (!isa(GV) && !GV->mayBeOverridden()) { + if (!GV->mayBeOverridden()) { BaseType = GV->getType()->getElementType(); BaseAlign = GV->getAlignment(); } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 8341f9d830558..f1ad6506e4ba1 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -15,18 +15,19 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" #include using namespace llvm; @@ -213,14 +214,75 @@ bool Loop::isLoopSimplifyForm() const { /// isSafeToClone - Return true if the loop body is safe to clone in practice. /// Routines that reform the loop CFG and split edges often fail on indirectbr. bool Loop::isSafeToClone() const { - // Return false if any loop blocks contain indirectbrs. + // Return false if any loop blocks contain indirectbrs, or there are any calls + // to noduplicate functions. for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { - if (isa((*I)->getTerminator())) + if (isa((*I)->getTerminator())) { + return false; + } else if (const InvokeInst *II = dyn_cast((*I)->getTerminator())) { + if (II->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + + for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { + if (const CallInst *CI = dyn_cast(BI)) { + if (CI->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + } + } + return true; +} + +bool Loop::isAnnotatedParallel() const { + + BasicBlock *latch = getLoopLatch(); + if (latch == NULL) + return false; + + MDNode *desiredLoopIdMetadata = + latch->getTerminator()->getMetadata("llvm.loop.parallel"); + + if (!desiredLoopIdMetadata) return false; + + // The loop branch contains the parallel loop metadata. In order to ensure + // that any parallel-loop-unaware optimization pass hasn't added loop-carried + // dependencies (thus converted the loop back to a sequential loop), check + // that all the memory instructions in the loop contain parallelism metadata + // that point to the same unique "loop id metadata" the loop branch does. + for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) { + for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end(); + II != EE; II++) { + + if (!II->mayReadOrWriteMemory()) + continue; + + if (!II->getMetadata("llvm.mem.parallel_loop_access")) + return false; + + // The memory instruction can refer to the loop identifier metadata + // directly or indirectly through another list metadata (in case of + // nested parallel loops). The loop identifier metadata refers to + // itself so we can check both cases with the same routine. 
+ MDNode *loopIdMD = + dyn_cast(II->getMetadata("llvm.mem.parallel_loop_access")); + bool loopIdMDFound = false; + for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) { + if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) { + loopIdMDFound = true; + break; + } + } + + if (!loopIdMDFound) + return false; + } } return true; } + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool Loop::hasDedicatedExits() const { diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 8578a63bee1fe..d26aaf1b90486 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -10,15 +10,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/LLVMContext.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; namespace { diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 0a539fe758257..d490d5419f75e 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -8,24 +8,24 @@ //===----------------------------------------------------------------------===// // // This family of functions identifies calls to builtin functions that allocate -// or free memory. +// or free memory. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "memory-builtins" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -88,6 +88,10 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false) { + // Skip intrinsics + if (isa(V)) + return 0; + Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) return 0; @@ -132,7 +136,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { ImmutableCallSite CS(LookThroughBitCast ? 
V->stripPointerCasts() : V); - return CS && CS.hasFnAttr(Attributes::NoAlias); + return CS && CS.hasFnAttr(Attribute::NoAlias); } @@ -194,12 +198,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) - return NULL; + return 0; // The size of the malloc's result type must be known to determine array size. Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !TD) - return NULL; + return 0; unsigned ElementSize = TD->getTypeAllocSize(T); if (StructType *ST = dyn_cast(T)) @@ -208,15 +212,15 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = NULL; + Value *Multiple = 0; if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) return Multiple; - return NULL; + return 0; } -/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. const CallInst *llvm::isArrayMalloc(const Value *I, @@ -225,12 +229,12 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const CallInst *CI = extractMallocCall(I, TLI); Value *ArraySize = computeArraySize(CI, TD, TLI); - if (ArraySize && - ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) - return CI; + if (ConstantInt *ConstSize = dyn_cast_or_null(ArraySize)) + if (ConstSize->isOne()) + return CI; // CI is a non-array malloc or we can't figure out that it is an array malloc. - return NULL; + return 0; } /// getMallocType - Returns the PointerType resulting from the malloc call. @@ -241,8 +245,8 @@ const CallInst *llvm::isArrayMalloc(const Value *I, PointerType *llvm::getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI) { assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - - PointerType *MallocType = NULL; + + PointerType *MallocType = 0; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. @@ -262,7 +266,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, return cast(CI->getType()); // Type could not be determined. - return NULL; + return 0; } /// getMallocAllocatedType - Returns the Type allocated by malloc call. @@ -273,10 +277,10 @@ PointerType *llvm::getMallocType(const CallInst *CI, Type *llvm::getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI) { PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : NULL; + return PT ? PT->getElementType() : 0; } -/// getMallocArraySize - Returns the array size of a malloc call. If the +/// getMallocArraySize - Returns the array size of a malloc call. If the /// argument passed to malloc is a multiple of the size of the malloced type, /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. 
Otherwise, return NULL for mallocs whose array size cannot be @@ -300,7 +304,7 @@ const CallInst *llvm::extractCallocCall(const Value *I, /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast(I); - if (!CI) + if (!CI || isa(CI)) return 0; Function *Callee = CI->getCalledFunction(); if (Callee == 0 || !Callee->isDeclaration()) @@ -317,7 +321,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { return 0; // Check free prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) @@ -360,6 +364,26 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, return true; } +/// \brief Compute the size of the underlying object pointed by Ptr. Returns +/// true and the object size in Size if successful, and false otherwise. +/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, +/// byval arguments, and global variables. +bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + bool RoundToAlign) { + if (!TD) + return false; + + ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); + SizeOffsetType Data = Visitor.compute(const_cast(Ptr)); + if (!Visitor.knownSize(Data)) + return false; + + Size = Data.first.getZExtValue(); + return true; +} + STATISTIC(ObjectVisitorArgument, "Number of arguments with unsolved size and offset"); @@ -385,20 +409,29 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); - if (Instruction *I = dyn_cast(V)) { - // If we have already seen this instruction, bail out. Cycles can happen in - // unreachable code after constant propagation. - if (!SeenInsts.insert(I)) - return unknown(); + if (isa(V) || isa(V)) { + // Return cached value or insert unknown in cache if size of V was not + // computed yet in order to avoid recursions in PHis. 
+ std::pair CacheVal = + CacheMap.insert(std::make_pair(V, unknown())); + if (!CacheVal.second) + return CacheVal.first->second; + + SizeOffsetType Result; if (GEPOperator *GEP = dyn_cast(V)) - return visitGEPOperator(*GEP); - return visit(*I); + Result = visitGEPOperator(*GEP); + else + Result = visit(cast(*V)); + return CacheMap[V] = Result; } + if (Argument *A = dyn_cast(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast(V)) return visitConstantPointerNull(*P); + if (GlobalAlias *GA = dyn_cast(V)) + return visitGlobalAlias(*GA); if (GlobalVariable *GV = dyn_cast(V)) return visitGlobalVariable(*GV); if (UndefValue *UV = dyn_cast(V)) @@ -406,8 +439,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { if (ConstantExpr *CE = dyn_cast(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless - if (CE->getOpcode() == Instruction::GetElementPtr) - return visitGEPOperator(cast(*CE)); } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V @@ -510,14 +541,19 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { SizeOffsetType PtrData = compute(GEP.getPointerOperand()); - if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices()) + APInt Offset(IntTyBits, 0); + if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset)) return unknown(); - SmallVector Ops(GEP.idx_begin(), GEP.idx_end()); - APInt Offset(IntTyBits,TD->getIndexedOffset(GEP.getPointerOperandType(),Ops)); return std::make_pair(PtrData.first, PtrData.second + Offset); } +SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) { + if (GA.mayBeOverridden()) + return unknown(); + return compute(GA.getAliasee()); +} + SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ if (!GV.hasDefinitiveInitializer()) return unknown(); @@ -536,9 +572,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { - // too complex to analyze statically. - return unknown(); +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) { + if (PHI.getNumIncomingValues() == 0) + return unknown(); + + SizeOffsetType Ret = compute(PHI.getIncomingValue(0)); + if (!bothKnown(Ret)) + return unknown(); + + // Verify that all PHI incoming pointers have the same size and offset. + for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) { + SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i)); + if (!bothKnown(EdgeData) || EdgeData != Ret) + return unknown(); + } + return Ret; } SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { @@ -619,6 +667,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { } else if (isa(V) || (isa(V) && cast(V)->getOpcode() == Instruction::IntToPtr) || + isa(V) || isa(V)) { // ignore values where we cannot do more than what ObjectSizeVisitor can Result = unknown(); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 9872890494556..2240e9de33eb8 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file implements an analysis that determines, for a given memory -// operation, what preceding memory operations it depends on. 
It builds on +// operation, what preceding memory operations it depends on. It builds on // alias analysis information, and tries to provide a lazy, caching interface to // a common kind of alias information query. // @@ -16,21 +16,21 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/PredIteratorCache.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" -#include "llvm/DataLayout.h" +#include "llvm/Support/PredIteratorCache.h" using namespace llvm; STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); @@ -52,7 +52,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr, static const int BlockScanLimit = 500; char MemoryDependenceAnalysis::ID = 0; - + // Register this pass... INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -99,7 +99,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { /// RemoveFromReverseMap - This is a helper function that removes Val from /// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry. template -static void RemoveFromReverseMap(DenseMap > &ReverseMap, Instruction *Inst, KeyTy Val) { typename DenseMap >::iterator @@ -123,7 +123,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (LI->isUnordered()) { Loc = AA->getLocation(LI); return AliasAnalysis::Ref; - } else if (LI->getOrdering() == Monotonic) { + } + if (LI->getOrdering() == Monotonic) { Loc = AA->getLocation(LI); return AliasAnalysis::ModRef; } @@ -135,7 +136,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (SI->isUnordered()) { Loc = AA->getLocation(SI); return AliasAnalysis::Mod; - } else if (SI->getOrdering() == Monotonic) { + } + if (SI->getOrdering() == Monotonic) { Loc = AA->getLocation(SI); return AliasAnalysis::ModRef; } @@ -196,13 +198,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // Walk backwards through the block, looking for dependencies while (ScanIt != BB->begin()) { // Limit the amount of scanning we do so we don't end up with quadratic - // running time on extreme testcases. + // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); Instruction *Inst = --ScanIt; - + // If this inst is a memory op, get the pointer it accessed AliasAnalysis::Location Loc; AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); @@ -251,7 +253,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, /// /// MemLocBase, MemLocOffset are lazily computed here the first time the /// base/offs of memloc is needed. 
-static bool +static bool isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase, int64_t &MemLocOffs, @@ -262,7 +264,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, // If we haven't already computed the base/offset of MemLoc, do so now. if (MemLocBase == 0) - MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD); unsigned Size = MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, @@ -283,25 +285,31 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, const DataLayout &TD) { // We can only extend simple integer loads. if (!isa(LI->getType()) || !LI->isSimple()) return 0; - + + // Load widening is hostile to ThreadSanitizer: it may cause false positives + // or make the reports more cryptic (access sizes are wrong). + if (LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread)) + return 0; + // Get the base of this load. int64_t LIOffs = 0; - const Value *LIBase = - GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); - + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); + // If the two pointers are not based on the same pointer, we can't tell that // they are related. if (LIBase != MemLocBase) return 0; - + // Okay, the two values are based on the same pointer, but returned as // no-alias. This happens when we have things like two byte loads at "P+1" // and "P+3". Check to see if increasing the size of the "LI" load up to its // alignment (or the largest native integer type) will allow us to load all // the bits required by MemLoc. - + // If MemLoc is before LI, then no widening of LI will help us out. if (MemLocOffs < LIOffs) return 0; - + // Get the alignment of the load in bytes. We assume that it is safe to load // any legal integer up to this size without a problem. For example, if we're // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can @@ -310,15 +318,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned LoadAlign = LI->getAlignment(); int64_t MemLocEnd = MemLocOffs+MemLocSize; - + // If no amount of rounding up will let MemLoc fit into LI, then bail out. if (LIOffs+LoadAlign < MemLocEnd) return 0; - + // This is the size of the load to try. Start with the next larger power of // two. unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; NewLoadByteSize = NextPowerOf2(NewLoadByteSize); - + while (1) { // If this load size is bigger than our known alignment or would not fit // into a native integer register, then we fail. @@ -327,8 +335,8 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, return 0; if (LIOffs+NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->getFnAttributes(). - hasAttribute(Attributes::AddressSafety)) + LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress)) // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. 
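Stripped of the sanitizer and legality checks, the size search above reduces to the following sketch (the function name is illustrative; the real code also rejects sizes beyond the target's largest native integer):

    #include "llvm/Support/MathExtras.h"

    static unsigned pickWidenedLoadSize(unsigned LoadByteSize, int64_t LIOffs,
                                        int64_t MemLocEnd, unsigned LoadAlign) {
      // Start strictly above the original size and double until the widened
      // load covers MemLocEnd or exceeds the known alignment.
      unsigned NewSize = (unsigned)llvm::NextPowerOf2(LoadByteSize);
      for (;;) {
        if (NewSize > LoadAlign)
          return 0; // widening is no longer known to be safe
        if (LIOffs + NewSize >= MemLocEnd)
          return NewSize; // wide enough to cover the queried location
        NewSize <<= 1;
      }
    }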
@@ -337,7 +345,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, // If a load of this width would include all of MemLoc, then we succeed. if (LIOffs+NewLoadByteSize >= MemLocEnd) return NewLoadByteSize; - + NewLoadByteSize <<= 1; } } @@ -345,15 +353,23 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases -/// with reads from read-only locations. +/// with reads from read-only locations. If possible, pass the query +/// instruction as well; this function may take advantage of the metadata +/// annotated to the query instruction to refine the result. MemDepResult MemoryDependenceAnalysis:: -getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, - BasicBlock::iterator ScanIt, BasicBlock *BB) { +getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, + BasicBlock::iterator ScanIt, BasicBlock *BB, + Instruction *QueryInst) { const Value *MemLocBase = 0; int64_t MemLocOffset = 0; - unsigned Limit = BlockScanLimit; + bool isInvariantLoad = false; + if (isLoad && QueryInst) { + LoadInst *LI = dyn_cast(QueryInst); + if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) + isInvariantLoad = true; + } // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -368,7 +384,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (IntrinsicInst *II = dyn_cast(Inst)) { // Debug intrinsics don't (and can't) cause dependences. if (isa(II)) continue; - + // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { @@ -392,10 +408,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(LI); AliasAnalysis::Location LoadLoc = AA->getLocation(LI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - + if (isLoad) { if (R == AliasAnalysis::NoAlias) { // If this is an over-aligned integer load (for example, @@ -409,10 +425,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, MemLocOffset, LI, TD)) return MemDepResult::getClobber(Inst); - + continue; } - + // Must aliased loads are defs of each other. if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); @@ -427,7 +443,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (R == AliasAnalysis::PartialAlias) return MemDepResult::getClobber(Inst); #endif - + // Random may-alias loads don't depend on each other without a // dependence. continue; @@ -444,7 +460,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } - + if (StoreInst *SI = dyn_cast(Inst)) { // Atomic stores have complications involved. // FIXME: This is overly conservative. @@ -460,14 +476,16 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. 
AliasAnalysis::Location StoreLoc = AA->getLocation(SI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); - + if (R == AliasAnalysis::NoAlias) continue; if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); + if (isInvariantLoad) + continue; return MemDepResult::getClobber(Inst); } @@ -482,7 +500,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); // Be conservative if the accessed pointer may alias the allocation. @@ -516,7 +534,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(Inst); } } - + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) @@ -528,25 +546,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, /// depends. MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { Instruction *ScanPos = QueryInst; - + // Check for a cached result MemDepResult &LocalCache = LocalDeps[QueryInst]; - + // If the cached entry is non-dirty, just return it. Note that this depends // on MemDepResult's default constructing to 'dirty'. if (!LocalCache.isDirty()) return LocalCache; - + // Otherwise, if we have a dirty entry, we know we can start the scan at that // instruction, which may save us some work. if (Instruction *Inst = LocalCache.getInst()) { ScanPos = Inst; - + RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); } - + BasicBlock *QueryParent = QueryInst->getParent(); - + // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { // No dependence found. If this is the entry block of the function, it is @@ -565,7 +583,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, - QueryParent); + QueryParent, QueryInst); } else if (isa(QueryInst) || isa(QueryInst)) { CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); @@ -575,11 +593,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // Non-memory instruction. LocalCache = MemDepResult::getUnknown(); } - + // Remember the result! if (Instruction *I = LocalCache.getInst()) ReverseLocalDeps[I].insert(QueryInst); - + return LocalCache; } @@ -620,7 +638,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { /// the uncached case, this starts out as the set of predecessors we care /// about. SmallVector DirtyBlocks; - + if (!Cache.empty()) { // Okay, we have a cache entry. If we know it is not dirty, just return it // with no computation. @@ -628,17 +646,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { ++NumCacheNonLocal; return Cache; } - + // If we already have a partially computed set of results, scan them to // determine what is dirty, seeding our initial DirtyBlocks worklist. 
for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); I != E; ++I) if (I->getResult().isDirty()) DirtyBlocks.push_back(I->getBB()); - + // Sort the cache so that we can do fast binary search lookups below. std::sort(Cache.begin(), Cache.end()); - + ++NumCacheDirtyNonLocal; //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " // << Cache.size() << " cached: " << *QueryInst; @@ -649,45 +667,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { DirtyBlocks.push_back(*PI); ++NumUncacheNonLocal; } - + // isReadonlyCall - If this is a read-only call, we can be more aggressive. bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); SmallPtrSet Visited; - + unsigned NumSortedEntries = Cache.size(); DEBUG(AssertSorted(Cache)); - + // Iterate while we still have blocks to update. while (!DirtyBlocks.empty()) { BasicBlock *DirtyBB = DirtyBlocks.back(); DirtyBlocks.pop_back(); - + // Already processed this block? if (!Visited.insert(DirtyBB)) continue; - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. DEBUG(AssertSorted(Cache, NumSortedEntries)); - NonLocalDepInfo::iterator Entry = + NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, NonLocalDepEntry(DirtyBB)); if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; - if (Entry != Cache.begin()+NumSortedEntries && + if (Entry != Cache.begin()+NumSortedEntries && Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block // is done. if (!Entry->getResult().isDirty()) continue; - + // Otherwise, remember this slot so we can update the value. ExistingResult = &*Entry; } - + // If the dirty entry has a pointer, start scanning from it so we don't have // to rescan the entire block. BasicBlock::iterator ScanPos = DirtyBB->end(); @@ -699,10 +717,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { QueryCS.getInstruction()); } } - + // Find out if this block has a local dependency for QueryInst. MemDepResult Dep; - + if (ScanPos != DirtyBB->begin()) { Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { @@ -712,14 +730,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { } else { Dep = MemDepResult::getNonFuncLocal(); } - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache.push_back(NonLocalDepEntry(DirtyBB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the association! if (!Dep.isNonLocal()) { @@ -728,14 +746,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { if (Instruction *Inst = Dep.getInst()) ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction()); } else { - + // If the block *is* completely transparent to the load, we need to check // the predecessors of this block. Add them to our worklist. 
for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) DirtyBlocks.push_back(*PI); } } - + return Cache; } @@ -753,9 +771,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); - + PHITransAddr Address(const_cast(Loc.Ptr), TD); - + // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI @@ -778,7 +796,7 @@ MemDepResult MemoryDependenceAnalysis:: GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. NonLocalDepInfo::iterator Entry = @@ -786,18 +804,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, NonLocalDepEntry(BB)); if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; - + // If we have a cached entry, and it is non-dirty, use it as the value for // this dependency. if (ExistingResult && !ExistingResult->getResult().isDirty()) { ++NumCacheNonLocalPtr; return ExistingResult->getResult(); - } - + } + // Otherwise, we have to scan for the value. If we have a dirty cache // entry, start scanning from its position, otherwise we scan from the end // of the block. @@ -807,30 +825,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; ScanPos = ExistingResult->getResult().getInst(); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } - + // Scan the block for the dependency. MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache->push_back(NonLocalDepEntry(BB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! if (!Dep.isDef() && !Dep.isClobber()) return Dep; - + // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently // update MemDep when we remove instructions. Instruction *Inst = Dep.getInst(); @@ -843,7 +861,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, /// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain /// number of elements in the array that are already properly ordered. This is /// optimized for the case when only a few entries are added. -static void +static void SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, unsigned NumSortedEntries) { switch (Cache.size() - NumSortedEntries) { @@ -895,7 +913,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVectorImpl &Result, DenseMap &Visited, bool SkipFirstBlock) { - + // Look up the cached info for Pointer. 
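  // [Editor's note] The body of SortNonLocalDepInfoCache is elided by the
  // hunk context above; a sketch of the strategy it implements, assuming
  // NonLocalDepEntry orders by basic block: binary-insert when only one or
  // two entries were appended past the sorted prefix, re-sort otherwise.
  //
  //   switch (Cache.size() - NumSortedEntries) {
  //   case 0: break;                          // already sorted
  //   case 2: {                               // place the last entry within
  //     NonLocalDepEntry Val = Cache.back();  // the range that excludes the
  //     Cache.pop_back();                     // other unsorted entry...
  //     Cache.insert(std::upper_bound(Cache.begin(), Cache.end()-1, Val), Val);
  //   } // ...then fall through to place the remaining one.
  //   case 1: {
  //     NonLocalDepEntry Val = Cache.back();
  //     Cache.pop_back();
  //     Cache.insert(std::upper_bound(Cache.begin(), Cache.end(), Val), Val);
  //     break;
  //   }
  //   default:
  //     std::sort(Cache.begin(), Cache.end());
  //   }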
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -909,7 +927,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the NLPI for CacheKey, inserting one into the map if it doesn't // already have one. - std::pair Pair = + std::pair Pair = NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); NonLocalPointerInfo *CacheInfo = &Pair.first->second; @@ -971,14 +989,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DenseMap::iterator VI = Visited.find(I->getBB()); if (VI == Visited.end() || VI->second == Pointer.getAddr()) continue; - + // We have a pointer mismatch in a block. Just return clobber, saying // that something was clobbered in this result. We could also do a // non-fully cached query, but there is little point in doing this. return true; } } - + Value *Addr = Pointer.getAddr(); for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { @@ -989,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, ++NumCacheCompleteNonLocalPtr; return false; } - + // Otherwise, either this is a new block, a block with an invalid cache // pointer or one that we're about to invalidate by putting more info into it // than its valid cache info. If empty, the result will be valid cache info, @@ -998,10 +1016,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); else CacheInfo->Pair = BBSkipFirstBlockPair(); - + SmallVector Worklist; Worklist.push_back(StartBB); - + // PredList used inside loop. SmallVector, 16> PredList; @@ -1012,10 +1030,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // revisit blocks after we insert info for them. unsigned NumSortedEntries = Cache->size(); DEBUG(AssertSorted(*Cache)); - + while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); - + // Skip the first block if we have it. if (!SkipFirstBlock) { // Analyze the dependency of *Pointer in FromBB. See if we already have @@ -1027,14 +1045,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DEBUG(AssertSorted(*Cache, NumSortedEntries)); MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, NumSortedEntries); - + // If we got a Def or Clobber, add this to the list of results. if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); continue; } } - + // If 'Pointer' is an instruction defined in this block, then we need to do // phi translation to change it into a value live in the predecessor block. // If not, we just add the predecessors to the worklist and scan them with @@ -1051,7 +1069,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NewBlocks.push_back(*PI); continue; } - + // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. @@ -1066,12 +1084,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, Worklist.append(NewBlocks.begin(), NewBlocks.end()); continue; } - + // We do need to do phi translation, if we know ahead of time we can't phi // translate this value, don't even try. if (!Pointer.IsPotentiallyPHITranslatable()) goto PredTranslationFailure; - + // We may have added values to the cache list before this PHI translation. // If so, we haven't done anything to ensure that the cache remains sorted. 
// Sort it now (if needed) so that recursive invocations of @@ -1094,7 +1112,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); - + // Check to see if we have already visited this pred block with another // pointer. If so, we can't do this lookup. This failure can occur // with PHI translation when a critical edge exists and the PHI node in @@ -1111,14 +1129,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // the analysis and can ignore it. if (InsertRes.first->second == PredPtrVal) continue; - + // Otherwise, the block was previously analyzed with a different // pointer. We can't represent the result of this case, so we just // treat this as a phi translation failure. // Make sure to clean up the Visited map before continuing on to // PredTranslationFailure. - for (unsigned i = 0; i < PredList.size(); i++) + for (unsigned i = 0, n = PredList.size(); i < n; ++i) Visited.erase(PredList[i].first); goto PredTranslationFailure; @@ -1127,10 +1145,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Actually process results here; this need to be a separate loop to avoid // calling getNonLocalPointerDepFromBB for blocks we don't want to return - // any results for. (getNonLocalPointerDepFromBB will modify our + // any results for. (getNonLocalPointerDepFromBB will modify our // datastructures in ways the code after the PredTranslationFailure label // doesn't expect.) - for (unsigned i = 0; i < PredList.size(); i++) { + for (unsigned i = 0, n = PredList.size(); i < n; ++i) { BasicBlock *Pred = PredList[i].first; PHITransAddr &PredPointer = PredList[i].second; Value *PredPtrVal = PredPointer.getAddr(); @@ -1170,12 +1188,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, continue; } } - + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); - + // Since we did phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all @@ -1188,20 +1206,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // The following code is "failure"; we can't produce a sane translation // for the given block. It assumes that we haven't modified any of // our datastructures while processing the current block. - + if (Cache == 0) { // Refresh the CacheInfo/Cache pointer if it got invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); } - + // Since we failed phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all // results from the set". Clear out the indicator for this. CacheInfo->Pair = BBSkipFirstBlockPair(); - + // If *nothing* works, mark the pointer as unknown. // // If this is the magic first block, return this as a clobber of the whole @@ -1209,12 +1227,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // we have to bail out. 
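    // [Editor's note] A compressed sketch of the per-predecessor phi
    // translation step performed in the PredList loops above (this mirrors
    // the calls shown there; the DominatorTree argument is 0 as in the
    // original):
    //
    //   PHITransAddr PredPointer(Pointer);        // copy, translated below
    //   PredPointer.PHITranslateValue(BB, Pred, /*DT=*/0);
    //   if (Value *PredPtrVal = PredPointer.getAddr()) {
    //     // Translated: query Pred with the rewritten address, reusing any
    //     // cache entries recorded under PredPtrVal.
    //   } else {
    //     // Translation failed: the result in Pred is treated as a clobber.
    //   }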
if (SkipFirstBlock) return true; - + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { assert(I != Cache->rend() && "Didn't find current block??"); if (I->getBB() != BB) continue; - + assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); I->setResult(MemDepResult::getUnknown()); @@ -1234,23 +1252,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, /// CachedNonLocalPointerInfo, remove it. void MemoryDependenceAnalysis:: RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { - CachedNonLocalPointerInfo::iterator It = + CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P); if (It == NonLocalPointerDeps.end()) return; - + // Remove all of the entries in the BB->val map. This involves removing // instructions from the reverse map. NonLocalDepInfo &PInfo = It->second.NonLocalDeps; - + for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { Instruction *Target = PInfo[i].getResult().getInst(); if (Target == 0) continue; // Ignore non-local dep results. assert(Target->getParent() == PInfo[i].getBB()); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); } - + // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). NonLocalPointerDeps.erase(It); } @@ -1305,20 +1323,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Remove this local dependency info. LocalDeps.erase(LocalDepEntry); } - + // If we have any cached pointer dependencies on this instruction, remove // them. If the instruction has non-pointer type, then it can't be a pointer // base. - + // Remove it from both the load info and the store info. The instruction // can't be in either of these maps if it is non-pointer. if (RemInst->getType()->isPointerTy()) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); } - + // Loop over all of the things that depend on the instruction we're removing. - // + // SmallVector, 8> ReverseDepsToAdd; // If we find RemInst as a clobber or Def in any of the maps for other values, @@ -1330,29 +1348,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { MemDepResult NewDirtyVal; if (!RemInst->isTerminator()) NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); - + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { SmallPtrSet &ReverseDeps = ReverseDepIt->second; // RemInst can't be the terminator if it has local stuff depending on it. assert(!ReverseDeps.empty() && !isa(RemInst) && "Nothing can locally depend on a terminator"); - + for (SmallPtrSet::iterator I = ReverseDeps.begin(), E = ReverseDeps.end(); I != E; ++I) { Instruction *InstDependingOnRemInst = *I; assert(InstDependingOnRemInst != RemInst && "Already removed our local dep info"); - + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; - + // Make sure to remember that new things depend on NewDepInst. 
assert(NewDirtyVal.getInst() && "There is no way something else can have " "a local dep on this if it is a terminator!"); - ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst)); } - + ReverseLocalDeps.erase(ReverseDepIt); // Add new reverse deps after scanning the set, to avoid invalidating the @@ -1363,25 +1381,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); if (ReverseDepIt != ReverseNonLocalDeps.end()) { SmallPtrSet &Set = ReverseDepIt->second; for (SmallPtrSet::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); - + PerInstNLInfo &INLD = NonLocalDeps[*I]; // The information is now dirty! INLD.second = true; - - for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), DE = INLD.first.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NextI = NewDirtyVal.getInst()) ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); } @@ -1396,7 +1414,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a // value in the NonLocalPointerDeps info. ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = @@ -1404,45 +1422,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { SmallPtrSet &Set = ReversePtrDepIt->second; SmallVector,8> ReversePtrDepsToAdd; - + for (SmallPtrSet::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { ValueIsLoadPair P = *I; assert(P.getPointer() != RemInst && "Already removed NonLocalPointerDeps info for RemInst"); - + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; - + // The cache is not valid for any specific block anymore. NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); - + // Update any entries for RemInst to use the instruction after it. for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); } - + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its // subsequent value may invalidate the sortedness. 
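      // [Editor's note] All of this reverse-map bookkeeping exists so that
      // transforms can delete instructions without leaving stale cache
      // entries behind. The expected client protocol, sketched as
      // hypothetical pass code:
      //
      //   MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
      //   MD.removeInstruction(DeadInst);  // re-dirty dependents first
      //   DeadInst->eraseFromParent();     // only then delete the instruction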
std::sort(NLPDI.begin(), NLPDI.end()); } - + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); - + while (!ReversePtrDepsToAdd.empty()) { ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] .insert(ReversePtrDepsToAdd.back().second); ReversePtrDepsToAdd.pop_back(); } } - - + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); AA->deleteValue(RemInst); DEBUG(verifyRemoved(RemInst)); @@ -1456,7 +1474,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { assert(I->second.getInst() != D && "Inst occurs in data structures"); } - + for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), E = NonLocalPointerDeps.end(); I != E; ++I) { assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); @@ -1465,7 +1483,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { II != E; ++II) assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); } - + for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), E = NonLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1474,7 +1492,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = INLD.first.end(); II != EE; ++II) assert(II->getResult().getInst() != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), E = ReverseLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1482,7 +1500,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), E = ReverseNonLocalDeps.end(); I != E; ++I) { @@ -1491,17 +1509,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseNonLocalPtrDepTy::const_iterator I = ReverseNonLocalPtrDeps.begin(), E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in rev NLPD map"); - + for (SmallPtrSet::const_iterator II = I->second.begin(), E = I->second.end(); II != E; ++II) assert(*II != ValueIsLoadPair(D, false) && *II != ValueIsLoadPair(D, true) && "Inst occurs in ReverseNonLocalPtrDeps map"); } - + } diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index f8c751481976f..03415375263a8 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -16,13 +16,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Assembly/Writer.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; namespace { diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 2eb4137c533a7..907e9621baed3 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/DataLayout.h" 
#include "llvm/Pass.h" -#include "llvm/DataLayout.h" using namespace llvm; namespace { diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index c35737e4724c8..e6af0663feaa9 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,11 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index d4ad726481661..30d213b77576f 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -25,24 +25,23 @@ #define DEBUG_TYPE "ball-larus-numbering" #include "llvm/Analysis/PathNumbering.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/InstrTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeBuilder.h" #include "llvm/Pass.h" -#include "llvm/TypeBuilder.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include +#include #include #include #include -#include using namespace llvm; diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp index b361d3f4fa94b..bc53221d3176d 100644 --- a/lib/Analysis/PathProfileInfo.cpp +++ b/lib/Analysis/PathProfileInfo.cpp @@ -13,15 +13,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "path-profile-info" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/PathProfileInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/Analysis/PathProfileInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include using namespace llvm; diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index 0fcdfe75aefdf..48d7d05d788f6 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -13,15 +13,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "path-profile-verifier" -#include "llvm/Module.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfoTypes.h" #include "llvm/Analysis/PathProfileInfo.h" -#include "llvm/Support/Debug.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include using namespace llvm; @@ -85,7 +84,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - arrayMap[0][F->begin()][0] = i++; + 
arrayMap[(BasicBlock*)0][F->begin()][0] = i++; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); @@ -126,7 +125,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { << currentPath->getCount() << "\n"); // setup the entry edge (normally path profiling doesn't care about this) if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) - edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] + edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]] += currentPath->getCount(); for( ProfilePathEdgeIterator nextEdge = pev->begin(), diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index 6ed27297923fc..96804a01edc65 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -14,13 +14,13 @@ #define DEBUG_TYPE "postdomtree" #include "llvm/Analysis/PostDominators.h" -#include "llvm/Instructions.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp index a4f634af531e9..d7f444b4b6d7f 100644 --- a/lib/Analysis/ProfileDataLoader.cpp +++ b/lib/Analysis/ProfileDataLoader.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ProfileDataLoader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Module.h" -#include "llvm/InstrTypes.h" -#include "llvm/Analysis/ProfileDataLoader.h" #include "llvm/Analysis/ProfileDataTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" #include diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp index c43cff05a4da1..2ee0093a8f577 100644 --- a/lib/Analysis/ProfileDataLoaderPass.cpp +++ b/lib/Analysis/ProfileDataLoaderPass.cpp @@ -15,22 +15,22 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-metadata-loader" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/BasicBlock.h" -#include "llvm/InstrTypes.h" -#include "llvm/Module.h" -#include "llvm/LLVMContext.h" -#include "llvm/MDBuilder.h" -#include "llvm/Metadata.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileDataLoader.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumEdgesRead, "The # of edges read."); diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index 
12b59e0a6fd5d..b284b995ac780 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -12,14 +12,14 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "profile-estimator"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 static cl::opt
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index b5b7ac1e50114..9626a48b9d0de 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -12,16 +12,16 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "profile-info"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/ADT/SmallSet.h"
-#include 
-#include 
 #include 
+#include 
+#include 
 using namespace llvm;
 namespace llvm {
@@ -249,7 +249,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>::
   succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
   if (Succ == End) {
-    P[0] = BB;
+    P[(const BasicBlock*)0] = BB;
     if (Mode & GetPathToExit) {
       hasFoundPath = true;
       BB = 0;
@@ -752,10 +752,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
          Succ != End; ++Succ) {
       Path P;
       GetPath(*Succ, 0, P, GetPathToExit);
-      if (Dest && Dest != P[0]) {
+      if (Dest && Dest != P[(const BasicBlock*)0]) {
         AllEdgesHaveSameReturn = false;
       }
-      Dest = P[0];
+      Dest = P[(const BasicBlock*)0];
     }
     if (AllEdgesHaveSameReturn) {
       if(EstimateMissingEdges(BB)) {
@@ -927,7 +927,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
       Path P;
       const BasicBlock *Dest = GetPath(BB, 0, P,
                                        GetPathToExit | GetPathWithNewEdges);
-      Dest = P[0];
+      Dest = P[(const BasicBlock*)0];
       if (!Dest) continue;
       if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
index 5c7c97cad1e5a..f1f3e940c9326 100644
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -14,8 +14,8 @@
 #include "llvm/Analysis/ProfileInfoLoader.h"
 #include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/Module.h"
-#include "llvm/InstrTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/raw_ostream.h"
 #include 
 #include 
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 5ecf052a1a24b..346f8d6d6258b 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -12,20 +12,20 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "profile-loader"
-#include "llvm/BasicBlock.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
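// [Editor's note] On the P[(const BasicBlock*)0] and arrayMap[(BasicBlock*)0]
// casts above: ProfileInfo's Path maps block to block (assumed here to be
// typedef std::map<const BasicBlock*, const BasicBlock*> Path), and a bare
// literal 0 as the key can make operator[] ambiguous under C++11 overload
// resolution, so the null sentinel key is now spelled as a typed pointer:
//
//   Path P;
//   P[(const BasicBlock*)0] = BB;   // sentinel: "path reached an exit"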
"llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index 0cb158865afef..c8896de89301e 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-verifier" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/CallSite.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Debug.h" #include using namespace llvm; diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp new file mode 100644 index 0000000000000..0a342b2167e4b --- /dev/null +++ b/lib/Analysis/PtrUseVisitor.cpp @@ -0,0 +1,36 @@ +//===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// Implementation of the pointer use visitors. 
+#include "llvm/Analysis/PtrUseVisitor.h"
+
+using namespace llvm;
+
+void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
+  for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+       UI != UE; ++UI) {
+    if (VisitedUses.insert(&UI.getUse())) {
+      UseToVisit NewU = {
+        UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown),
+        Offset
+      };
+      Worklist.push_back(llvm_move(NewU));
+    }
+  }
+}
+
+bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) {
+  if (!IsOffsetKnown)
+    return false;
+
+  return GEPI.accumulateConstantOffset(DL, Offset);
+}
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 30f0d2f10d862..fad5074086ce6 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -10,14 +10,13 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionIterator.h"
-
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 #define DEBUG_TYPE "region"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 8b23cc7042421..c5f1b925921b9 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -9,16 +9,16 @@
 // Print out the region tree of a function using dotty/graphviz.
 //===----------------------------------------------------------------------===//
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/RegionPrinter.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/DOTGraphTraitsPass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e3189ecc8994c..6ea915fdb0b76 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -59,22 +59,25 @@
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "scalar-evolution"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/DataLayout.h"
"llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" @@ -83,9 +86,7 @@ #include "llvm/Support/InstIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Target/TargetLibraryInfo.h" #include using namespace llvm; @@ -4229,6 +4230,25 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { return Max ? Max : SE->getCouldNotCompute(); } +bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, + ScalarEvolution *SE) const { + if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S)) + return true; + + if (!ExitNotTaken.ExitingBlock) + return false; + + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + if (ENT->ExactNotTaken != SE->getCouldNotCompute() + && SE->hasOperand(ENT->ExactNotTaken, S)) { + return true; + } + } + return false; +} + /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -6120,8 +6140,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, getTypeSizeInBits(ICI->getOperand(0)->getType())) return false; - // Now that we found a conditional branch that dominates the loop, check to - // see if it is the comparison we are looking for. + // Now that we found a conditional branch that dominates the loop or controls + // the loop latch. Check to see if it is the comparison we are looking for. 
ICmpInst::Predicate FoundPred; if (Inverse) FoundPred = ICI->getInversePredicate(); @@ -6939,6 +6959,17 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); + + for (DenseMap::iterator I = + BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + BackedgeTakenCounts.erase(I++); + } + else + ++I; + } } typedef DenseMap VerifyMap; diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index e9edb3e083def..79c5f0deb03b8 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -19,9 +19,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 111bfb4a6a766..fcd7ce272a221 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,13 +14,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -1523,9 +1523,8 @@ Value *SCEVExpander::expand(const SCEV *S) { } // Check to see if we already expanded this here. - std::map, - AssertingVH >::iterator I = - InsertedExpressions.find(std::make_pair(S, InsertPt)); + std::map, TrackingVH >::iterator + I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; @@ -1600,14 +1599,14 @@ static bool width_descending(Value *lhs, Value *rhs) { /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl &DeadInsts, - const TargetLowering *TLI) { + const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector Phis; for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *Phi = dyn_cast(I); ++I) { Phis.push_back(Phi); } - if (TLI) + if (TTI) std::sort(Phis.begin(), Phis.end(), width_descending); unsigned NumElim = 0; @@ -1635,8 +1634,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; - if (Phi->getType()->isIntegerTy() && TLI - && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + if (Phi->getType()->isIntegerTy() && TTI + && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { // This phi can be freely truncated to the narrowest phi type. Map the // truncated expression to it so it will be reused for narrow types. 
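// [Editor's note] The forgetMemoizedResults hunk above uses the standard
// erase-while-iterating pattern; assuming the map is declared as
// DenseMap<const Loop *, BackedgeTakenInfo> (the key/value types were
// stripped in this copy of the patch), the shape is:
//
//   for (DenseMap<const Loop *, BackedgeTakenInfo>::iterator
//          I = BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
//        I != E;)
//     if (I->second.hasOperand(S, this))
//       BackedgeTakenCounts.erase(I++);    // advance before erasing
//     else
//       ++I;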
          const SCEV *TruncExpr =
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index c819666ee4444..15b78728a73c3 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -14,9 +14,9 @@
 #define DEBUG_TYPE "sparseprop"
 #include "llvm/Analysis/SparsePropagation.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
new file mode 100644
index 0000000000000..64f8e96884c71
--- /dev/null
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -0,0 +1,558 @@
+//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tti"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// Set up the analysis group to manage the TargetTransformInfo passes.
+INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
+char TargetTransformInfo::ID = 0;
+
+TargetTransformInfo::~TargetTransformInfo() {
+}
+
+void TargetTransformInfo::pushTTIStack(Pass *P) {
+  TopTTI = this;
+  PrevTTI = &P->getAnalysis<TargetTransformInfo>();
+
+  // Walk up the chain and update the top TTI pointer.
+  for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
+    PTTI->TopTTI = this;
+}
+
+void TargetTransformInfo::popTTIStack() {
+  TopTTI = 0;
+
+  // Walk up the chain and update the top TTI pointer.
+ for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) + PTTI->TopTTI = PrevTTI; + + PrevTTI = 0; +} + +void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); +} + +unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy) const { + return PrevTTI->getOperationCost(Opcode, Ty, OpTy); +} + +unsigned TargetTransformInfo::getGEPCost( + const Value *Ptr, ArrayRef Operands) const { + return PrevTTI->getGEPCost(Ptr, Operands); +} + +unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, + int NumArgs) const { + return PrevTTI->getCallCost(FTy, NumArgs); +} + +unsigned TargetTransformInfo::getCallCost(const Function *F, + int NumArgs) const { + return PrevTTI->getCallCost(F, NumArgs); +} + +unsigned TargetTransformInfo::getCallCost( + const Function *F, ArrayRef Arguments) const { + return PrevTTI->getCallCost(F, Arguments); +} + +unsigned TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef ParamTys) const { + return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys); +} + +unsigned TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const { + return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments); +} + +unsigned TargetTransformInfo::getUserCost(const User *U) const { + return PrevTTI->getUserCost(U); +} + +bool TargetTransformInfo::isLoweredToCall(const Function *F) const { + return PrevTTI->isLoweredToCall(F); +} + +bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { + return PrevTTI->isLegalAddImmediate(Imm); +} + +bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { + return PrevTTI->isLegalICmpImmediate(Imm); +} + +bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, + bool HasBaseReg, + int64_t Scale) const { + return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale); +} + +bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { + return PrevTTI->isTruncateFree(Ty1, Ty2); +} + +bool TargetTransformInfo::isTypeLegal(Type *Ty) const { + return PrevTTI->isTypeLegal(Ty); +} + +unsigned TargetTransformInfo::getJumpBufAlignment() const { + return PrevTTI->getJumpBufAlignment(); +} + +unsigned TargetTransformInfo::getJumpBufSize() const { + return PrevTTI->getJumpBufSize(); +} + +bool TargetTransformInfo::shouldBuildLookupTables() const { + return PrevTTI->shouldBuildLookupTables(); +} + +TargetTransformInfo::PopcntSupportKind +TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { + return PrevTTI->getPopcntSupport(IntTyWidthInBit); +} + +unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(Imm, Ty); +} + +unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { + return PrevTTI->getNumberOfRegisters(Vector); +} + +unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { + return PrevTTI->getRegisterBitWidth(Vector); +} + +unsigned TargetTransformInfo::getMaximumUnrollFactor() const { + return PrevTTI->getMaximumUnrollFactor(); +} + +unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, + Type *Ty, + OperandValueKind Op1Info, + OperandValueKind Op2Info) const { + return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); +} + +unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const { + return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp); +} + 
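// [Editor's note] A sketch of how a transform consumes this analysis group
// (hypothetical pass code; the group resolves to the topmost target-provided
// implementation, or to the NoTTI fallback defined further below):
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfo>();
//   }
//   bool MyPass::runOnFunction(Function &F) {
//     const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
//     if (TTI.getNumberOfRegisters(/*Vector=*/true) > 0)
//       /* drive vectorization decisions off TTI cost queries */;
//     return false;
//   }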
+unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + return PrevTTI->getCastInstrCost(Opcode, Dst, Src); +} + +unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { + return PrevTTI->getCFInstrCost(Opcode); +} + +unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + return PrevTTI->getVectorInstrCost(Opcode, Val, Index); +} + +unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + ; +} + +unsigned +TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, + Type *RetTy, + ArrayRef Tys) const { + return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys); +} + +unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { + return PrevTTI->getNumberOfParts(Tp); +} + +unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const { + return PrevTTI->getAddressComputationCost(Tp); +} + +namespace { + +struct NoTTI : ImmutablePass, TargetTransformInfo { + const DataLayout *DL; + + NoTTI() : ImmutablePass(ID), DL(0) { + initializeNoTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + // Note that this subclass is special, and must *not* call initializeTTI as + // it does not chain. + TopTTI = this; + PrevTTI = 0; + DL = getAnalysisIfAvailable(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // Note that this subclass is special, and must *not* call + // TTI::getAnalysisUsage as it breaks the recursion. + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const { + switch (Opcode) { + default: + // By default, just classify everything as 'basic'. + return TCC_Basic; + + case Instruction::GetElementPtr: + llvm_unreachable("Use getGEPCost for GEP operations!"); + + case Instruction::BitCast: + assert(OpTy && "Cast instructions must provide the operand type"); + if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) + // Identity and pointer-to-pointer casts are free. + return TCC_Free; + + // Otherwise, the default basic cost is used. + return TCC_Basic; + + case Instruction::IntToPtr: + // An inttoptr cast is free so long as the input is a legal integer type + // which doesn't contain values outside the range of a pointer. + if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && + OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits()) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + + case Instruction::PtrToInt: + // A ptrtoint cast is free so long as the result is large enough to store + // the pointer, and a legal integer type. + if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) && + Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + + case Instruction::Trunc: + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). 
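// [Editor's note] Concrete costs the cast rules above produce, assuming a
// 64-bit DataLayout whose native ("legal") integers are i8/i16/i32/i64:
//
//   bitcast  i8*  -> i32*  --> TCC_Free   (pointer-to-pointer cast)
//   inttoptr i64  -> i8*   --> TCC_Free   (legal int, not wider than a pointer)
//   inttoptr i128 -> i8*   --> TCC_Basic  (i128 is not a legal integer)
//   ptrtoint i8*  -> i64   --> TCC_Free   (legal int, at least pointer-wide)
//   trunc    i64  -> i32   --> TCC_Free   (truncates to a native width)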
+ if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty))) + return TCC_Free; + + return TCC_Basic; + } + } + + unsigned getGEPCost(const Value *Ptr, + ArrayRef Operands) const { + // In the basic model, we just assume that all-constant GEPs will be folded + // into their uses via addressing modes. + for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) + if (!isa(Operands[Idx])) + return TCC_Basic; + + return TCC_Free; + } + + unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const { + assert(FTy && "FunctionType must be provided to this routine."); + + // The target-independent implementation just measures the size of the + // function by approximating that each argument will take on average one + // instruction to prepare. + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = FTy->getNumParams(); + + return TCC_Basic * (NumArgs + 1); + } + + unsigned getCallCost(const Function *F, int NumArgs = -1) const { + assert(F && "A concrete function must be provided to this routine."); + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = F->arg_size(); + + if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) { + FunctionType *FTy = F->getFunctionType(); + SmallVector ParamTys(FTy->param_begin(), FTy->param_end()); + return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); + } + + if (!TopTTI->isLoweredToCall(F)) + return TCC_Basic; // Give a basic cost if it will be lowered directly. + + return TopTTI->getCallCost(F->getFunctionType(), NumArgs); + } + + unsigned getCallCost(const Function *F, + ArrayRef Arguments) const { + // Simply delegate to generic handling of the call. + // FIXME: We should use instsimplify or something else to catch calls which + // will constant fold with these arguments. + return TopTTI->getCallCost(F, Arguments.size()); + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) const { + switch (IID) { + default: + // Intrinsics rarely (if ever) have normal argument setup constraints. + // Model them as having a basic instruction cost. + // FIXME: This is wrong for libc intrinsics. + return TCC_Basic; + + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't actually represent code after lowering. + return TCC_Free; + } + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Arguments) const { + // Delegate to the generic intrinsic handling code. This mostly provides an + // opportunity for targets to (for example) special case the cost of + // certain intrinsics based on constants used as arguments. + SmallVector ParamTys; + ParamTys.reserve(Arguments.size()); + for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) + ParamTys.push_back(Arguments[Idx]->getType()); + return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys); + } + + unsigned getUserCost(const User *U) const { + if (isa(U)) + return TCC_Free; // Model all PHI nodes as free. + + if (const GEPOperator *GEP = dyn_cast(U)) + // In the basic model we just assume that all-constant GEPs will be + // folded into their uses via addressing modes. + return GEP->hasAllConstantIndices() ? 
+
+    if (ImmutableCallSite CS = U) {
+      const Function *F = CS.getCalledFunction();
+      if (!F) {
+        // Just use the called value type.
+        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
+        return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
+      }
+
+      SmallVector<const Value *, 8> Arguments;
+      for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(),
+                                           AE = CS.arg_end();
+           AI != AE; ++AI)
+        Arguments.push_back(*AI);
+
+      return TopTTI->getCallCost(F, Arguments);
+    }
+
+    if (const CastInst *CI = dyn_cast<CastInst>(U)) {
+      // Result of a cmp instruction is often extended (to be used by other
+      // cmp instructions, logical or return instructions). These are usually
+      // nop on most sane targets.
+      if (isa<CmpInst>(CI->getOperand(0)))
+        return TCC_Free;
+    }
+
+    // Otherwise delegate to the fully generic implementations.
+    return getOperationCost(Operator::getOpcode(U), U->getType(),
+                            U->getNumOperands() == 1 ?
+                            U->getOperand(0)->getType() : 0);
+  }
+
+  bool isLoweredToCall(const Function *F) const {
+    // FIXME: These should almost certainly not be handled here, and instead
+    // handled with the help of TLI or the target itself. This was largely
+    // ported from existing analysis heuristics here so that such refactorings
+    // can take place in the future.
+
+    if (F->isIntrinsic())
+      return false;
+
+    if (F->hasLocalLinkage() || !F->hasName())
+      return true;
+
+    StringRef Name = F->getName();
+
+    // These will all likely lower to a single selection DAG node.
+    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
+        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
+        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
+      return false;
+
+    // These are all likely to be optimized into something smaller.
+    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
+        Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name ==
+        "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" ||
+        Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs")
+      return false;
+
+    return true;
+  }
+
+  bool isLegalAddImmediate(int64_t Imm) const {
+    return false;
+  }
+
+  bool isLegalICmpImmediate(int64_t Imm) const {
+    return false;
+  }
+
+  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
+                             bool HasBaseReg, int64_t Scale) const {
+    // Guess that reg+reg addressing is allowed. This heuristic is taken from
+    // the implementation of LSR.
+    return !BaseGV && BaseOffset == 0 && Scale <= 1;
+  }
+
+  bool isTruncateFree(Type *Ty1, Type *Ty2) const {
+    return false;
+  }
+
+  bool isTypeLegal(Type *Ty) const {
+    return false;
+  }
+
+  unsigned getJumpBufAlignment() const {
+    return 0;
+  }
+
+  unsigned getJumpBufSize() const {
+    return 0;
+  }
+
+  bool shouldBuildLookupTables() const {
+    return true;
+  }
+
+  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
+    return PSK_Software;
+  }
+
+  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
+    return 1;
+  }
+
+  unsigned getNumberOfRegisters(bool Vector) const {
+    return 8;
+  }
+
+  unsigned getRegisterBitWidth(bool Vector) const {
+    return 32;
+  }
+
+  unsigned getMaximumUnrollFactor() const {
+    return 1;
+  }
+
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+                                  OperandValueKind) const {
+    return 1;
+  }
+
+  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+                          int Index = 0, Type *SubTp = 0) const {
+    return 1;
+  }
+
+  unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                            Type *Src) const {
+    return 1;
+  }
+
+  unsigned getCFInstrCost(unsigned Opcode) const {
+    return 1;
+  }
+
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                              Type *CondTy = 0) const {
+    return 1;
+  }
+
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                              unsigned Index = -1) const {
+    return 1;
+  }
+
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+                           unsigned Alignment,
+                           unsigned AddressSpace) const {
+    return 1;
+  }
+
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID,
+                                 Type *RetTy,
+                                 ArrayRef<Type *> Tys) const {
+    return 1;
+  }
+
+  unsigned getNumberOfParts(Type *Tp) const {
+    return 0;
+  }
+
+  unsigned getAddressComputationCost(Type *Tp) const {
+    return 0;
+  }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti",
+                   "No target information", true, true, true)
+char NoTTI::ID = 0;
+
+ImmutablePass *llvm::createNoTargetTransformInfoPass() {
+  return new NoTTI();
+}
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 22da857620347..4c68322b82823 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -16,8 +16,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Trace.h"
-#include "llvm/Function.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 0faf1398ec766..68e43b2cdb633 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -57,12 +57,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Constants.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Metadata.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 3beb373dc5cc8..45dcc5e37ecfb 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,21 +13,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ValueTracking.h"
"llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/GlobalVariable.h" -#include "llvm/GlobalAlias.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/Operator.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/ADT/SmallPtrSet.h" #include using namespace llvm; using namespace llvm::PatternMatch; @@ -58,7 +58,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - + // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is // from [0-C]. @@ -84,7 +84,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); // Determine which operand has more trailing zeros, and use that @@ -266,11 +266,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { Elt = CDS->getElementAsInteger(i); KnownZero &= ~Elt; - KnownOne &= Elt; + KnownOne &= Elt; } return; } - + // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast(V)) { unsigned Align = GV->getAlignment(); @@ -306,7 +306,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } return; } - + if (Argument *A = dyn_cast(V)) { unsigned Align = 0; @@ -345,9 +345,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If either the LHS or the RHS are Zero, the result is zero. ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. 
@@ -357,9 +357,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
   case Instruction::Or: {
     ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
     ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
     // Output known-0 bits are only known if clear in both the LHS & RHS.
     KnownZero &= KnownZero2;
     // Output known-1 are known to be set if set in either the LHS | RHS.
@@ -369,9 +369,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
   case Instruction::Xor: {
     ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
     ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
     // Output known-0 bits are known if clear or set in both the LHS & RHS.
     APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
     // Output known-1 are known to be set if set in only one of the LHS, RHS.
@@ -407,8 +407,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
 
     ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
     ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
 
     // Only known if known in both the LHS and RHS.
     KnownOne &= KnownOne2;
@@ -433,7 +433,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
     unsigned SrcBitWidth;
     // Note that we handle pointer operands here because of inttoptr/ptrtoint
     // which fall through here.
-    SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
+    if(TD) {
+      SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
+    } else {
+      SrcBitWidth = SrcTy->getScalarSizeInBits();
+      if (!SrcBitWidth) return;
+    }
 
     assert(SrcBitWidth && "SrcBitWidth can't be zero");
     KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
@@ -460,11 +465,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
   case Instruction::SExt: {
     // Compute the bits in the result that are not present in the input.
     unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
-    
+
     KnownZero = KnownZero.trunc(SrcBitWidth);
     KnownOne = KnownOne.trunc(SrcBitWidth);
     ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
 
@@ -481,7 +486,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
       ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
       KnownZero <<= ShiftAmt;
       KnownOne <<= ShiftAmt;
       KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
@@ -493,10 +498,10 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
       // Compute the new bits that are at the top now.
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-      
+
       // Unsigned shift right.
       ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
       KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
       // high bits known zero.
@@ -509,13 +514,13 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
       // Compute the new bits that are at the top now.
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
-      
+
       // Signed shift right.
      ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
       KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
-      
+
       APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
       if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
         KnownZero |= HighBits;
@@ -559,7 +564,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
         if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
           KnownOne |= ~LowBits;
 
-        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
       }
     }
 
@@ -606,7 +611,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
     unsigned Align = AI->getAlignment();
     if (Align == 0 && TD)
       Align = TD->getABITypeAlignment(AI->getType()->getElementType());
-    
+
     if (Align > 0)
       KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align));
     break;
@@ -643,7 +648,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
                                         LocalKnownZero.countTrailingOnes()));
       }
     }
-    
+
     KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
     break;
   }
@@ -799,12 +804,11 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
   KnownZero = ZeroBits[BitWidth - 1];
 }
 
-/// isPowerOfTwo - Return true if the given value is known to have exactly one
+/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one
 /// bit set when defined. For vectors return true if every element is known to
 /// be a power of two when defined. Supports values with integer or pointer
 /// types and vectors of integers.
-bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
-                        unsigned Depth) {
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (C->isNullValue())
       return OrZero;
@@ -831,19 +835,19 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
   // A shift of a power of two is a power of two or zero.
   if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
                  match(V, m_Shr(m_Value(X), m_Value()))))
-    return isPowerOfTwo(X, TD, /*OrZero*/true, Depth);
+    return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth);
 
   if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
-    return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth);
+    return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth);
 
   if (SelectInst *SI = dyn_cast<SelectInst>(V))
-    return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) &&
-           isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth);
+    return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) &&
+           isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth);
 
   if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
     // A power of two and'd with anything is a power of two or zero.
-    if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) ||
-        isPowerOfTwo(Y, TD, /*OrZero*/true, Depth))
+    if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) ||
+        isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth))
       return true;
     // X & (-X) is always a power of two or zero.
     if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
@@ -856,7 +860,73 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
   // copying a sign bit (sdiv int_min, 2).
   if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
       match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
-    return isPowerOfTwo(cast<Operator>(V)->getOperand(0), TD, OrZero, Depth);
+    return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, Depth);
+  }
+
+  return false;
+}
+
+/// \brief Test whether a GEP's result is known to be non-null.
+///
+/// Uses properties inherent in a GEP to try to determine whether it is known
+/// to be non-null.
+///
+/// Currently this routine does not support vector GEPs.
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
+                              unsigned Depth) {
+  if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
+    return false;
+
+  // FIXME: Support vector-GEPs.
+  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
+
+  // If the base pointer is non-null, we cannot walk to a null address with an
+  // inbounds GEP in address space zero.
+  if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth))
+    return true;
+
+  // Past this, if we don't have DataLayout, we can't do much.
+  if (!DL)
+    return false;
+
+  // Walk the GEP operands and see if any operand introduces a non-zero offset.
+  // If so, then the GEP cannot produce a null pointer, as doing so would
+  // inherently violate the inbounds contract within address space zero.
+  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+       GTI != GTE; ++GTI) {
+    // Struct types are easy -- they must always be indexed by a constant.
+    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+      ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
+      unsigned ElementIdx = OpC->getZExtValue();
+      const StructLayout *SL = DL->getStructLayout(STy);
+      uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
+      if (ElementOffset > 0)
+        return true;
+      continue;
+    }
+
+    // If we have a zero-sized type, the index doesn't matter. Keep looping.
+    if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0)
+      continue;
+
+    // Fast path the constant operand case both for efficiency and so we don't
+    // increment Depth when just zipping down an all-constant GEP.
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
+      if (!OpC->isZero())
+        return true;
+      continue;
+    }
+
+    // We post-increment Depth here because while isKnownNonZero increments it
+    // as well, when we pop back up that increment won't persist. We don't want
+    // to recurse 10k times just because we have 10k GEP operands. We don't
+    // bail completely out because we want to handle constant GEPs regardless
+    // of depth.
+    if (Depth++ >= MaxDepth)
+      continue;
+
+    if (isKnownNonZero(GTI.getOperand(), DL, Depth))
+      return true;
   }
 
   return false;
@@ -881,7 +951,16 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
   if (Depth++ >= MaxDepth)
     return false;
 
-  unsigned BitWidth = getBitWidth(V->getType(), TD);
+  // Check for pointer simplifications.
+  if (V->getType()->isPointerTy()) {
+    if (isKnownNonNull(V))
+      return true;
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+      if (isGEPKnownNonNull(GEP, TD, Depth))
+        return true;
+  }
+
+  unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD);
 
   // X | Y != 0 if X != 0 or Y != 0.
   Value *X = 0, *Y = 0;
@@ -955,9 +1034,9 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
     }
 
     // The sum of a non-negative number and a power of two is not zero.
-    if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth))
+    if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth))
       return true;
-    if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth))
+    if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth))
       return true;
   }
   // X * Y.
@@ -996,7 +1075,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
                              const DataLayout *TD, unsigned Depth) {
   APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
   ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
-  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
   return (KnownZero & Mask) == Mask;
 }
@@ -1026,14 +1105,14 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
 
   if (Depth == 6)
     return 1;  // Limit search depth.
-  
+
   Operator *U = dyn_cast<Operator>(V);
   switch (Operator::getOpcode(V)) {
   default: break;
   case Instruction::SExt:
     Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
     return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
-    
+
   case Instruction::AShr: {
     Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
     // ashr X, C -> adds C sign bits.  Vectors too.
@@ -1075,38 +1154,38 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
     if (Tmp == 1) return 1;  // Early out.
     Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
     return std::min(Tmp, Tmp2);
-    
+
   case Instruction::Add:
     // Add can have at most one carry bit.  Thus we know that the output
     // is, at worst, one more bit than the inputs.
     Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
     if (Tmp == 1) return 1;  // Early out.
-    
+
     // Special case decrementing a value (ADD X, -1):
     if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
       if (CRHS->isAllOnesValue()) {
         APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
         ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-        
+
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
         if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
           return TyBits;
-        
+
         // If we are subtracting one from a positive number, there is no carry
         // out of the result.
         if (KnownZero.isNegative())
          return Tmp;
       }
-      
+
     Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
     if (Tmp2 == 1) return 1;
     return std::min(Tmp, Tmp2)-1;
-    
+
   case Instruction::Sub:
     Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
     if (Tmp2 == 1) return 1;
-    
+
     // Handle NEG.
     if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
       if (CLHS->isNullValue()) {
@@ -1116,26 +1195,26 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
         // sign bits set.
         if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
           return TyBits;
-        
+
         // If the input is known to be positive (the sign bit is known clear),
         // the output of the NEG has the same number of sign bits as the input.
         if (KnownZero.isNegative())
           return Tmp2;
-        
+
         // Otherwise, we treat this like a SUB.
       }
-    
+
     // Sub can have at most one carry bit.  Thus we know that the output
     // is, at worst, one more bit than the inputs.
     Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
     if (Tmp == 1) return 1;  // Early out.
     return std::min(Tmp, Tmp2)-1;
-    
+
   case Instruction::PHI: {
     PHINode *PN = cast<PHINode>(U);
     // Don't analyze large in-degree PHIs.
     if (PN->getNumIncomingValues() > 4) break;
-    
+
     // Take the minimum of all incoming values.  This can't infinitely loop
     // because of our depth threshold.
     Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
@@ -1152,13 +1231,13 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
     // case for targets like X86.
     break;
   }
-  
+
   // Finally, if we can prove that the top bits of the result are 0's or 1's,
   // use this information.
   APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
   APInt Mask;
   ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
-  
+
   if (KnownZero.isNegative()) {        // sign bit is 0
     Mask = KnownZero;
   } else if (KnownOne.isNegative()) {  // sign bit is 1;
@@ -1167,7 +1246,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
     // Nothing known.
     return FirstAnswer;
   }
-  
+
   // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
   // the number of identical bits in the top of the input value.
   Mask = ~Mask;
@@ -1195,7 +1274,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
 
   if (Base == 0)
     return false;
-    
+
   if (Base == 1) {
     Multiple = V;
     return true;
@@ -1211,11 +1290,11 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
 
   if (CI && CI->getZExtValue() % Base == 0) {
     Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
-    return true; 
+    return true;
   }
-  
+
   if (Depth == MaxDepth) return false;  // Limit search depth.
-  
+
   Operator *I = dyn_cast<Operator>(V);
   if (!I) return false;
 
@@ -1247,13 +1326,13 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
     if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
       if (Constant *Op1C = dyn_cast<Constant>(Op1))
         if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
-          if (Op1C->getType()->getPrimitiveSizeInBits() < 
+          if (Op1C->getType()->getPrimitiveSizeInBits() <
              MulC->getType()->getPrimitiveSizeInBits())
            Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
-          if (Op1C->getType()->getPrimitiveSizeInBits() > 
+          if (Op1C->getType()->getPrimitiveSizeInBits() >
              MulC->getType()->getPrimitiveSizeInBits())
            MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
-          
+
           // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
           Multiple = ConstantExpr::getMul(MulC, Op1C);
           return true;
@@ -1271,13 +1350,13 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
     if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
       if (Constant *Op0C = dyn_cast<Constant>(Op0))
        if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
-          if (Op0C->getType()->getPrimitiveSizeInBits() < 
+          if (Op0C->getType()->getPrimitiveSizeInBits() <
              MulC->getType()->getPrimitiveSizeInBits())
            Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
-          if (Op0C->getType()->getPrimitiveSizeInBits() > 
+          if (Op0C->getType()->getPrimitiveSizeInBits() >
              MulC->getType()->getPrimitiveSizeInBits())
            MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
-          
+
           // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
           Multiple = ConstantExpr::getMul(MulC, Op0C);
           return true;
@@ -1297,7 +1376,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
   return false;
 }
 
-/// CannotBeNegativeZero - Return true if we can prove that the specified FP 
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
 /// value is never equal to -0.0.
 ///
 /// NOTE: this function will need to be revisited when we support non-default
@@ -1306,28 +1385,33 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
 bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
     return !CFP->getValueAPF().isNegZero();
-  
+
   if (Depth == 6)
     return 1;  // Limit search depth.
 
   const Operator *I = dyn_cast<Operator>(V);
   if (I == 0) return false;
-  
+
+  // Check if the nsz fast-math flag is set
+  if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
+    if (FPO->hasNoSignedZeros())
+      return true;
+
   // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
-  if (I->getOpcode() == Instruction::FAdd &&
-      isa<ConstantFP>(I->getOperand(1)) &&
-      cast<ConstantFP>(I->getOperand(1))->isNullValue())
-    return true;
-  
+  if (I->getOpcode() == Instruction::FAdd)
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1)))
+      if (CFP->isNullValue())
+        return true;
+
   // sitofp and uitofp turn into +0.0 for zero.
   if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
     return true;
-  
+
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
     // sqrt(-0.0) = -0.0, no other negative results are possible.
     if (II->getIntrinsicID() == Intrinsic::sqrt)
       return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
-  
+
   if (const CallInst *CI = dyn_cast<CallInst>(I))
     if (const Function *F = CI->getCalledFunction()) {
       if (F->isDeclaration()) {
@@ -1342,7 +1426,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
           return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
       }
     }
-  
+
   return false;
 }
 
@@ -1359,9 +1443,9 @@ Value *llvm::isBytewiseValue(Value *V) {
   if (Constant *C = dyn_cast<Constant>(V))
     if (C->isNullValue())
       return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
-  
+
   // Constant float and double values can be handled as integer values if the
-  // corresponding integer value is "byteable".  An important case is 0.0. 
+  // corresponding integer value is "byteable".  An important case is 0.0.
   if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
     if (CFP->getType()->isFloatTy())
       V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
@@ -1369,8 +1453,8 @@ Value *llvm::isBytewiseValue(Value *V) {
       V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
     // Don't handle long double formats, which have strange constraints.
   }
-  
-  // We can handle constant integers that are power of two in size and a 
+
+  // We can handle constant integers that are power of two in size and a
   // multiple of 8 bits.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     unsigned Width = CI->getBitWidth();
@@ -1384,7 +1468,7 @@ Value *llvm::isBytewiseValue(Value *V) {
       Val2  = Val.lshr(NextWidth);
       Val2 = Val2.trunc(Val.getBitWidth()/2);
       Val = Val.trunc(Val.getBitWidth()/2);
-      
+
       // If the top/bottom halves aren't the same, reject it.
       if (Val != Val2)
        return 0;
@@ -1392,7 +1476,7 @@ Value *llvm::isBytewiseValue(Value *V) {
     return ConstantInt::get(V->getContext(), Val);
     }
   }
-  
+
   // A ConstantDataArray/Vector is splatable if all its members are equal and
   // also splatable.
   if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
@@ -1400,11 +1484,11 @@ Value *llvm::isBytewiseValue(Value *V) {
     Value *Val = isBytewiseValue(Elt);
     if (!Val)
       return 0;
-    
+
     for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
       if (CA->getElementAsConstant(I) != Elt)
         return 0;
-    
+
     return Val;
   }
 
@@ -1428,7 +1512,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
                                 SmallVector<unsigned, 10> &Idxs,
                                 unsigned IdxSkip,
                                 Instruction *InsertBefore) {
-  llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+  llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
   if (STy) {
     // Save the original To argument so we can modify it
     Value *OrigTo = To;
@@ -1459,7 +1543,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
   // the struct's elements had a value that was inserted directly. In the latter
   // case, perhaps we can't determine each of the subelements individually, but
   // we might be able to find the complete struct somewhere.
-  
+
   // Find the value that is at that particular spot
   Value *V = FindInsertedValue(From, Idxs);
 
@@ -1518,7 +1602,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
     if (C == 0) return 0;
     return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
   }
-  
+
   if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
     // Loop the indices for the insertvalue instruction in parallel with the
     // requested indices
@@ -1543,7 +1627,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
       return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
                                InsertBefore);
     }
-    
+
     // This insert value inserts something else than what we are looking for.
     // See if the (aggregrate) value inserted into has the value we are
     // looking for, then.
@@ -1558,26 +1642,26 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
                              makeArrayRef(req_idx, idx_range.end()),
                              InsertBefore);
   }
-  
+
   if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
     // If we're extracting a value from an aggregrate that was extracted from
     // something else, we can extract from that something else directly instead.
     // However, we will need to chain I's indices with the requested indices.
-    
-    // Calculate the number of indices required 
+
+    // Calculate the number of indices required
     unsigned size = I->getNumIndices() + idx_range.size();
     // Allocate some space to put the new indices in
     SmallVector<unsigned, 5> Idxs;
     Idxs.reserve(size);
     // Add indices from the extract value instruction
     Idxs.append(I->idx_begin(), I->idx_end());
-    
+
     // Add requested indices
     Idxs.append(idx_range.begin(), idx_range.end());
 
-    assert(Idxs.size() == size 
+    assert(Idxs.size() == size
            && "Number of indices added not correct?");
-    
+
     return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
   }
   // Otherwise, we don't know (such as, extracting from a function return value
@@ -1589,41 +1673,33 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
 /// it can be expressed as a base pointer plus a constant offset. Return the
 /// base and offset to the caller.
 Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                              const DataLayout &TD) {
-  Operator *PtrOp = dyn_cast<Operator>(Ptr);
-  if (PtrOp == 0 || Ptr->getType()->isVectorTy())
-    return Ptr;
-
-  // Just look through bitcasts.
-  if (PtrOp->getOpcode() == Instruction::BitCast)
-    return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
-
-  // If this is a GEP with constant indices, we can look through it.
-  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
-  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
-
-  gep_type_iterator GTI = gep_type_begin(GEP);
-  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
-       ++I, ++GTI) {
-    ConstantInt *OpC = cast<ConstantInt>(*I);
-    if (OpC->isZero()) continue;
-
-    // Handle a struct and array indices which add their offset to the pointer.
-    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
-      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+                                              const DataLayout *TD) {
+  // Without DataLayout, conservatively assume 64-bit offsets, which is
+  // the widest we support.
+  unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+  APInt ByteOffset(BitWidth, 0);
+  while (1) {
+    if (Ptr->getType()->isVectorTy())
+      break;
+
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+      APInt GEPOffset(BitWidth, 0);
+      if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset))
+        break;
+      ByteOffset += GEPOffset;
+      Ptr = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
+      Ptr = cast<Operator>(Ptr)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
+      if (GA->mayBeOverridden())
+        break;
+      Ptr = GA->getAliasee();
     } else {
-      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
-      Offset += OpC->getSExtValue()*Size;
+      break;
     }
   }
-
-  // Re-sign extend from the pointer size if needed to get overflow edge cases
-  // right.
-  unsigned PtrSize = TD.getPointerSizeInBits();
-  if (PtrSize < 64)
-    Offset = SignExtend64(Offset, PtrSize);
-
-  return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+  Offset = ByteOffset.getSExtValue();
+  return Ptr;
 }
 
@@ -1636,26 +1712,26 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
 
   // Look through bitcast instructions and geps.
   V = V->stripPointerCasts();
-  
+
   // If the value is a GEP instructionor  constant expression, treat it as an
   // offset.
   if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
     // Make sure the GEP has exactly three arguments.
     if (GEP->getNumOperands() != 3)
       return false;
-    
+
     // Make sure the index-ee is a pointer to array of i8.
     PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
     ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
     if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
       return false;
-    
+
     // Check to make sure that the first operand of the GEP is an integer and
     // has value 0 so that we are sure we're indexing into the initializer.
     const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
     if (FirstIdx == 0 || !FirstIdx->isZero())
       return false;
-    
+
     // If the second index isn't a ConstantInt, then this is a variable index
     // into the array.  If this occurs, we can't say anything meaningful about
     // the string.
@@ -1681,13 +1757,13 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
     Str = "";
     return true;
   }
-  
+
   // Must be a Constant Array
   const ConstantDataArray *Array =
     dyn_cast<ConstantDataArray>(GV->getInitializer());
   if (Array == 0 || !Array->isString())
     return false;
-  
+
   // Get the number of elements in the array
   uint64_t NumElts = Array->getType()->getArrayNumElements();
 
@@ -1696,10 +1772,10 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
 
   if (Offset > NumElts)
     return false;
-  
+
   // Skip over 'offset' bytes.
   Str = Str.substr(Offset);
-  
+
   if (TrimAtNul) {
     // Trim off the \0 and anything after it.  If the array is not nul
     // terminated, we just return the whole end of string.  The client may know
@@ -1753,7 +1829,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
     if (Len1 != Len2) return 0;
     return Len1;
   }
-  
+
   // Otherwise, see if we can read the string.
   StringRef StrData;
   if (!getConstantStringInfo(V, StrData))
@@ -1940,3 +2016,19 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
     return false; // Misc instructions which have effects
   }
 }
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+bool llvm::isKnownNonNull(const Value *V) {
+  // Alloca never returns null, malloc might.
+  if (isa<AllocaInst>(V)) return true;
+
+  // A byval argument is never null.
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return A->hasByValAttr();
+
+  // Global values are not null unless extern weak.
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+    return !GV->hasExternalWeakLinkage();
+  return false;
+}
-- 
cgit v1.3
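
[Editor's illustration, placed after the patch trailer so tools such as git am ignore it. This is a minimal sketch of how a pass written against this imported revision might consume the new TargetTransformInfo analysis group and its TCC_Free/TCC_Basic cost buckets; the pass name CountUserCosts, its registration string, and the cost-summing behavior are invented for this sketch and are not part of the import.]

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
// Walks every instruction in a function and sums the size-model cost that
// TTI assigns to it.
struct CountUserCosts : FunctionPass {
  static char ID;
  CountUserCosts() : FunctionPass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    // Requesting the analysis group resolves to a target's TTI when one is
    // registered, and to the NoTTI fallback defined above otherwise.
    AU.addRequired<TargetTransformInfo>();
    AU.setPreservesAll();
  }

  virtual bool runOnFunction(Function &F) {
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
    unsigned Cost = 0;
    for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        Cost += TTI.getUserCost(&*I); // TCC_Free for PHIs, free casts, ...
    errs() << F.getName() << ": estimated cost " << Cost << "\n";
    return false; // Analysis only; the IR is untouched.
  }
};
}

char CountUserCosts::ID = 0;
static RegisterPass<CountUserCosts>
X("count-user-costs", "Sum TTI user costs (illustrative only)");

[Note that getUserCost dispatches through TopTTI, so a target override of, say, getCallCost is still honored even when the query enters through the generic NoTTI layer.]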