Diffstat (limited to 'lib/Transforms/IPO')

 lib/Transforms/IPO/ArgumentPromotion.cpp       |  23
 lib/Transforms/IPO/CMakeLists.txt              |   2
 lib/Transforms/IPO/DeadArgumentElimination.cpp |  21
 lib/Transforms/IPO/FunctionAttrs.cpp           |  44
 lib/Transforms/IPO/GlobalOpt.cpp               | 223
 lib/Transforms/IPO/IPO.cpp                     |   9
 lib/Transforms/IPO/Inliner.cpp                 |  90
 lib/Transforms/IPO/PartialInlining.cpp         |  10
 lib/Transforms/IPO/PruneEH.cpp                 |  29
 lib/Transforms/IPO/StructRetPromotion.cpp      |  10

 10 files changed, 270 insertions, 191 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 40a87e880d7bf..89f213e2ac367 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -64,7 +64,7 @@ namespace {
       CallGraphSCCPass::getAnalysisUsage(AU);
     }

-    virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+    virtual bool runOnSCC(CallGraphSCC &SCC);
     static char ID; // Pass identification, replacement for typeid
     explicit ArgPromotion(unsigned maxElements = 3)
       : CallGraphSCCPass(&ID), maxElements(maxElements) {}
@@ -91,20 +91,21 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
   return new ArgPromotion(maxElements);
 }

-bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
   bool Changed = false, LocalChange;

   do {  // Iterate until we stop promoting from this SCC.
     LocalChange = false;
     // Attempt to promote arguments from all functions in this SCC.
-    for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-      if (CallGraphNode *CGN = PromoteArguments(SCC[i])) {
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+      if (CallGraphNode *CGN = PromoteArguments(*I)) {
         LocalChange = true;
-        SCC[i] = CGN;
+        SCC.ReplaceNode(*I, CGN);
       }
+    }
     Changed |= LocalChange;               // Remember that we changed something.
   } while (LocalChange);
-  
+
   return Changed;
 }

@@ -873,8 +874,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,

   NF_CGN->stealCalledFunctionsFrom(CG[F]);

-  // Now that the old function is dead, delete it.
-  delete CG.removeFunctionFromModule(F);
+  // Now that the old function is dead, delete it.  If there is a dangling
+  // reference to the CallgraphNode, just leave the dead function around for
+  // someone else to nuke.
+  CallGraphNode *CGN = CG[F];
+  if (CGN->getNumReferences() == 0)
+    delete CG.removeFunctionFromModule(CGN);
+  else
+    F->setLinkage(Function::ExternalLinkage);

   return NF_CGN;
 }
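
The change above is the template for every pass touched in this commit: runOnSCC(std::vector<CallGraphNode*>&) becomes runOnSCC(CallGraphSCC&), iteration uses CallGraphSCC::iterator, and node replacement goes through CallGraphSCC::ReplaceNode so the pass manager's view of the current SCC stays consistent. A minimal sketch of a pass written against the new interface (ExamplePass and its Transform helper are hypothetical, not part of this patch):

    // Hypothetical pass illustrating the post-patch CallGraphSCC interface.
    struct ExamplePass : public CallGraphSCCPass {
      static char ID;
      ExamplePass() : CallGraphSCCPass(&ID) {}

      virtual bool runOnSCC(CallGraphSCC &SCC) {
        bool Changed = false;
        for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
          if (CallGraphNode *New = Transform(*I)) {   // hypothetical helper
            // Tell the SCC about the replacement instead of assigning into a
            // vector, so iterators and the pass manager stay in sync.
            SCC.ReplaceNode(*I, New);
            Changed = true;
          }
        return Changed;
      }

      CallGraphNode *Transform(CallGraphNode *CGN);   // new node, or null
    };
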
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 92bef3bb75e94..65483e8fed636 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -23,3 +23,5 @@ add_llvm_library(LLVMipo
   StripSymbols.cpp
   StructRetPromotion.cpp
   )
+
+target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine)
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 227602d19f568..6443dd4f47a65 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -243,6 +243,9 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
       if (cast<CallInst>(Call)->isTailCall())
         cast<CallInst>(New)->setTailCall();
     }
+    if (MDNode *N = Call->getDbgMetadata())
+      New->setDbgMetadata(N);
+
     Args.clear();

     if (!Call->use_empty())
@@ -694,18 +697,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(),
                                         AttributesVec.end());

-  // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
-  // have zero fixed arguments.
-  //
-  // Note that we apply this hack for a vararg fuction that does not have any
-  // arguments anymore, but did have them before (so don't bother fixing
-  // functions that were already broken wrt CWriter).
-  bool ExtraArgHack = false;
-  if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) {
-    ExtraArgHack = true;
-    Params.push_back(Type::getInt32Ty(F->getContext()));
-  }
-
   // Create the new function type based on the recomputed parameters.
   FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());

@@ -755,9 +746,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
     }

-    if (ExtraArgHack)
-      Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext())));
-
     // Push any varargs arguments on the list. Don't forget their attributes.
     for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
       Args.push_back(*I);
@@ -785,6 +773,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       if (cast<CallInst>(Call)->isTailCall())
         cast<CallInst>(New)->setTailCall();
     }
+    if (MDNode *N = Call->getDbgMetadata())
+      New->setDbgMetadata(N);
+
     Args.clear();

     if (!Call->use_empty()) {
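
Both call-rewriting paths in DeadArgumentElimination now carry debug metadata across the rewrite. As a standalone sketch of that pattern (helper name hypothetical; this uses the getDbgMetadata/setDbgMetadata API of this era, before setDebugLoc existed):

    // Preserve source-location info when a call instruction is replaced.
    static void TransferDbgMetadata(Instruction *Old, Instruction *New) {
      if (MDNode *N = Old->getDbgMetadata())
        New->setDbgMetadata(N);
    }
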
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 298d5cf39162f..9bd7af61c531f 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -44,20 +44,20 @@ namespace {
     FunctionAttrs() : CallGraphSCCPass(&ID) {}

     // runOnSCC - Analyze the SCC, performing the transformation if possible.
-    bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+    bool runOnSCC(CallGraphSCC &SCC);

     // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
-    bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC);
+    bool AddReadAttrs(const CallGraphSCC &SCC);

     // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
-    bool AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC);
+    bool AddNoCaptureAttrs(const CallGraphSCC &SCC);

     // IsFunctionMallocLike - Does this function allocate new memory?
     bool IsFunctionMallocLike(Function *F,
                               SmallPtrSet<Function*, 8> &) const;

     // AddNoAliasAttrs - Deduce noalias attributes for the SCC.
-    bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC);
+    bool AddNoAliasAttrs(const CallGraphSCC &SCC);

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
@@ -123,19 +123,19 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) {
 }

 /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
-bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
   SmallPtrSet<Function*, 8> SCCNodes;

   // Fill SCCNodes with the elements of the SCC.  Used for quickly
   // looking up whether a given CallGraphNode is in this SCC.
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    SCCNodes.insert(SCC[i]->getFunction());
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+    SCCNodes.insert((*I)->getFunction());

   // Check if any of the functions in the SCC read or write memory.  If they
   // write memory then they can't be marked readnone or readonly.
   bool ReadsMemory = false;
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-    Function *F = SCC[i]->getFunction();
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();

     if (F == 0)
       // External node - may write memory.  Just give up.
@@ -210,8 +210,8 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
   // Success!  Functions in this SCC do not access memory, or only read memory.
   // Give them the appropriate attribute.
   bool MadeChange = false;
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-    Function *F = SCC[i]->getFunction();
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();

     if (F->doesNotAccessMemory())
       // Already perfect!
@@ -239,13 +239,13 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
 }

 /// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
-bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
   bool Changed = false;

   // Check each function in turn, determining which pointer arguments are not
   // captured.
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-    Function *F = SCC[i]->getFunction();
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();

     if (F == 0)
       // External node - skip it;
@@ -334,18 +334,18 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
 }

 /// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
-bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
   SmallPtrSet<Function*, 8> SCCNodes;

   // Fill SCCNodes with the elements of the SCC.  Used for quickly
   // looking up whether a given CallGraphNode is in this SCC.
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    SCCNodes.insert(SCC[i]->getFunction());
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+    SCCNodes.insert((*I)->getFunction());

   // Check each function in turn, determining which functions return noalias
   // pointers.
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-    Function *F = SCC[i]->getFunction();
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();

     if (F == 0)
       // External node - skip it;
@@ -370,8 +370,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
   }

   bool MadeChange = false;
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-    Function *F = SCC[i]->getFunction();
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();

     if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
       continue;
@@ -383,7 +383,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
   return MadeChange;
 }

-bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) {
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
   bool Changed = AddReadAttrs(SCC);
   Changed |= AddNoCaptureAttrs(SCC);
   Changed |= AddNoAliasAttrs(SCC);
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index ddff5ef8b36c8..b429213a7db31 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -143,7 +143,8 @@ struct GlobalStatus {
 static bool SafeToDestroyConstant(const Constant *C) {
   if (isa<GlobalValue>(C)) return false;

-  for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
+  for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+       ++UI)
     if (const Constant *CU = dyn_cast<Constant>(*UI)) {
       if (!SafeToDestroyConstant(CU)) return false;
     } else
@@ -158,7 +159,8 @@ static bool SafeToDestroyConstant(const Constant *C) {
 ///
 static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
                           SmallPtrSet<const PHINode*, 16> &PHIUsers) {
-  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+       ++UI)
     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
       GS.HasNonInstructionUser = true;
@@ -185,7 +187,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
         // value, not an aggregate), keep more specific information about
         // stores.
         if (GS.StoredType != GlobalStatus::isStored) {
-          if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){
+          if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
+                                                         SI->getOperand(1))) {
             Value *StoredVal = SI->getOperand(0);
             if (StoredVal == GV->getInitializer()) {
               if (GS.StoredType < GlobalStatus::isInitializerStored)
@@ -610,62 +613,69 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
 /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
 /// value will trap if the value is dynamically null.  PHIs keeps track of any
 /// phi nodes we've seen to avoid reprocessing them.
-static bool AllUsesOfValueWillTrapIfNull(Value *V,
-                                         SmallPtrSet<PHINode*, 8> &PHIs) {
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
-    if (isa<LoadInst>(*UI)) {
+static bool AllUsesOfValueWillTrapIfNull(const Value *V,
+                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
+  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+       ++UI) {
+    const User *U = *UI;
+
+    if (isa<LoadInst>(U)) {
       // Will trap.
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
       if (SI->getOperand(0) == V) {
-        //cerr << "NONTRAPPING USE: " << **UI;
+        //cerr << "NONTRAPPING USE: " << *U;
         return false;  // Storing the value.
       }
-    } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
       if (CI->getCalledValue() != V) {
-        //cerr << "NONTRAPPING USE: " << **UI;
+        //cerr << "NONTRAPPING USE: " << *U;
         return false;  // Not calling the ptr
       }
-    } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
       if (II->getCalledValue() != V) {
-        //cerr << "NONTRAPPING USE: " << **UI;
+        //cerr << "NONTRAPPING USE: " << *U;
         return false;  // Not calling the ptr
       }
-    } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) {
+    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
       if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
-    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) {
+    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
       if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
-    } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
+    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
       // If we've already seen this phi node, ignore it, it has already been
       // checked.
       if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
         return false;
-    } else if (isa<ICmpInst>(*UI) &&
+    } else if (isa<ICmpInst>(U) &&
               isa<ConstantPointerNull>(UI->getOperand(1))) {
       // Ignore icmp X, null
     } else {
-      //cerr << "NONTRAPPING USE: " << **UI;
+      //cerr << "NONTRAPPING USE: " << *U;
       return false;
     }
+  }
   return true;
 }

 /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
 /// from GV will trap if the loaded value is null.  Note that this also permits
 /// comparisons of the loaded value against null, as a special case.
-static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
-  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI)
-    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
-      SmallPtrSet<PHINode*, 8> PHIs;
+static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+       UI != E; ++UI) {
+    const User *U = *UI;
+
+    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      SmallPtrSet<const PHINode*, 8> PHIs;
       if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
         return false;
-    } else if (isa<StoreInst>(*UI)) {
+    } else if (isa<StoreInst>(U)) {
       // Ignore stores to the global.
     } else {
       // We don't know or understand this user, bail out.
-      //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI;
+      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
       return false;
     }
-
+  }
   return true;
 }

@@ -682,16 +692,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
       Changed = true;
     }
   } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
-    if (I->getOperand(0) == V) {
+    CallSite CS(I);
+    if (CS.getCalledValue() == V) {
       // Calling through the pointer!  Turn into a direct call, but be careful
       // that the pointer is not also being passed as an argument.
-      I->setOperand(0, NewV);
+      CS.setCalledFunction(NewV);
       Changed = true;
       bool PassedAsArg = false;
-      for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i)
-        if (I->getOperand(i) == V) {
+      for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+        if (CS.getArgument(i) == V) {
           PassedAsArg = true;
-          I->setOperand(i, NewV);
+          CS.setArgument(i, NewV);
         }

       if (PassedAsArg) {
@@ -938,29 +949,31 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
 /// to make sure that there are no complex uses of V.  We permit simple things
 /// like dereferencing the pointer, but not storing through the address, unless
 /// it is to the specified global.
-static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
-                                                      GlobalVariable *GV,
-                                              SmallPtrSet<PHINode*, 8> &PHIs) {
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-    Instruction *Inst = cast<Instruction>(*UI);
-
+static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+                                                      const GlobalVariable *GV,
+                                        SmallPtrSet<const PHINode*, 8> &PHIs) {
+  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+       UI != E; ++UI) {
+    const Instruction *Inst = cast<Instruction>(*UI);
+
     if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
       continue; // Fine, ignore.
     }

-    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
         return false;  // Storing the pointer itself... bad.
       continue; // Otherwise, storing through it, or storing into GV... fine.
     }

-    if (isa<GetElementPtrInst>(Inst)) {
+    // Must index into the array and into the struct.
+    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
         return false;
       continue;
     }

-    if (PHINode *PN = dyn_cast<PHINode>(Inst)) {
+    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
       // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
       // cycles.
       if (PHIs.insert(PN))
@@ -969,7 +982,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
       continue;
     }

-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
         return false;
       continue;
@@ -1029,11 +1042,12 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
 /// of a load) are simple enough to perform heap SRA on.  This permits GEP's
 /// that index through the array and struct field, icmps of null, and PHIs.
 static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
-                             SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
-                             SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
+                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
+                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
   // We permit two users of the load: setcc comparing against the null
   // pointer, and a getelementptr of a specific form.
-  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+       ++UI) {
     const Instruction *User = cast<Instruction>(*UI);

     // Comparison against null is ok.
@@ -1084,8 +1098,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
                                                     Instruction *StoredVal) {
   SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
   SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
-  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
-       ++UI)
+  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+       UI != E; ++UI)
     if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
       if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                           LoadUsingPHIsPerLoad))
@@ -1098,8 +1112,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
   // that all inputs the to the PHI nodes are in the same equivalence sets.
   // Check to verify that all operands of the PHIs are either PHIS that can be
   // transformed, loads from GV, or MI itself.
-  for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin(),
-       E = LoadUsingPHIs.end(); I != E; ++I) {
+  for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
+       , E = LoadUsingPHIs.end(); I != E; ++I) {
     const PHINode *PN = *I;
     for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
       Value *InVal = PN->getIncomingValue(op);
@@ -1448,6 +1462,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                                const Type *AllocTy,
                                                Module::global_iterator &GVI,
                                                TargetData *TD) {
+  if (!TD)
+    return false;
+
   // If this is a malloc of an abstract type, don't touch it.
   if (!AllocTy->isSized())
     return false;
@@ -1466,66 +1483,66 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
   // malloc to be stored into the specified global, loaded setcc'd, and
   // GEP'd.  These are all things we could transform to using the global
   // for.
-  {
-    SmallPtrSet<PHINode*, 8> PHIs;
-    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
-      return false;
-  }
+  SmallPtrSet<const PHINode*, 8> PHIs;
+  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+    return false;

   // If we have a global that is only initialized with a fixed size malloc,
   // transform the program to use global memory instead of malloc'd memory.
   // This eliminates dynamic allocation, avoids an indirection accessing the
   // data, and exposes the resultant global to further GlobalOpt.
   // We cannot optimize the malloc if we cannot determine malloc array size.
-  if (Value *NElems = getMallocArraySize(CI, TD, true)) {
-    if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
-      // Restrict this transformation to only working on small allocations
-      // (2048 bytes currently), as we don't want to introduce a 16M global or
-      // something.
-      if (TD &&
-          NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
-        GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
-        return true;
-      }
+  Value *NElems = getMallocArraySize(CI, TD, true);
+  if (!NElems)
+    return false;
+
+  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
+    // Restrict this transformation to only working on small allocations
+    // (2048 bytes currently), as we don't want to introduce a 16M global or
+    // something.
+    if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
+      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
+      return true;
+    }

-    // If the allocation is an array of structures, consider transforming this
-    // into multiple malloc'd arrays, one for each field.  This is basically
-    // SRoA for malloc'd memory.
-
-    // If this is an allocation of a fixed size array of structs, analyze as a
-    // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
-    if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
-      if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
-        AllocTy = AT->getElementType();
+  // If the allocation is an array of structures, consider transforming this
+  // into multiple malloc'd arrays, one for each field.  This is basically
+  // SRoA for malloc'd memory.
+
+  // If this is an allocation of a fixed size array of structs, analyze as a
+  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
+  if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
+    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+      AllocTy = AT->getElementType();

-    if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
-      // This the structure has an unreasonable number of fields, leave it
-      // alone.
-      if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
-          AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
-
-        // If this is a fixed size array, transform the Malloc to be an alloc of
-        // structs.  malloc [100 x struct],1 -> malloc struct, 100
-        if (const ArrayType *AT =
-                          dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
-          const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
-          unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
-          Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
-          Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
-          Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
-                                                       AllocSize, NumElements,
-                                                       CI->getName());
-          Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
-          CI->replaceAllUsesWith(Cast);
-          CI->eraseFromParent();
-          CI = dyn_cast<BitCastInst>(Malloc) ?
-               extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
-        }
-
-        GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
-        return true;
-      }
-    }
+  const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
+  if (!AllocSTy)
+    return false;
+
+  // This the structure has an unreasonable number of fields, leave it
+  // alone.
+  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+
+    // If this is a fixed size array, transform the Malloc to be an alloc of
+    // structs.  malloc [100 x struct],1 -> malloc struct, 100
+    if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+      const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+      unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+      Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+                                                   AllocSize, NumElements,
+                                                   CI->getName());
+      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+      CI->replaceAllUsesWith(Cast);
+      CI->eraseFromParent();
+      CI = dyn_cast<BitCastInst>(Malloc) ?
+           extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
+    }
+
+    GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
+    return true;
   }

   return false;
@@ -1689,8 +1706,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
     DEBUG(dbgs() << "  StoredOnceValue = " << *GS.StoredOnceValue << "\n");
   if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
-    DEBUG(dbgs() << "  AccessingFunction = " << GS.AccessingFunction->getName()
-                 << "\n");
+    DEBUG(dbgs() << "  AccessingFunction = "
                 << GS.AccessingFunction->getName() << "\n");
   DEBUG(dbgs() << "  HasMultipleAccessingFunctions =  "
                << GS.HasMultipleAccessingFunctions << "\n");
   DEBUG(dbgs() << "  HasNonInstructionUser = "
@@ -2278,10 +2295,10 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
       }

       // Cannot handle inline asm.
-      if (isa<InlineAsm>(CI->getOperand(0))) return false;
+      if (isa<InlineAsm>(CI->getCalledValue())) return false;

       // Resolve function pointers.
-      Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0)));
+      Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue()));
       if (!Callee) return false;  // Cannot resolve.

       SmallVector<Constant*, 8> Formals;
@@ -2500,7 +2517,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
       continue;

     // Do not perform the transform if multiple aliases potentially target the
-    // aliasee. This check also ensures that it is safe to replace the section
+    // aliasee.  This check also ensures that it is safe to replace the section
     // and other attributes of the aliasee with those of the alias.
     if (!hasOneUse)
       continue;
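
A recurring change in GlobalOpt above is replacing raw operand accesses such as I->getOperand(0) with CallSite. CallSite wraps either a CallInst or an InvokeInst, so the callee and the argument list can be read and rewritten without caring which instruction kind is in hand. Roughly, as a hypothetical helper (not part of the patch):

    // Illustration of the CallSite idiom used above: redirect the callee of a
    // call or invoke, and also fix up any argument uses of the old value.
    static void RedirectCalls(Instruction *I, Value *OldV, Constant *NewV) {
      CallSite CS(I);                       // wraps a call or an invoke
      if (CS.getCalledValue() == OldV)
        CS.setCalledFunction(NewV);
      // The pointer may also be passed as an ordinary argument; fix those too.
      for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
        if (CS.getArgument(i) == OldV)
          CS.setArgument(i, NewV);
    }
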
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 83e8624fe09df..340b70eb02689 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -62,6 +62,15 @@ void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createPruneEHPass());
 }

+void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createIPSCCPPass());
+}
+
+void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
+  unwrap(PM)->add(createInternalizePass(AllButMain != 0));
+}
+
+
 void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
   // FIXME: Remove in LLVM 3.0.
 }
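
With the two new entry points above, C API clients can schedule IPSCCP and internalization alongside the existing IPO passes. A minimal usage sketch (function name RunSomeIPO is hypothetical), assuming the standard llvm-c pass-manager calls from llvm-c/Core.h:

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/IPO.h"

    void RunSomeIPO(LLVMModuleRef M) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddInternalizePass(PM, 1);  /* nonzero: internalize all but main */
      LLVMAddIPSCCPPass(PM);          /* interprocedural SCCP */
      LLVMAddPruneEHPass(PM);         /* existing entry point */
      LLVMRunPassManager(PM, M);
      LLVMDisposePassManager(PM);
    }
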
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 03ec72c030432..b785bb0a9390e 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -73,16 +73,14 @@ InlinedArrayAllocasTy;
 /// available from other functions inlined into the caller.  If we are able to
 /// inline this call site we attempt to reuse already available allocas or add
 /// any new allocas to the set if not possible.
-static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
-                                 const TargetData *TD,
+static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
                                  InlinedArrayAllocasTy &InlinedArrayAllocas) {
   Function *Callee = CS.getCalledFunction();
   Function *Caller = CS.getCaller();

   // Try to inline the function.  Get the list of static allocas that were
   // inlined.
-  SmallVector<AllocaInst*, 16> StaticAllocas;
-  if (!InlineFunction(CS, &CG, TD, &StaticAllocas))
+  if (!InlineFunction(CS, IFI))
     return false;

   // If the inlined function had a higher stack protection level than the
@@ -119,9 +117,9 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,

   // Loop over all the allocas we have so far and see if they can be merged with
   // a previously inlined alloca.  If not, remember that we had it.
-  for (unsigned AllocaNo = 0, e = StaticAllocas.size();
+  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
        AllocaNo != e; ++AllocaNo) {
-    AllocaInst *AI = StaticAllocas[AllocaNo];
+    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

     // Don't bother trying to merge array allocations (they will usually be
     // canonicalized to be an allocation *of* an array), or allocations whose
@@ -292,14 +290,29 @@ bool Inliner::shouldInline(CallSite CS) {
   return true;
 }

-bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
+/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+            const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+  while (InlineHistoryID != -1) {
+    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+           "Invalid inline history ID");
+    if (InlineHistory[InlineHistoryID].first == F)
+      return true;
+    InlineHistoryID = InlineHistory[InlineHistoryID].second;
+  }
+  return false;
+}
+
+
+bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();

   SmallPtrSet<Function*, 8> SCCFunctions;
   DEBUG(dbgs() << "Inliner visiting SCC:");
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-    Function *F = SCC[i]->getFunction();
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();
     if (F) SCCFunctions.insert(F);
     DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
   }
@@ -307,10 +320,16 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
   // Scan through and identify all call sites ahead of time so that we only
   // inline call sites in the original functions, not call sites that result
   // from inlining other functions.
-  SmallVector<CallSite, 16> CallSites;
-  
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-    Function *F = SCC[i]->getFunction();
+  SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+  // When inlining a callee produces new call sites, we want to keep track of
+  // the fact that they were inlined from the callee.  This allows us to avoid
+  // infinite inlining in some obscure cases.  To represent this, we use an
+  // index into the InlineHistory vector.
+  SmallVector<std::pair<Function*, int>, 8> InlineHistory;
+
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();
     if (!F) continue;

     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
@@ -327,22 +346,27 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
         if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
           continue;

-        CallSites.push_back(CS);
+        CallSites.push_back(std::make_pair(CS, -1));
       }
   }

   DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

+  // If there are no calls in this function, exit early.
+  if (CallSites.empty())
+    return false;
+
   // Now that we have all of the call sites, move the ones to functions in the
   // current SCC to the end of the list.
   unsigned FirstCallInSCC = CallSites.size();
   for (unsigned i = 0; i < FirstCallInSCC; ++i)
-    if (Function *F = CallSites[i].getCalledFunction())
+    if (Function *F = CallSites[i].first.getCalledFunction())
       if (SCCFunctions.count(F))
         std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);


   InlinedArrayAllocasTy InlinedArrayAllocas;
+  InlineFunctionInfo InlineInfo(&CG, TD);

   // Now that we have all of the call sites, loop over them and inline them if
   // it looks profitable to do so.
@@ -353,7 +377,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
     // Iterate over the outer loop because inlining functions can cause indirect
     // calls to become direct calls.
     for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
-      CallSite CS = CallSites[CSi];
+      CallSite CS = CallSites[CSi].first;

       Function *Caller = CS.getCaller();
       Function *Callee = CS.getCalledFunction();
@@ -375,16 +399,42 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
       // We can only inline direct calls to non-declarations.
       if (Callee == 0 || Callee->isDeclaration()) continue;

+      // If this call sites was obtained by inlining another function, verify
+      // that the include path for the function did not include the callee
+      // itself.  If so, we'd be recursively inlinling the same function,
+      // which would provide the same callsites, which would cause us to
+      // infinitely inline.
+      int InlineHistoryID = CallSites[CSi].second;
+      if (InlineHistoryID != -1 &&
+          InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+        continue;
+
+
       // If the policy determines that we should inline this function,
       // try to do so.
       if (!shouldInline(CS))
         continue;

-      // Attempt to inline the function...
-      if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
+      // Attempt to inline the function.
+      if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
         continue;
       ++NumInlined;
-      
+
+      // If inlining this function gave us any new call sites, throw them
+      // onto our worklist to process.  They are useful inline candidates.
+      if (!InlineInfo.InlinedCalls.empty()) {
+        // Create a new inline history entry for this, so that we remember
+        // that these new callsites came about due to inlining Callee.
+        int NewHistoryID = InlineHistory.size();
+        InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+        for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
+             i != e; ++i) {
+          Value *Ptr = InlineInfo.InlinedCalls[i];
+          CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+        }
+      }
+
       // Update the cached cost info with the inlined call.
       growCachedCostInfo(Caller, Callee);
     }
@@ -417,7 +467,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
         // swap/pop_back for efficiency, but do not use it if doing so would
         // move a call site to a function in this SCC before the
        // 'FirstCallInSCC' barrier.
-        if (SCC.size() == 1) {
+        if (SCC.isSingular()) {
           std::swap(CallSites[CSi], CallSites.back());
           CallSites.pop_back();
         } else {
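
The InlineHistory structure above is a parent-pointer tree flattened into a vector: each entry records which function was inlined and the index of the enclosing entry, and ID -1 marks a call site present in the original code. Walking the parent links answers "was Callee already inlined somewhere along the chain that produced this call site?", which is what stops mutually recursive functions from being re-inlined forever. A self-contained illustration of the scheme (names hypothetical, not LLVM API):

    #include <cassert>
    #include <utility>
    #include <vector>

    struct Function;   // stand-in for llvm::Function; only used via pointer
    typedef std::vector<std::pair<Function*, int> > HistoryTy;

    // Mirrors InlineHistoryIncludes above: hop parent links back to the root.
    static bool HistoryIncludes(Function *F, int ID, const HistoryTy &H) {
      while (ID != -1) {
        assert(unsigned(ID) < H.size() && "invalid history ID");
        if (H[ID].first == F)
          return true;
        ID = H[ID].second;
      }
      return false;
    }

    // Minting new IDs: after inlining Callee at a site whose history was
    // OldID, each call site copied out of Callee's body is tagged with
    //   int NewID = (int)H.size();
    //   H.push_back(std::make_pair(Callee, OldID));
    // so a later attempt to inline Callee again along the same chain fails
    // the HistoryIncludes check and is skipped.
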
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index f8ec722273807..07525eaada5ec 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -120,15 +120,17 @@ Function* PartialInliner::unswitchFunction(Function* F) {
   // Extract the body of the if.
   Function* extractedFunction = ExtractCodeRegion(DT, toExtract);

+  InlineFunctionInfo IFI;
+
   // Inline the top-level if test into all callers.
   std::vector<User*> Users(duplicateFunction->use_begin(),
                            duplicateFunction->use_end());
   for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
        UI != UE; ++UI)
-    if (CallInst* CI = dyn_cast<CallInst>(*UI))
-      InlineFunction(CI);
-    else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI))
-      InlineFunction(II);
+    if (CallInst *CI = dyn_cast<CallInst>(*UI))
+      InlineFunction(CI, IFI);
+    else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
+      InlineFunction(II, IFI);

   // Ditch the duplicate, since we're done with it, and rewrite all remaining
   // users (function pointers, etc.) back to the original function.
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 161246bc2598a..de6099cc1daa0 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -40,7 +40,7 @@ namespace {
     PruneEH() : CallGraphSCCPass(&ID) {}

     // runOnSCC - Analyze the SCC, performing the transformation if possible.
-    bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+    bool runOnSCC(CallGraphSCC &SCC);

     bool SimplifyFunction(Function *F);
     void DeleteBasicBlock(BasicBlock *BB);
@@ -54,20 +54,20 @@ X("prune-eh", "Remove unused exception handling info");

 Pass *llvm::createPruneEHPass() { return new PruneEH(); }

-bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
+bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
   SmallPtrSet<CallGraphNode *, 8> SCCNodes;
   CallGraph &CG = getAnalysis<CallGraph>();
   bool MadeChange = false;

   // Fill SCCNodes with the elements of the SCC.  Used for quickly
   // looking up whether a given CallGraphNode is in this SCC.
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    SCCNodes.insert(SCC[i]);
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+    SCCNodes.insert(*I);

   // First pass, scan all of the functions in the SCC, simplifying them
   // according to what we know.
-  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    if (Function *F = SCC[i]->getFunction())
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+    if (Function *F = (*I)->getFunction())
       MadeChange |= SimplifyFunction(F);

   // Next, check to see if any callees might throw or if there are any external
@@ -78,9 +78,9 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
   // obviously the SCC might throw.
   //
   bool SCCMightUnwind = false, SCCMightReturn = false;
-  for (unsigned i = 0, e = SCC.size();
-       (!SCCMightUnwind || !SCCMightReturn) && i != e; ++i) {
-    Function *F = SCC[i]->getFunction();
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+       (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
+    Function *F = (*I)->getFunction();
     if (F == 0) {
       SCCMightUnwind = true;
       SCCMightReturn = true;
@@ -132,7 +132,7 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {

   // If the SCC doesn't unwind or doesn't throw, note this fact.
   if (!SCCMightUnwind || !SCCMightReturn)
-    for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
       Attributes NewAttributes = Attribute::None;

       if (!SCCMightUnwind)
@@ -140,19 +140,20 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {
       if (!SCCMightReturn)
         NewAttributes |= Attribute::NoReturn;

-      const AttrListPtr &PAL = SCC[i]->getFunction()->getAttributes();
+      Function *F = (*I)->getFunction();
+      const AttrListPtr &PAL = F->getAttributes();
       const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes);
       if (PAL != NPAL) {
         MadeChange = true;
-        SCC[i]->getFunction()->setAttributes(NPAL);
+        F->setAttributes(NPAL);
       }
     }

-  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
     // Convert any invoke instructions to non-throwing functions in this node
     // into call instructions with a branch.  This makes the exception blocks
     // dead.
-    if (Function *F = SCC[i]->getFunction())
+    if (Function *F = (*I)->getFunction())
       MadeChange |= SimplifyFunction(F);
   }
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index dda32d02c873a..473e83cec45e1 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -48,7 +48,7 @@ namespace {
       CallGraphSCCPass::getAnalysisUsage(AU);
     }

-    virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
+    virtual bool runOnSCC(CallGraphSCC &SCC);
     static char ID; // Pass identification, replacement for typeid
     SRETPromotion() : CallGraphSCCPass(&ID) {}

@@ -69,12 +69,12 @@ Pass *llvm::createStructRetPromotionPass() {
   return new SRETPromotion();
 }

-bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
+bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) {
   bool Changed = false;

-  for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) {
-      SCC[i] = NewNode;
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
+    if (CallGraphNode *NewNode = PromoteReturn(*I)) {
+      SCC.ReplaceNode(*I, NewNode);
       Changed = true;
     }