Diffstat (limited to 'lib/Transforms/IPO')
31 files changed, 3384 insertions, 3512 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 0e05129b52617..0716a3a9cbe90 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -68,6 +69,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + getAAResultsAnalysisUsage(AU); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -78,19 +80,8 @@ namespace { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } - /// A vector used to hold the indices of a single GEP instruction - typedef std::vector<uint64_t> IndicesVector; - private: - bool isDenselyPacked(Type *type, const DataLayout &DL); - bool canPaddingBeAccessed(Argument *Arg); - CallGraphNode *PromoteArguments(CallGraphNode *CGN); - bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, - AAResults &AAR) const; - CallGraphNode *DoPromotion(Function *F, - SmallPtrSetImpl<Argument*> &ArgsToPromote, - SmallPtrSetImpl<Argument*> &ByValArgsToTransform); - + using llvm::Pass::doInitialization; bool doInitialization(CallGraph &CG) override; /// The maximum number of elements to expand, or 0 for unlimited. @@ -98,6 +89,21 @@ namespace { }; } +/// A vector used to hold the indices of a single GEP instruction +typedef std::vector<uint64_t> IndicesVector; + +static CallGraphNode * +PromoteArguments(CallGraphNode *CGN, CallGraph &CG, + function_ref<AAResults &(Function &F)> AARGetter, + unsigned MaxElements); +static bool isDenselyPacked(Type *type, const DataLayout &DL); +static bool canPaddingBeAccessed(Argument *Arg); +static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR, + unsigned MaxElements); +static CallGraphNode * +DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, + SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG); + char ArgPromotion::ID = 0; INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) @@ -111,16 +117,19 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); } -bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { +static bool runImpl(CallGraphSCC &SCC, CallGraph &CG, + function_ref<AAResults &(Function &F)> AARGetter, + unsigned MaxElements) { bool Changed = false, LocalChange; do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - if (CallGraphNode *CGN = PromoteArguments(*I)) { + for (CallGraphNode *OldNode : SCC) { + if (CallGraphNode *NewNode = + PromoteArguments(OldNode, CG, AARGetter, MaxElements)) { LocalChange = true; - SCC.ReplaceNode(*I, CGN); + SCC.ReplaceNode(OldNode, NewNode); } } Changed |= LocalChange; // Remember that we changed something. @@ -129,8 +138,30 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { return Changed; } +bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { + if (skipSCC(SCC)) + return false; + + // Get the callgraph information that we need to update to reflect our + // changes. 
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + + // We compute dedicated AA results for each function in the SCC as needed. We + // use a lambda referencing external objects so that they live long enough to + // be queried, but we re-use them each time. + Optional<BasicAAResult> BAR; + Optional<AAResults> AAR; + auto AARGetter = [&](Function &F) -> AAResults & { + BAR.emplace(createLegacyPMBasicAAResult(*this, F)); + AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); + return *AAR; + }; + + return runImpl(SCC, CG, AARGetter, maxElements); +} + /// \brief Checks if a type could have padding bytes. -bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) { +static bool isDenselyPacked(Type *type, const DataLayout &DL) { // There is no size information, so be conservative. if (!type->isSized()) @@ -166,7 +197,7 @@ bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) { } /// \brief Checks if the padding bytes of an argument could be accessed. -bool ArgPromotion::canPaddingBeAccessed(Argument *arg) { +static bool canPaddingBeAccessed(Argument *arg) { assert(arg->hasByValAttr()); @@ -207,7 +238,10 @@ bool ArgPromotion::canPaddingBeAccessed(Argument *arg) { /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. /// -CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { +static CallGraphNode * +PromoteArguments(CallGraphNode *CGN, CallGraph &CG, + function_ref<AAResults &(Function &F)> AARGetter, + unsigned MaxElements) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. @@ -242,20 +276,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { const DataLayout &DL = F->getParent()->getDataLayout(); - // We need to manually construct BasicAA directly in order to disable its use - // of other function analyses. - BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F)); - - // Construct our own AA results for this function. We do this manually to - // work around the limitations of the legacy pass manager. - AAResults AAR(createLegacyPMAAResults(*this, *F, BAR)); + AAResults &AAR = AARGetter(*F); // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; SmallPtrSet<Argument*, 8> ByValArgsToTransform; - for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) { - Argument *PtrArg = PointerArgs[i]; + for (Argument *PtrArg : PointerArgs) { Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // Replace sret attribute with noalias. This reduces register pressure by @@ -285,10 +312,10 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { - if (maxElements > 0 && STy->getNumElements() > maxElements) { + if (MaxElements > 0 && STy->getNumElements() > MaxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" - << " than " << maxElements << " arguments to the function.\n"); + << " than " << MaxElements << " arguments to the function.\n"); continue; } @@ -302,7 +329,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // Safe to transform, don't even bother trying to "promote" it. 
- // Passing the elements as a scalar will allow scalarrepl to hack on + // Passing the elements as a scalar will allow sroa to hack on // the new alloca we introduce. if (AllSimple) { ByValArgsToTransform.insert(PtrArg); continue; } @@ -328,7 +355,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // Otherwise, see if we can promote the pointer to its value. - if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR)) + if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR, + MaxElements)) ArgsToPromote.insert(PtrArg); } @@ -336,7 +364,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; - return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); + return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG); } /// AllCallersPassInValidPointerForArgument - Return true if we can prove that @@ -364,8 +392,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { /// elements in Prefix is the same as the corresponding elements in Longer. /// /// This means it also returns true when Prefix and Longer are equal! -static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix, - const ArgPromotion::IndicesVector &Longer) { +static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) { if (Prefix.size() > Longer.size()) return false; return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); @@ -373,9 +400,9 @@ static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix, /// Checks if Indices, or a prefix of Indices, is in Set. -static bool PrefixIn(const ArgPromotion::IndicesVector &Indices, - std::set<ArgPromotion::IndicesVector> &Set) { - std::set<ArgPromotion::IndicesVector>::iterator Low; +static bool PrefixIn(const IndicesVector &Indices, + std::set<IndicesVector> &Set) { + std::set<IndicesVector>::iterator Low; Low = Set.upper_bound(Indices); if (Low != Set.begin()) Low--; @@ -392,9 +419,9 @@ static bool PrefixIn(const ArgPromotion::IndicesVector &Indices, /// is already a prefix of Indices in Safe, Indices are implicitly marked safe /// already. Furthermore, any indices that Indices is itself a prefix of, are /// removed from Safe (since they are implicitly safe because of Indices now). -static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, - std::set<ArgPromotion::IndicesVector> &Safe) { - std::set<ArgPromotion::IndicesVector>::iterator Low; +static void MarkIndicesSafe(const IndicesVector &ToMark, + std::set<IndicesVector> &Safe) { + std::set<IndicesVector>::iterator Low; Low = Safe.upper_bound(ToMark); // Guard against the case where Safe is empty if (Low != Safe.begin()) Low--; @@ -415,9 +442,9 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, Low = Safe.insert(Low, ToMark); ++Low; // If we were a prefix of longer index list(s), remove those - std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end(); + std::set<IndicesVector>::iterator End = Safe.end(); while (Low != End && IsPrefix(ToMark, *Low)) { - std::set<ArgPromotion::IndicesVector>::iterator Remove = Low; + std::set<IndicesVector>::iterator Remove = Low; ++Low; Safe.erase(Remove); } @@ -428,9 +455,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, /// This method limits promotion of aggregates to only promote up to three /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in.
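The IsPrefix/PrefixIn/MarkIndicesSafe helpers above keep the set of safe GEP index lists minimal: no stored vector is a prefix of another, so a single ordered lookup answers any prefix query. A condensed, self-contained sketch of that idiom (simplified from the code above, not the exact implementation):

    #include <algorithm>
    #include <cstdint>
    #include <set>
    #include <vector>

    using IndicesVector = std::vector<uint64_t>;

    // True when Prefix is a (possibly equal) prefix of Longer.
    static bool isPrefix(const IndicesVector &Prefix,
                         const IndicesVector &Longer) {
      return Prefix.size() <= Longer.size() &&
             std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
    }

    // True when Indices, or a prefix of it, is in Set. Because the set is
    // kept minimal, only the greatest element <= Indices in lexicographic
    // order can be such a prefix, so one lookup suffices.
    static bool prefixIn(const IndicesVector &Indices,
                         const std::set<IndicesVector> &Set) {
      auto Low = Set.upper_bound(Indices);
      if (Low != Set.begin())
        --Low;
      return Low != Set.end() && isPrefix(*Low, Indices);
    }

Edge case: when Set is empty, upper_bound() returns begin() == end(), the decrement is skipped, and the end() check makes the query return false, matching the "Guard against the case where Safe is empty" comment above.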
-bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, - bool isByValOrInAlloca, - AAResults &AAR) const { +static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca, + AAResults &AAR, unsigned MaxElements) { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments @@ -518,7 +544,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? - return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR); + return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR, + MaxElements); } // Ensure that all of the indices are constants. @@ -552,10 +579,10 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // to make sure that we aren't promoting too many elements. If so, nothing // to do. if (ToPromote.find(Operands) == ToPromote.end()) { - if (maxElements > 0 && ToPromote.size() == maxElements) { + if (MaxElements > 0 && ToPromote.size() == MaxElements) { DEBUG(dbgs() << "argpromotion not promoting argument '" << Arg->getName() << "' because it would require adding more " - << "than " << maxElements << " arguments to the function.\n"); + << "than " << MaxElements << " arguments to the function.\n"); // We limit aggregate promotion to only promoting up to a fixed number // of elements of the aggregate. return false; @@ -575,10 +602,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // blocks we know to be transparent to the load. SmallPtrSet<BasicBlock*, 16> TranspBlocks; - for (unsigned i = 0, e = Loads.size(); i != e; ++i) { + for (LoadInst *Load : Loads) { // Check to see if the load is invalidated from the start of the block to // the load itself. - LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); MemoryLocation Loc = MemoryLocation::get(Load); @@ -604,9 +630,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. -CallGraphNode *ArgPromotion::DoPromotion(Function *F, - SmallPtrSetImpl<Argument*> &ArgsToPromote, - SmallPtrSetImpl<Argument*> &ByValArgsToTransform) { +static CallGraphNode * +DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, + SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. @@ -700,12 +726,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } // Add a parameter to the function for each element passed in. - for (ScalarizeTable::iterator SI = ArgIndices.begin(), - E = ArgIndices.end(); SI != E; ++SI) { + for (const auto &ArgIndex : ArgIndices) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType( cast<PointerType>(I->getType()->getScalarType())->getElementType(), - SI->second)); + ArgIndex.second)); assert(Params.back()); } @@ -745,10 +770,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Get the callgraph information that we need to update to reflect our - // changes. - CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); - // Get a new callgraph node for NF. 
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); @@ -800,27 +821,25 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value*> Ops; - for (ScalarizeTable::iterator SI = ArgIndices.begin(), - E = ArgIndices.end(); SI != E; ++SI) { + for (const auto &ArgIndex : ArgIndices) { Value *V = *AI; - LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)]; - if (!SI->second.empty()) { - Ops.reserve(SI->second.size()); + LoadInst *OrigLoad = + OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; + if (!ArgIndex.second.empty()) { + Ops.reserve(ArgIndex.second.size()); Type *ElTy = V->getType(); - for (IndicesVector::const_iterator II = SI->second.begin(), - IE = SI->second.end(); - II != IE; ++II) { + for (unsigned long II : ArgIndex.second) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); - Ops.push_back(ConstantInt::get(IdxTy, *II)); + Ops.push_back(ConstantInt::get(IdxTy, II)); // Keep track of the type we're currently indexing. - ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); + ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II); } // And create a GEP to extract those indices. - V = GetElementPtrInst::Create(SI->first, V, Ops, + V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, V->getName() + ".idx", Call); Ops.clear(); } @@ -852,15 +871,18 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, AttributesVec.push_back(AttributeSet::get(Call->getContext(), CallPAL.getFnAttributes())); + SmallVector<OperandBundleDef, 1> OpBundles; + CS.getOperandBundlesAsDefs(OpBundles); + Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, "", Call); + Args, OpBundles, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(), AttributesVec)); } else { - New = CallInst::Create(NF, Args, "", Call); + New = CallInst::Create(NF, Args, OpBundles, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(), AttributesVec)); diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 351b88fe2aa0b..d6782c738cbe1 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -19,7 +19,7 @@ add_llvm_library(LLVMipo Inliner.cpp Internalize.cpp LoopExtractor.cpp - LowerBitSets.cpp + LowerTypeTests.cpp MergeFunctions.cpp PartialInlining.cpp PassManagerBuilder.cpp @@ -27,6 +27,7 @@ add_llvm_library(LLVMipo SampleProfile.cpp StripDeadPrototypes.cpp StripSymbols.cpp + WholeProgramDevirt.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 0aa49d6fde014..d75ed206ad23c 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -17,7 +17,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" @@ -28,41 +28,13 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" 
#include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; #define DEBUG_TYPE "constmerge" STATISTIC(NumMerged, "Number of global constants merged"); -namespace { - struct ConstantMerge : public ModulePass { - static char ID; // Pass identification, replacement for typeid - ConstantMerge() : ModulePass(ID) { - initializeConstantMergePass(*PassRegistry::getPassRegistry()); - } - - // For this pass, process all of the globals in the module, eliminating - // duplicate constants. - bool runOnModule(Module &M) override; - - // Return true iff we can determine the alignment of this global variable. - bool hasKnownAlignment(GlobalVariable *GV) const; - - // Return the alignment of the global, including converting the default - // alignment to a concrete value. - unsigned getAlignment(GlobalVariable *GV) const; - - }; -} - -char ConstantMerge::ID = 0; -INITIALIZE_PASS(ConstantMerge, "constmerge", - "Merge Duplicate Global Constants", false, false) - -ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } - - - /// Find values that are marked as llvm.used. static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSetImpl<const GlobalValue*> &UsedValues) { @@ -85,18 +57,17 @@ static bool IsBetterCanonical(const GlobalVariable &A, if (A.hasLocalLinkage() && !B.hasLocalLinkage()) return false; - return A.hasUnnamedAddr(); + return A.hasGlobalUnnamedAddr(); } -unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { +static unsigned getAlignment(GlobalVariable *GV) { unsigned Align = GV->getAlignment(); if (Align) return Align; return GV->getParent()->getDataLayout().getPreferredAlignment(GV); } -bool ConstantMerge::runOnModule(Module &M) { - +static bool mergeConstants(Module &M) { // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); @@ -181,11 +152,11 @@ bool ConstantMerge::runOnModule(Module &M) { if (!Slot || Slot == GV) continue; - if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr()) + if (!Slot->hasGlobalUnnamedAddr() && !GV->hasGlobalUnnamedAddr()) continue; - if (!GV->hasUnnamedAddr()) - Slot->setUnnamedAddr(false); + if (!GV->hasGlobalUnnamedAddr()) + Slot->setUnnamedAddr(GlobalValue::UnnamedAddr::None); // Make all uses of the duplicate constant use the canonical version. Replacements.push_back(std::make_pair(GV, Slot)); @@ -220,3 +191,34 @@ bool ConstantMerge::runOnModule(Module &M) { Replacements.clear(); } } + +PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) { + if (!mergeConstants(M)) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} + +namespace { +struct ConstantMergeLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ConstantMergeLegacyPass() : ModulePass(ID) { + initializeConstantMergeLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + // For this pass, process all of the globals in the module, eliminating + // duplicate constants. 
+ bool runOnModule(Module &M) { + if (skipModule(M)) + return false; + return mergeConstants(M); + } +}; +} + +char ConstantMergeLegacyPass::ID = 0; +INITIALIZE_PASS(ConstantMergeLegacyPass, "constmerge", + "Merge Duplicate Global Constants", false, false) + +ModulePass *llvm::createConstantMergePass() { + return new ConstantMergeLegacyPass(); +} diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp index 5bbb7513005c6..58731eaf6e30f 100644 --- a/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Statistic.h" @@ -30,13 +30,14 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; #define DEBUG_TYPE "cross-dso-cfi" -STATISTIC(TypeIds, "Number of unique type identifiers"); +STATISTIC(NumTypeIds, "Number of unique type identifiers"); namespace { @@ -46,13 +47,10 @@ struct CrossDSOCFI : public ModulePass { initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry()); } - Module *M; MDNode *VeryLikelyWeights; - ConstantInt *extractBitSetTypeId(MDNode *MD); - void buildCFICheck(); - - bool doInitialization(Module &M) override; + ConstantInt *extractNumericTypeId(MDNode *MD); + void buildCFICheck(Module &M); bool runOnModule(Module &M) override; }; @@ -65,18 +63,10 @@ char CrossDSOCFI::ID = 0; ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; } -bool CrossDSOCFI::doInitialization(Module &Mod) { - M = &Mod; - VeryLikelyWeights = - MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1); - - return false; -} - -/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode. -ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) { +/// Extracts a numeric type identifier from an MDNode containing type metadata. +ConstantInt *CrossDSOCFI::extractNumericTypeId(MDNode *MD) { // This check excludes vtables for classes inside anonymous namespaces. - auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0)); + auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(1)); if (!TM) return nullptr; auto C = dyn_cast_or_null<ConstantInt>(TM->getValue()); @@ -84,68 +74,63 @@ ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) { // We are looking for i64 constants. if (C->getBitWidth() != 64) return nullptr; - // Sanity check. - auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1)); - // Can be null if a function was removed by an optimization. - if (FM) { - auto F = dyn_cast<Function>(FM->getValue()); - // But can never be a function declaration. - assert(!F || !F->isDeclaration()); - (void)F; // Suppress unused variable warning in the no-asserts build. - } return C; } /// buildCFICheck - emits __cfi_check for the current module. -void CrossDSOCFI::buildCFICheck() { +void CrossDSOCFI::buildCFICheck(Module &M) { // FIXME: verify that __cfi_check ends up near the end of the code section, - // but before the jump slots created in LowerBitSets. 
- llvm::DenseSet<uint64_t> BitSetIds; - NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets"); - - if (BitSetNM) - for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) - if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I))) - BitSetIds.insert(TypeId->getZExtValue()); - - LLVMContext &Ctx = M->getContext(); - Constant *C = M->getOrInsertFunction( - "__cfi_check", - FunctionType::get( - Type::getVoidTy(Ctx), - {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))}, - false)); + // but before the jump slots created in LowerTypeTests. + llvm::DenseSet<uint64_t> TypeIds; + SmallVector<MDNode *, 2> Types; + for (GlobalObject &GO : M.global_objects()) { + Types.clear(); + GO.getMetadata(LLVMContext::MD_type, Types); + for (MDNode *Type : Types) { + // Sanity check. GO must not be a function declaration. + assert(!isa<Function>(&GO) || !cast<Function>(&GO)->isDeclaration()); + + if (ConstantInt *TypeId = extractNumericTypeId(Type)) + TypeIds.insert(TypeId->getZExtValue()); + } + } + + LLVMContext &Ctx = M.getContext(); + Constant *C = M.getOrInsertFunction( + "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), + Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), nullptr); Function *F = dyn_cast<Function>(C); F->setAlignment(4096); auto args = F->arg_begin(); - Argument &CallSiteTypeId = *(args++); + Value &CallSiteTypeId = *(args++); CallSiteTypeId.setName("CallSiteTypeId"); - Argument &Addr = *(args++); + Value &Addr = *(args++); Addr.setName("Addr"); + Value &CFICheckFailData = *(args++); + CFICheckFailData.setName("CFICheckFailData"); assert(args == F->arg_end()); BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F); + BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F); - BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F); - IRBuilder<> IRBTrap(TrapBB); - Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap); - llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn); - TrapCall->setDoesNotReturn(); - TrapCall->setDoesNotThrow(); - IRBTrap.CreateUnreachable(); + BasicBlock *TrapBB = BasicBlock::Create(Ctx, "fail", F); + IRBuilder<> IRBFail(TrapBB); + Constant *CFICheckFailFn = M.getOrInsertFunction( + "__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), + Type::getInt8PtrTy(Ctx), nullptr); + IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr}); + IRBFail.CreateBr(ExitBB); - BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F); IRBuilder<> IRBExit(ExitBB); IRBExit.CreateRetVoid(); IRBuilder<> IRB(BB); - SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size()); - for (uint64_t TypeId : BitSetIds) { + SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, TypeIds.size()); + for (uint64_t TypeId : TypeIds) { ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId); BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F); IRBuilder<> IRBTest(TestBB); - Function *BitsetTestFn = - Intrinsic::getDeclaration(M, Intrinsic::bitset_test); + Function *BitsetTestFn = Intrinsic::getDeclaration(&M, Intrinsic::type_test); Value *Test = IRBTest.CreateCall( BitsetTestFn, {&Addr, MetadataAsValue::get( @@ -154,13 +139,26 @@ void CrossDSOCFI::buildCFICheck() { BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights); SI->addCase(CaseTypeId, TestBB); - ++TypeIds; + ++NumTypeIds; } } bool CrossDSOCFI::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + VeryLikelyWeights = + MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1); if 
(M.getModuleFlag("Cross-DSO CFI") == nullptr) return false; - buildCFICheck(); + buildCFICheck(M); return true; } + +PreservedAnalyses CrossDSOCFIPass::run(Module &M, AnalysisManager<Module> &AM) { + CrossDSOCFI Impl; + bool Changed = Impl.runOnModule(M); + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4de3d95ab11dc..c8c895b187962 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -17,8 +17,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -35,8 +34,8 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include <map> #include <set> #include <tuple> using namespace llvm; @@ -51,77 +50,6 @@ namespace { /// DAE - The dead argument elimination pass. /// class DAE : public ModulePass { - public: - - /// Struct that represents (part of) either a return value or a function - /// argument. Used so that arguments and return values can be used - /// interchangeably. - struct RetOrArg { - RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), - IsArg(IsArg) {} - const Function *F; - unsigned Idx; - bool IsArg; - - /// Make RetOrArg comparable, so we can put it into a map. - bool operator<(const RetOrArg &O) const { - return std::tie(F, Idx, IsArg) < std::tie(O.F, O.Idx, O.IsArg); - } - - /// Make RetOrArg comparable, so we can easily iterate the multimap. - bool operator==(const RetOrArg &O) const { - return F == O.F && Idx == O.Idx && IsArg == O.IsArg; - } - - std::string getDescription() const { - return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) + - " of function " + F->getName()).str(); - } - }; - - /// Liveness enum - During our initial pass over the program, we determine - /// that things are either alive or maybe alive. We don't mark anything - /// explicitly dead (even if we know they are), since anything not alive - /// with no registered uses (in Uses) will never be marked alive and will - /// thus become dead in the end. - enum Liveness { Live, MaybeLive }; - - /// Convenience wrapper - RetOrArg CreateRet(const Function *F, unsigned Idx) { - return RetOrArg(F, Idx, false); - } - /// Convenience wrapper - RetOrArg CreateArg(const Function *F, unsigned Idx) { - return RetOrArg(F, Idx, true); - } - - typedef std::multimap<RetOrArg, RetOrArg> UseMap; - /// This maps a return value or argument to any MaybeLive return values or - /// arguments it uses. This allows the MaybeLive values to be marked live - /// when any of its users is marked live. - /// For example (indices are left out for clarity): - /// - Uses[ret F] = ret G - /// This means that F calls G, and F returns the value returned by G. - /// - Uses[arg F] = ret G - /// This means that some function calls G and passes its result as an - /// argument to F. - /// - Uses[ret F] = arg F - /// This means that F returns one of its own arguments. - /// - Uses[arg F] = arg G - /// This means that G calls F and passes one of its own (G's) arguments - /// directly to F. 
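The Uses map documented above (these declarations move into the new DeadArgumentElimination.h header) drives the pass's liveness propagation: marking a value live must transitively mark everything it uses. A minimal sketch of the scheme, with a generic ordered key standing in for RetOrArg (simplified; the real PropagateLiveness walks and erases the range in place, with care around iterator invalidation, as its comment further down explains):

    #include <map>
    #include <set>
    #include <vector>

    // V must be ordered; RetOrArg provides operator< for exactly this.
    template <typename V> struct LivenessPropagation {
      std::multimap<V, V> Uses; // value -> MaybeLive values it uses
      std::set<V> LiveValues;

      void MarkLive(const V &RA) {
        if (!LiveValues.insert(RA).second)
          return; // already live
        // Detach RA's edges before recursing, so the recursion cannot
        // invalidate iterators into this range.
        std::vector<V> UsedByRA;
        auto Range = Uses.equal_range(RA);
        for (auto I = Range.first; I != Range.second; ++I)
          UsedByRA.push_back(I->second);
        Uses.erase(Range.first, Range.second);
        for (const V &Used : UsedByRA)
          MarkLive(Used);
      }
    };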
- UseMap Uses; - - typedef std::set<RetOrArg> LiveSet; - typedef std::set<const Function*> LiveFuncSet; - - /// This set contains all values that have been determined to be live. - LiveSet LiveValues; - /// This set contains all values that are cannot be changed in any way. - LiveFuncSet LiveFunctions; - - typedef SmallVector<RetOrArg, 5> UseVector; - protected: // DAH uses this to specify a different ID. explicit DAE(char &ID) : ModulePass(ID) {} @@ -132,25 +60,16 @@ namespace { initializeDAEPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M) override; + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + DeadArgumentEliminationPass DAEP(ShouldHackArguments()); + ModuleAnalysisManager DummyMAM; + PreservedAnalyses PA = DAEP.run(M, DummyMAM); + return !PA.areAllPreserved(); + } virtual bool ShouldHackArguments() const { return false; } - - private: - Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses); - Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses, - unsigned RetValNum = -1U); - Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); - - void SurveyFunction(const Function &F); - void MarkValue(const RetOrArg &RA, Liveness L, - const UseVector &MaybeLiveUses); - void MarkLive(const RetOrArg &RA); - void MarkLive(const Function &F); - void PropagateLiveness(const RetOrArg &RA); - bool RemoveDeadStuffFromFunction(Function *F); - bool DeleteDeadVarargs(Function &Fn); - bool RemoveDeadArgumentsFromCallers(Function &Fn); }; } @@ -183,7 +102,7 @@ ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); } /// DeleteDeadVarargs - If this is a function that takes a ... list, and if /// llvm.vastart is never called, the varargs list is dead for the function. -bool DAE::DeleteDeadVarargs(Function &Fn) { +bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!"); if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false; @@ -200,9 +119,9 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Okay, we know we can transform this function if safe. Scan its body // looking for calls marked musttail or calls to llvm.vastart. - for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - CallInst *CI = dyn_cast<CallInst>(I); + for (BasicBlock &BB : Fn) { + for (Instruction &I : BB) { + CallInst *CI = dyn_cast<CallInst>(&I); if (!CI) continue; if (CI->isMustTailCall()) @@ -229,6 +148,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Create the new function body and insert it into the module... 
Function *NF = Function::Create(NFTy, Fn.getLinkage()); NF->copyAttributesFrom(&Fn); + NF->setComdat(Fn.getComdat()); Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF); NF->takeName(&Fn); @@ -257,14 +177,17 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { PAL = AttributeSet::get(Fn.getContext(), AttributesVec); } + SmallVector<OperandBundleDef, 1> OpBundles; + CS.getOperandBundlesAsDefs(OpBundles); + Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, "", Call); + Args, OpBundles, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(PAL); } else { - New = CallInst::Create(NF, Args, "", Call); + New = CallInst::Create(NF, Args, OpBundles, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(PAL); if (cast<CallInst>(Call)->isTailCall()) @@ -316,8 +239,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// RemoveDeadArgumentsFromCallers - Checks if the given function has any /// arguments that are unused, and changes the caller parameters to be undefined /// instead. -bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) -{ +bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { // We cannot change the arguments if this TU does not define the function or // if the linker may choose a function body from another TU, even if the // nominal linkage indicates that other copies of the function have the same @@ -329,7 +251,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) // %v = load i32 %p // ret void // } - if (!Fn.isStrongDefinitionForLinker()) + if (!Fn.hasExactDefinition()) return false; // Functions with local linkage should already have been handled, except the @@ -409,7 +331,9 @@ static Type *getRetComponentType(const Function *F, unsigned Idx) { /// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not /// live, it adds Use to the MaybeLiveUses argument. Returns the determined /// liveness of Use. -DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) { +DeadArgumentEliminationPass::Liveness +DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use, + UseVector &MaybeLiveUses) { // We're live if our use or its Function is already marked as live. if (LiveFunctions.count(Use.F) || LiveValues.count(Use)) return Live; @@ -428,8 +352,9 @@ DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) { /// RetValNum is the return value number to use when this use is used in a /// return instruction. This is used in the recursion, you should always leave /// it at 0. -DAE::Liveness DAE::SurveyUse(const Use *U, - UseVector &MaybeLiveUses, unsigned RetValNum) { +DeadArgumentEliminationPass::Liveness +DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses, + unsigned RetValNum) { const User *V = U->getUser(); if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) { // The value is returned from a function. It's only live when the @@ -442,13 +367,14 @@ DAE::Liveness DAE::SurveyUse(const Use *U, // We might be live, depending on the liveness of Use. return MarkIfNotLive(Use, MaybeLiveUses); } else { - DAE::Liveness Result = MaybeLive; + DeadArgumentEliminationPass::Liveness Result = MaybeLive; for (unsigned i = 0; i < NumRetVals(F); ++i) { RetOrArg Use = CreateRet(F, i); // We might be live, depending on the liveness of Use. 
If any // sub-value is live, then the entire value is considered live. This // is a conservative choice, and better tracking is possible. - DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses); + DeadArgumentEliminationPass::Liveness SubResult = + MarkIfNotLive(Use, MaybeLiveUses); if (Result != Live) Result = SubResult; } @@ -514,7 +440,9 @@ DAE::Liveness DAE::SurveyUse(const Use *U, /// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If /// the result is Live, MaybeLiveUses might be modified but its content should /// be ignored (since it might not be complete). -DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) { +DeadArgumentEliminationPass::Liveness +DeadArgumentEliminationPass::SurveyUses(const Value *V, + UseVector &MaybeLiveUses) { // Assume it's dead (which will only hold if there are no uses at all..). Liveness Result = MaybeLive; // Check each use. @@ -534,7 +462,7 @@ DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) { // We consider arguments of non-internal functions to be intrinsically alive as // well as arguments to functions which have their "address taken". // -void DAE::SurveyFunction(const Function &F) { +void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // Functions with inalloca parameters are expecting args in a particular // register and memory layout. if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) { @@ -570,12 +498,13 @@ void DAE::SurveyFunction(const Function &F) { return; } - if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) { + if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) { MarkLive(F); return; } - DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n"); + DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting callers for fn: " + << F.getName() << "\n"); // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live. unsigned NumLiveRetVals = 0; @@ -637,7 +566,8 @@ void DAE::SurveyFunction(const Function &F) { for (unsigned i = 0; i != RetCount; ++i) MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); - DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n"); + DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting args for fn: " + << F.getName() << "\n"); // Now, check all of our arguments. unsigned i = 0; @@ -669,17 +599,16 @@ void DAE::SurveyFunction(const Function &F) { /// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses, /// such that RA will be marked live if any use in MaybeLiveUses gets marked /// live later on. -void DAE::MarkValue(const RetOrArg &RA, Liveness L, - const UseVector &MaybeLiveUses) { +void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L, + const UseVector &MaybeLiveUses) { switch (L) { case Live: MarkLive(RA); break; case MaybeLive: { // Note any uses of this value, so this return value can be // marked live whenever one of the uses becomes live. - for (UseVector::const_iterator UI = MaybeLiveUses.begin(), - UE = MaybeLiveUses.end(); UI != UE; ++UI) - Uses.insert(std::make_pair(*UI, RA)); + for (const auto &MaybeLiveUse : MaybeLiveUses) + Uses.insert(std::make_pair(MaybeLiveUse, RA)); break; } } @@ -689,8 +618,9 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L, /// changed in any way. Additionally, /// mark any values that are used as this function's parameters or by its return /// values (according to Uses) live as well. 
-void DAE::MarkLive(const Function &F) { - DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); +void DeadArgumentEliminationPass::MarkLive(const Function &F) { + DEBUG(dbgs() << "DeadArgumentEliminationPass - Intrinsically live fn: " + << F.getName() << "\n"); // Mark the function as live. LiveFunctions.insert(&F); // Mark all arguments as live. @@ -704,20 +634,21 @@ void DAE::MarkLive(const Function &F) { /// MarkLive - Mark the given return value or argument as live. Additionally, /// mark any values that are used by this value (according to Uses) live as /// well. -void DAE::MarkLive(const RetOrArg &RA) { +void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) { if (LiveFunctions.count(RA.F)) return; // Function was already marked Live. if (!LiveValues.insert(RA).second) return; // We were already marked Live. - DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n"); + DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking " + << RA.getDescription() << " live\n"); PropagateLiveness(RA); } /// PropagateLiveness - Given that RA is a live value, propagate its liveness /// to any other values it uses (according to Uses). -void DAE::PropagateLiveness(const RetOrArg &RA) { +void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) { // We don't use upper_bound (or equal_range) here, because our recursive call // to ourselves is likely to cause the upper_bound (which is the first value // not belonging to RA) to become erased and the iterator invalidated. @@ -736,7 +667,7 @@ void DAE::PropagateLiveness(const RetOrArg &RA) { // that are not in LiveValues. Transform the function and all of the callees of // the function to not have these arguments and return values. // -bool DAE::RemoveDeadStuffFromFunction(Function *F) { +bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Don't modify fully live functions if (LiveFunctions.count(F)) return false; @@ -777,8 +708,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } } else { ++NumArgumentsEliminated; - DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName() - << ") from " << F->getName() << "\n"); + DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument " << i + << " (" << I->getName() << ") from " << F->getName() + << "\n"); } } @@ -821,8 +753,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { NewRetIdxs[i] = RetTypes.size() - 1; } else { ++NumRetValsEliminated; - DEBUG(dbgs() << "DAE - Removing return value " << i << " from " - << F->getName() << "\n"); + DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing return value " + << i << " from " << F->getName() << "\n"); } } if (RetTypes.size() > 1) { @@ -882,6 +814,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, F->getLinkage()); NF->copyAttributesFrom(F); + NF->setComdat(F->getComdat()); NF->setAttributes(NewPAL); // Insert the new function before the old function, so we won't be processing // it again. @@ -950,14 +883,17 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Reconstruct the AttributesList based on the vector we constructed. 
AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec); + SmallVector<OperandBundleDef, 1> OpBundles; + CS.getOperandBundlesAsDefs(OpBundles); + Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, "", Call->getParent()); + Args, OpBundles, "", Call->getParent()); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { - New = CallInst::Create(NF, Args, "", Call); + New = CallInst::Create(NF, Args, OpBundles, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(NewCallPAL); if (cast<CallInst>(Call)->isTailCall()) @@ -1045,8 +981,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // If we change the return value of the function we must rewrite any return // instructions. Check this now. if (F->getReturnType() != NF->getReturnType()) - for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + for (BasicBlock &BB : *NF) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) { Value *RetVal; if (NFTy->getReturnType()->isVoidTy()) { @@ -1081,7 +1017,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Replace the return instruction with one returning the new return // value (possibly 0 if we became void). ReturnInst::Create(F->getContext(), RetVal, RI); - BB->getInstList().erase(RI); + BB.getInstList().erase(RI); } // Patch the pointer to LLVM function in debug info descriptor. @@ -1093,14 +1029,15 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { return true; } -bool DAE::runOnModule(Module &M) { +PreservedAnalyses DeadArgumentEliminationPass::run(Module &M, + ModuleAnalysisManager &) { bool Changed = false; // First pass: Do a simple check to see if any functions can have their "..." // removed. We can do this if they never call va_start. This loop cannot be // fused with the next loop, because deleting a function invalidates // information computed while surveying other functions. - DEBUG(dbgs() << "DAE - Deleting dead varargs\n"); + DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { Function &F = *I++; if (F.getFunctionType()->isVarArg()) @@ -1111,7 +1048,7 @@ bool DAE::runOnModule(Module &M) { // We assume all arguments are dead unless proven otherwise (allowing us to // determine that dead arguments passed into recursive functions are dead). 
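The SmallVector<OperandBundleDef, 1> dance above is a recurring idiom in this commit (ArgumentPromotion, DeleteDeadVarargs, and RemoveDeadStuffFromFunction all gain it): when a call site is recreated against a rewritten function, operand bundles must be copied across explicitly or they are silently dropped. A condensed sketch of the pattern, assuming the LLVM-3.9-era APIs used in this patch (rebuildCallSite is a hypothetical helper name; attribute and calling-convention copying omitted):

    // Rebuild a call or invoke against NF, preserving operand bundles.
    static Instruction *rebuildCallSite(CallSite CS, Function *NF,
                                        ArrayRef<Value *> Args) {
      Instruction *Call = CS.getInstruction();
      SmallVector<OperandBundleDef, 1> OpBundles;
      CS.getOperandBundlesAsDefs(OpBundles);

      Instruction *New;
      if (auto *II = dyn_cast<InvokeInst>(Call))
        New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", Call);
      else
        New = CallInst::Create(NF, Args, OpBundles, "", Call);
      New->setDebugLoc(Call->getDebugLoc());
      return New;
    }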
// - DEBUG(dbgs() << "DAE - Determining liveness\n"); + DEBUG(dbgs() << "DeadArgumentEliminationPass - Determining liveness\n"); for (auto &F : M) SurveyFunction(F); @@ -1129,5 +1066,7 @@ bool DAE::runOnModule(Module &M) { for (auto &F : M) Changed |= RemoveDeadArgumentsFromCallers(F); - return Changed; + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); } diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp index af313a6b001d7..98c4b1740306d 100644 --- a/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -13,10 +13,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Pass.h" using namespace llvm; @@ -26,30 +27,7 @@ using namespace llvm; STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); -namespace { -struct EliminateAvailableExternally : public ModulePass { - static char ID; // Pass identification, replacement for typeid - EliminateAvailableExternally() : ModulePass(ID) { - initializeEliminateAvailableExternallyPass( - *PassRegistry::getPassRegistry()); - } - - // run - Do the EliminateAvailableExternally pass on the specified module, - // optionally updating the specified callgraph to reflect the changes. - // - bool runOnModule(Module &M) override; -}; -} - -char EliminateAvailableExternally::ID = 0; -INITIALIZE_PASS(EliminateAvailableExternally, "elim-avail-extern", - "Eliminate Available Externally Globals", false, false) - -ModulePass *llvm::createEliminateAvailableExternallyPass() { - return new EliminateAvailableExternally(); -} - -bool EliminateAvailableExternally::runOnModule(Module &M) { +static bool eliminateAvailableExternally(Module &M) { bool Changed = false; // Drop initializers of available externally global variables. @@ -82,3 +60,37 @@ bool EliminateAvailableExternally::runOnModule(Module &M) { return Changed; } + +PreservedAnalyses +EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { + if (!eliminateAvailableExternally(M)) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} + +namespace { +struct EliminateAvailableExternallyLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + EliminateAvailableExternallyLegacyPass() : ModulePass(ID) { + initializeEliminateAvailableExternallyLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + // run - Do the EliminateAvailableExternally pass on the specified module, + // optionally updating the specified callgraph to reflect the changes. 
+ // + bool runOnModule(Module &M) { + if (skipModule(M)) + return false; + return eliminateAvailableExternally(M); + } +}; +} + +char EliminateAvailableExternallyLegacyPass::ID = 0; +INITIALIZE_PASS(EliminateAvailableExternallyLegacyPass, "elim-avail-extern", + "Eliminate Available Externally Globals", false, false) + +ModulePass *llvm::createEliminateAvailableExternallyPass() { + return new EliminateAvailableExternallyLegacyPass(); +} diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 1a3b9253d72fc..479fd182598a7 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -68,6 +68,9 @@ namespace { : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {} bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + // Visit the global inline asm. if (!deleteStuff) M.setModuleInlineAsm(""); @@ -101,20 +104,20 @@ namespace { } // Visit the Functions. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + for (Function &F : M) { bool Delete = - deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration(); + deleteStuff == (bool)Named.count(&F) && !F.isDeclaration(); if (!Delete) { - if (I->hasAvailableExternallyLinkage()) + if (F.hasAvailableExternallyLinkage()) continue; } - makeVisible(*I, Delete); + makeVisible(F, Delete); if (Delete) { // Make this a declaration and drop its comdat. - I->deleteBody(); - I->setComdat(nullptr); + F.deleteBody(); + F.setComdat(nullptr); } } @@ -128,7 +131,7 @@ namespace { makeVisible(*CurI, Delete); if (Delete) { - Type *Ty = CurI->getType()->getElementType(); + Type *Ty = CurI->getValueType(); CurI->removeFromParent(); llvm::Value *Declaration; diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp index 6df044762cf45..968712138208f 100644 --- a/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -80,7 +80,8 @@ static void addForcedAttributes(Function &F) { } } -PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) { +PreservedAnalyses ForceFunctionAttrsPass::run(Module &M, + ModuleAnalysisManager &) { if (ForceAttributes.empty()) return PreservedAnalyses::all(); diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 527fdd1885a4f..fff5440854148 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -13,6 +13,7 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" @@ -52,38 +53,6 @@ typedef SmallSetVector<Function *, 8> SCCNodeSet; } namespace { -struct PostOrderFunctionAttrs : public CallGraphSCCPass { - static char ID; // Pass identification, replacement for typeid - PostOrderFunctionAttrs() : CallGraphSCCPass(ID) { - initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnSCC(CallGraphSCC &SCC) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - CallGraphSCCPass::getAnalysisUsage(AU); - } - -private: - TargetLibraryInfo *TLI; -}; -} - -char PostOrderFunctionAttrs::ID = 0; -INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs", - "Deduce function attributes", false, false) 
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs", - "Deduce function attributes", false, false) - -Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); } - -namespace { /// The three kinds of memory access relevant to 'readonly' and /// 'readnone' attributes. enum MemoryAccessKind { @@ -100,9 +69,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR, // Already perfect! return MAK_ReadNone; - // Definitions with weak linkage may be overridden at linktime with - // something that writes memory, so treat them like declarations. - if (F.isDeclaration() || F.mayBeOverridden()) { + // Non-exact function definitions may not be selected at link time, and an + // alternative version that writes to memory may be selected. See the comment + // on GlobalValue::isDefinitionExact for more details. + if (!F.hasExactDefinition()) { if (AliasAnalysis::onlyReadsMemory(MRB)) return MAK_ReadOnly; @@ -119,8 +89,12 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR, // Detect these now, skipping to the next instruction if one is found. CallSite CS(cast<Value>(I)); if (CS) { - // Ignore calls to functions in the same SCC. - if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + // Ignore calls to functions in the same SCC, as long as the call sites + // don't have operand bundles. Calls with operand bundles are allowed to + // have memory effects not described by the memory effects of the call + // target. + if (!CS.hasOperandBundles() && CS.getCalledFunction() && + SCCNodes.count(CS.getCalledFunction())) continue; FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); @@ -311,8 +285,7 @@ struct ArgumentUsesTracker : public CaptureTracker { } Function *F = CS.getCalledFunction(); - if (!F || F->isDeclaration() || F->mayBeOverridden() || - !SCCNodes.count(F)) { + if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) { Captured = true; return true; } @@ -490,6 +463,11 @@ determinePointerReadAttrs(Argument *A, } case Instruction::Load: + // A volatile load has side effects beyond what readonly can be relied + // upon. + if (cast<LoadInst>(I)->isVolatile()) + return Attribute::None; + IsRead = true; break; @@ -517,9 +495,10 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { // Check each function in turn, determining which pointer arguments are not // captured. for (Function *F : SCCNodes) { - // Definitions with weak linkage may be overridden at linktime with - // something that captures pointers, so treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) + // We can infer and propagate function attributes only when we know that the + // definition we'll get at link time is *exactly* the definition we see now. + // For more details, see GlobalValue::mayBeDerefined. + if (!F->hasExactDefinition()) continue; // Functions that are readonly (or readnone) and nounwind and don't return @@ -557,12 +536,9 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { // then it must be calling into another function in our SCC. Save // its particulars for Argument-SCC analysis later. 
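The argument graph being built here exists because nocapture can be a mutually dependent property. In the sketch below (a hypothetical source-level example), neither f nor g captures its pointer argument, but proving that for either one requires assuming it for the other; analyzing a strongly connected component of arguments at once resolves exactly this kind of cycle:

    void g(int *q);
    // f's argument escapes only into g, and g's only into f; the
    // argument-SCC walk can mark both nocapture together.
    void f(int *p) { if (*p) g(p); }
    void g(int *q) { if (*q > 1) f(q); }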
ArgumentGraphNode *Node = AG[&*A]; - for (SmallVectorImpl<Argument *>::iterator - UI = Tracker.Uses.begin(), - UE = Tracker.Uses.end(); - UI != UE; ++UI) { - Node->Uses.push_back(AG[*UI]); - if (*UI != A) + for (Argument *Use : Tracker.Uses) { + Node->Uses.push_back(AG[Use]); + if (Use != &*A) HasNonLocalUses = true; } } @@ -627,17 +603,15 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { SmallPtrSet<Argument *, 8> ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. - for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { - ArgumentSCCNodes.insert((*I)->Definition); + for (ArgumentGraphNode *I : ArgumentSCC) { + ArgumentSCCNodes.insert(I->Definition); } for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; - for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(), - UE = N->Uses.end(); - UI != UE; ++UI) { - Argument *A = (*UI)->Definition; + for (ArgumentGraphNode *Use : N->Uses) { + Argument *A = Use->Definition; if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) continue; SCCCaptured = true; @@ -703,8 +677,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { /// doesn't alias any other pointer visible to the caller. static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { SmallSetVector<Value *, 8> FlowsToReturn; - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) - if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) + for (BasicBlock &BB : *F) + if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) FlowsToReturn.insert(Ret->getReturnValue()); for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { @@ -772,9 +746,10 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { if (F->doesNotAlias(0)) continue; - // Definitions with weak linkage may be overridden at linktime, so - // treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) + // We can infer and propagate function attributes only when we know that the + // definition we'll get at link time is *exactly* the definition we see now. + // For more details, see GlobalValue::mayBeDerefined. + if (!F->hasExactDefinition()) return false; // We annotate noalias return values, which are only applicable to @@ -807,7 +782,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { /// \p Speculative based on whether the returned conclusion is a speculative /// conclusion due to SCC calls. static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, - const TargetLibraryInfo &TLI, bool &Speculative) { + bool &Speculative) { assert(F->getReturnType()->isPointerTy() && "nonnull only meaningful on pointer types"); Speculative = false; @@ -821,7 +796,7 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, Value *RetVal = FlowsToReturn[i]; // If this value is locally known to be non-null, we're good - if (isKnownNonNull(RetVal, &TLI)) + if (isKnownNonNull(RetVal)) continue; // Otherwise, we need to look upwards since we can't make any local @@ -870,8 +845,7 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, } /// Deduce nonnull attributes for the SCC. -static bool addNonNullAttrs(const SCCNodeSet &SCCNodes, - const TargetLibraryInfo &TLI) { +static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) { // Speculative that all functions in the SCC return only nonnull // pointers. 
We may refute this as we analyze functions. bool SCCReturnsNonNull = true; @@ -886,9 +860,10 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes, Attribute::NonNull)) continue; - // Definitions with weak linkage may be overridden at linktime, so - // treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) + // We can infer and propagate function attributes only when we know that the + // definition we'll get at link time is *exactly* the definition we see now. + // For more details, see GlobalValue::mayBeDerefined. + if (!F->hasExactDefinition()) return false; // We annotate nonnull return values, which are only applicable to @@ -897,7 +872,7 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes, continue; bool Speculative = false; - if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) { + if (isReturnNonNull(F, SCCNodes, Speculative)) { if (!Speculative) { // Mark the function eagerly since we may discover a function // which prevents us from speculating about the entire SCC @@ -930,6 +905,49 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes, return MadeChange; } +/// Remove the convergent attribute from all functions in the SCC if every +/// callsite within the SCC is not convergent (except for calls to functions +/// within the SCC). Returns true if changes were made. +static bool removeConvergentAttrs(const SCCNodeSet &SCCNodes) { + // For every function in SCC, ensure that either + // * it is not convergent, or + // * we can remove its convergent attribute. + bool HasConvergentFn = false; + for (Function *F : SCCNodes) { + if (!F->isConvergent()) continue; + HasConvergentFn = true; + + // Can't remove convergent from function declarations. + if (F->isDeclaration()) return false; + + // Can't remove convergent if any of our functions has a convergent call to a + // function not in the SCC. + for (Instruction &I : instructions(*F)) { + CallSite CS(&I); + // Bail if CS is a convergent call to a function not in the SCC. + if (CS && CS.isConvergent() && + SCCNodes.count(CS.getCalledFunction()) == 0) + return false; + } + } + + // If the SCC doesn't have any convergent functions, we have nothing to do. + if (!HasConvergentFn) return false; + + // If we got here, all of the calls the SCC makes to functions not in the SCC + // are non-convergent. Therefore all of the SCC's functions can also be made + // non-convergent. We'll remove the attr from the callsites in + // InstCombineCalls. + for (Function *F : SCCNodes) { + if (!F->isConvergent()) continue; + + DEBUG(dbgs() << "Removing convergent attr from fn " << F->getName() + << "\n"); + F->setNotConvergent(); + } + return true; +} + static bool setDoesNotRecurse(Function &F) { if (F.doesNotRecurse()) return false; @@ -938,56 +956,129 @@ static bool setDoesNotRecurse(Function &F) { return true; } -static bool addNoRecurseAttrs(const CallGraphSCC &SCC) { +static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) { // Try and identify functions that do not recurse. // If the SCC contains multiple nodes we know for sure there is recursion. - if (!SCC.isSingular()) + if (SCCNodes.size() != 1) return false; - const CallGraphNode *CGN = *SCC.begin(); - Function *F = CGN->getFunction(); + Function *F = *SCCNodes.begin(); if (!F || F->isDeclaration() || F->doesNotRecurse()) return false; // If all of the calls in F are identifiable and are to norecurse functions, F // is norecurse. 
This check also detects self-recursion as F is not currently // marked norecurse, so any call from F to F will not be treated as norecurse. - if (std::all_of(CGN->begin(), CGN->end(), - [](const CallGraphNode::CallRecord &CR) { - Function *F = CR.second->getFunction(); - return F && F->doesNotRecurse(); - })) - // Function calls a potentially recursive function. - return setDoesNotRecurse(*F); - - // Nothing else we can deduce usefully during the postorder traversal. - return false; + for (Instruction &I : instructions(*F)) + if (auto CS = CallSite(&I)) { + Function *Callee = CS.getCalledFunction(); + if (!Callee || Callee == F || !Callee->doesNotRecurse()) + // Function calls a potentially recursive function. + return false; + } + + // Every call was to a non-recursive function other than this function, and + // we have no indirect recursion as the SCC size is one. This function cannot + // recurse. + return setDoesNotRecurse(*F); } -bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) { - TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - bool Changed = false; +PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager(); - // We compute dedicated AA results for each function in the SCC as needed. We - // use a lambda referencing external objects so that they live long enough to - // be queried, but we re-use them each time. - Optional<BasicAAResult> BAR; - Optional<AAResults> AAR; + // We pass a lambda into functions to wire them up to the analysis manager + // for getting function analyses. auto AARGetter = [&](Function &F) -> AAResults & { - BAR.emplace(createLegacyPMBasicAAResult(*this, F)); - AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); - return *AAR; + return FAM.getResult<AAManager>(F); }; + // Fill SCCNodes with the elements of the SCC. Also track whether there are + // any external or opt-none nodes that will prevent us from optimizing any + // part of the SCC. + SCCNodeSet SCCNodes; + bool HasUnknownCall = false; + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + if (F.hasFnAttribute(Attribute::OptimizeNone)) { + // Treat any function we're trying not to optimize as if it were an + // indirect call and omit it from the node set used below. + HasUnknownCall = true; + continue; + } + // Track whether any functions in this SCC have an unknown call edge. + // Note: if this is ever a performance hit, we can common it with + // subsequent routines which also do scans over the instructions of the + // function. + if (!HasUnknownCall) + for (Instruction &I : instructions(F)) + if (auto CS = CallSite(&I)) + if (!CS.getCalledFunction()) { + HasUnknownCall = true; + break; + } + + SCCNodes.insert(&F); + } + + bool Changed = false; + Changed |= addReadAttrs(SCCNodes, AARGetter); + Changed |= addArgumentAttrs(SCCNodes); + + // If we have no external nodes participating in the SCC, we can deduce some + // more precise attributes as well. + if (!HasUnknownCall) { + Changed |= addNoAliasAttrs(SCCNodes); + Changed |= addNonNullAttrs(SCCNodes); + Changed |= removeConvergentAttrs(SCCNodes); + Changed |= addNoRecurseAttrs(SCCNodes); + } + + return Changed ?
PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +namespace { +struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) { + initializePostOrderFunctionAttrsLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnSCC(CallGraphSCC &SCC) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AssumptionCacheTracker>(); + getAAResultsAnalysisUsage(AU); + CallGraphSCCPass::getAnalysisUsage(AU); + } +}; +} + +char PostOrderFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "functionattrs", + "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "functionattrs", + "Deduce function attributes", false, false) + +Pass *llvm::createPostOrderFunctionAttrsLegacyPass() { return new PostOrderFunctionAttrsLegacyPass(); } + +template <typename AARGetterT> +static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { + bool Changed = false; + // Fill SCCNodes with the elements of the SCC. Used for quickly looking up // whether a given CallGraphNode is in this SCC. Also track whether there are // any external or opt-none nodes that will prevent us from optimizing any // part of the SCC. SCCNodeSet SCCNodes; bool ExternalNode = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); + for (CallGraphNode *I : SCC) { + Function *F = I->getFunction(); if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) { // External node or function we're trying not to optimize - we both avoid // transforming them and avoid leveraging information they provide. @@ -1005,28 +1096,37 @@ bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) { // more precise attributes as well. if (!ExternalNode) { Changed |= addNoAliasAttrs(SCCNodes); - Changed |= addNonNullAttrs(SCCNodes, *TLI); + Changed |= addNonNullAttrs(SCCNodes); + Changed |= removeConvergentAttrs(SCCNodes); + Changed |= addNoRecurseAttrs(SCCNodes); } - Changed |= addNoRecurseAttrs(SCC); return Changed; } +bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) { + if (skipSCC(SCC)) + return false; + + // We compute dedicated AA results for each function in the SCC as needed. We + // use a lambda referencing external objects so that they live long enough to + // be queried, but we re-use them each time. + Optional<BasicAAResult> BAR; + Optional<AAResults> AAR; + auto AARGetter = [&](Function &F) -> AAResults & { + BAR.emplace(createLegacyPMBasicAAResult(*this, F)); + AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); + return *AAR; + }; + + return runImpl(SCC, AARGetter); +} + namespace { -/// A pass to do RPO deduction and propagation of function attributes. -/// -/// This pass provides a general RPO or "top down" propagation of -/// function attributes. For a few (rare) cases, we can deduce significantly -/// more about function attributes by working in RPO, so this pass -/// provides the compliment to the post-order pass above where the majority of -/// deduction is performed. -// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so -// this is a boring module pass, but eventually it should be an RPO CGSCC pass -// when such infrastructure is available.
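// Illustrative example, not from the patch: what the addNoRecurseAttrs()
// logic above accepts and rejects for a single-function SCC. The attribute
// on leaf() is an assumption for the example.
//
//   void leaf();                       // assume: already marked norecurse
//   void ok() { leaf(); leaf(); }      // every call is direct, to a known
//                                      // norecurse callee other than ok()
//                                      // => ok() is marked norecurse
//   void self() { self(); }            // Callee == F => rejected
//   void ind(void (*fp)()) { fp(); }   // no known callee => rejected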
-struct ReversePostOrderFunctionAttrs : public ModulePass { +struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid - ReversePostOrderFunctionAttrs() : ModulePass(ID) { - initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); + ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeReversePostOrderFunctionAttrsLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; @@ -1034,19 +1134,20 @@ struct ReversePostOrderFunctionAttrs : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<CallGraphWrapperPass>(); + AU.addPreserved<CallGraphWrapperPass>(); } }; } -char ReversePostOrderFunctionAttrs::ID = 0; -INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs", +char ReversePostOrderFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs", "Deduce function attributes in RPO", false, false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs", +INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs", "Deduce function attributes in RPO", false, false) Pass *llvm::createReversePostOrderFunctionAttrsPass() { - return new ReversePostOrderFunctionAttrs(); + return new ReversePostOrderFunctionAttrsLegacyPass(); } static bool addNoRecurseAttrsTopDown(Function &F) { @@ -1078,7 +1179,7 @@ static bool addNoRecurseAttrsTopDown(Function &F) { return setDoesNotRecurse(F); } -bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) { +static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) { // We only have a post-order SCC traversal (because SCCs are inherently // discovered in post-order), so we accumulate them in a vector and then walk // it in reverse. This is simpler than using the RPO iterator infrastructure @@ -1086,7 +1187,6 @@ bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) { // graph. We can also cheat egregiously because we're primarily interested in // synthesizing norecurse and so we can only save the singular SCCs as SCCs // with multiple functions in them will clearly be recursive. 
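// Illustrative example, not from the patch: why the top-down (RPO) direction
// can prove norecurse where the bottom-up SCC walk cannot. Assume ext() has
// no visible definition:
//
//   void ext();                      // declaration: behavior unknown
//   static void helper() { ext(); }  // bottom-up: callee unknown, so the
//                                    // post-order walk must give up
//
// Walking callers first instead: if helper() is internal, every use of it is
// a direct call, and each calling function is already norecurse, then no
// path can re-enter helper() while it is active, so it is norecurse. That
// is, roughly, the condition addNoRecurseAttrsTopDown() above checks.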
- auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); SmallVector<Function *, 16> Worklist; for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { if (I->size() != 1) @@ -1104,3 +1204,24 @@ return Changed; } + +bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + + return deduceFunctionAttributeInRPO(M, CG); +} + +PreservedAnalyses +ReversePostOrderFunctionAttrsPass::run(Module &M, AnalysisManager<Module> &AM) { + auto &CG = AM.getResult<CallGraphAnalysis>(M); + + bool Changed = deduceFunctionAttributeInRPO(M, CG); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<CallGraphAnalysis>(); + return PA; +} diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 5e0df95051192..c9d075e763250 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -13,329 +13,670 @@ #include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" -#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/Utils/FunctionImportUtils.h" -#include <map> +#define DEBUG_TYPE "function-import" using namespace llvm; -#define DEBUG_TYPE "function-import" +STATISTIC(NumImported, "Number of functions imported"); /// Limit on instruction count of imported functions. static cl::opt<unsigned> ImportInstrLimit( "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"), cl::desc("Only import functions with less than N instructions")); +static cl::opt<float> + ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7), + cl::Hidden, cl::value_desc("x"), + cl::desc("As we import functions, multiply the " + "`import-instr-limit` threshold by this factor " + "before processing newly imported functions")); + +static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden, + cl::desc("Print imported functions")); + +// Temporarily allows the function import pass to disable always linking +// referenced discardable symbols. +static cl::opt<bool> + DontForceImportReferencedDiscardableSymbols("disable-force-link-odr", + cl::init(false), cl::Hidden); + +static cl::opt<bool> EnableImportMetadata( + "enable-import-metadata", cl::init( +#if !defined(NDEBUG) + true /*Enabled with asserts.*/ +#else + false +#endif + ), + cl::Hidden, cl::desc("Enable import metadata like 'thinlto_src_module'")); + // Load lazily a module from \p FileName in \p Context. static std::unique_ptr<Module> loadFile(const std::string &FileName, LLVMContext &Context) { SMDiagnostic Err; DEBUG(dbgs() << "Loading '" << FileName << "'\n"); - // Metadata isn't loaded or linked until after all functions are - // imported, after which it will be materialized and linked.
+ // Metadata isn't loaded until functions are imported, to minimize + // the memory overhead. std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context, /* ShouldLazyLoadMetadata = */ true); if (!Result) { Err.print("function-import", errs()); - return nullptr; + report_fatal_error("Abort"); } return Result; } namespace { -/// Helper to load on demand a Module from file and cache it for subsequent -/// queries. It can be used with the FunctionImporter. -class ModuleLazyLoaderCache { - /// Cache of lazily loaded module for import. - StringMap<std::unique_ptr<Module>> ModuleMap; - /// Retrieve a Module from the cache or lazily load it on demand. - std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule; +// Return true if the Summary describes a GlobalValue that can be externally +// referenced, i.e. it does not need renaming (linkage is not local) or renaming +// is possible (does not have a section for instance). +static bool canBeExternallyReferenced(const GlobalValueSummary &Summary) { + if (!Summary.needsRenaming()) + return true; -public: - /// Create the loader, Module will be initialized in \p Context. - ModuleLazyLoaderCache(std::function< - std::unique_ptr<Module>(StringRef FileName)> createLazyModule) - : createLazyModule(createLazyModule) {} - - /// Retrieve a Module from the cache or lazily load it on demand. - Module &operator()(StringRef FileName); - - std::unique_ptr<Module> takeModule(StringRef FileName) { - auto I = ModuleMap.find(FileName); - assert(I != ModuleMap.end()); - std::unique_ptr<Module> Ret = std::move(I->second); - ModuleMap.erase(I); - return Ret; - } -}; + if (Summary.hasSection()) + // Can't rename a global that needs renaming if it has a section. + return false; -// Get a Module for \p FileName from the cache, or load it lazily. -Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) { - auto &Module = ModuleMap[Identifier]; - if (!Module) - Module = createLazyModule(Identifier); - return *Module; + return true; } -} // anonymous namespace -/// Walk through the instructions in \p F looking for external -/// calls not already in the \p CalledFunctions set. If any are -/// found they are added to the \p Worklist for importing. -static void findExternalCalls(const Module &DestModule, Function &F, - const FunctionInfoIndex &Index, - StringSet<> &CalledFunctions, - SmallVector<StringRef, 64> &Worklist) { - // We need to suffix internal function calls imported from other modules, - // prepare the suffix ahead of time. - std::string Suffix; - if (F.getParent() != &DestModule) - Suffix = - (Twine(".llvm.") + - Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str(); - - for (auto &BB : F) { - for (auto &I : BB) { - if (isa<CallInst>(I)) { - auto CalledFunction = cast<CallInst>(I).getCalledFunction(); - // Insert any new external calls that have not already been - // added to set/worklist. - if (!CalledFunction || !CalledFunction->hasName()) - continue; - // Ignore intrinsics early - if (CalledFunction->isIntrinsic()) { - assert(CalledFunction->getIntrinsicID() != 0); - continue; - } - auto ImportedName = CalledFunction->getName(); - auto Renamed = (ImportedName + Suffix).str(); - // Rename internal functions - if (CalledFunction->hasInternalLinkage()) { - ImportedName = Renamed; - } - auto It = CalledFunctions.insert(ImportedName); - if (!It.second) { - // This is a call to a function we already considered, skip.
- continue; - } - // Ignore functions already present in the destination module - auto *SrcGV = DestModule.getNamedValue(ImportedName); - if (SrcGV) { - if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV)) - SrcGV = SGA->getBaseObject(); - assert(isa<Function>(SrcGV) && "Name collision during import"); - if (!cast<Function>(SrcGV)->isDeclaration()) { - DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring " - << ImportedName << " already in DestinationModule\n"); - continue; - } +// Return true if \p GUID describes a GlobalValue that can be externally +// referenced, i.e. it does not need renaming (linkage is not local) or +// renaming is possible (does not have a section for instance). +static bool canBeExternallyReferenced(const ModuleSummaryIndex &Index, + GlobalValue::GUID GUID) { + auto Summaries = Index.findGlobalValueSummaryList(GUID); + if (Summaries == Index.end()) + return true; + if (Summaries->second.size() != 1) + // If there are multiple globals with this GUID, then we know it is + // not a local symbol, and it is necessarily externally referenced. + return true; + + // We don't need to check for the module path, because if it can't be + // externally referenced and we call it, it is necessarily in the same + // module. + return canBeExternallyReferenced(**Summaries->second.begin()); +} + +// Return true if the global described by \p Summary can be imported in another +// module. +static bool eligibleForImport(const ModuleSummaryIndex &Index, + const GlobalValueSummary &Summary) { + if (!canBeExternallyReferenced(Summary)) + // Can't import a global that needs renaming if it has a section, for instance. + // FIXME: we may be able to import it by copying it without promotion. + return false; + + // Check references (and potential calls) in the same module. If the current + // value references a global that can't be externally referenced it is not + // eligible for import. + bool AllRefsCanBeExternallyReferenced = + llvm::all_of(Summary.refs(), [&](const ValueInfo &VI) { + return canBeExternallyReferenced(Index, VI.getGUID()); + }); + if (!AllRefsCanBeExternallyReferenced) + return false; + + if (auto *FuncSummary = dyn_cast<FunctionSummary>(&Summary)) { + bool AllCallsCanBeExternallyReferenced = llvm::all_of( + FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) { + return canBeExternallyReferenced(Index, Edge.first.getGUID()); + }); + if (!AllCallsCanBeExternallyReferenced) + return false; + } + return true; +} + +/// Given a list of possible callee implementations for a call site, select one +/// that fits the \p Threshold. +/// +/// FIXME: select "best" instead of first that fits. But what is "best"? +/// - The smallest: more likely to be inlined. +/// - The one with the fewest outgoing edges (already well optimized). +/// - One from a module already being imported from in order to reduce the +/// number of source modules parsed/linked. +/// - One that has PGO data attached.
+/// - [insert your fancy metric here] +static const GlobalValueSummary * +selectCallee(const ModuleSummaryIndex &Index, + const GlobalValueSummaryList &CalleeSummaryList, + unsigned Threshold) { + auto It = llvm::find_if( + CalleeSummaryList, + [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) { + auto *GVSummary = SummaryPtr.get(); + if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) + // There is no point in importing these, we can't inline them + return false; + if (auto *AS = dyn_cast<AliasSummary>(GVSummary)) { + GVSummary = &AS->getAliasee(); + // Alias can't point to "available_externally". However when we import + // linkOnceODR the linkage does not change. So we import the alias + // and aliasee only in this case. + // FIXME: we should import alias as available_externally *function*, + // the destination module does not need to know it is an alias. + if (!GlobalValue::isLinkOnceODRLinkage(GVSummary->linkage())) + return false; } auto *Summary = cast<FunctionSummary>(GVSummary); + + if (Summary->instCount() > Threshold) + return false; + + if (!eligibleForImport(Index, *Summary)) + return false; + + return true; + }); + if (It == CalleeSummaryList.end()) + return nullptr; + + return cast<GlobalValueSummary>(It->get()); +} + +/// Return the summary for the function \p GUID that fits the \p Threshold, or +/// null if there's no match. +static const GlobalValueSummary *selectCallee(GlobalValue::GUID GUID, + unsigned Threshold, + const ModuleSummaryIndex &Index) { + auto CalleeSummaryList = Index.findGlobalValueSummaryList(GUID); + if (CalleeSummaryList == Index.end()) + return nullptr; // This function does not have a summary + return selectCallee(Index, CalleeSummaryList->second, Threshold); +} + +/// Mark the global \p GUID as exported by module \p ExportModulePath if found in +/// this module. If it is a GlobalVariable, we also mark any referenced global +/// in the current module as exported. +static void exportGlobalInModule(const ModuleSummaryIndex &Index, + StringRef ExportModulePath, + GlobalValue::GUID GUID, + FunctionImporter::ExportSetTy &ExportList) { + auto FindGlobalSummaryInModule = + [&](GlobalValue::GUID GUID) -> GlobalValueSummary *{ + auto SummaryList = Index.findGlobalValueSummaryList(GUID); + if (SummaryList == Index.end()) + // This global does not have a summary, it is not part of the ThinLTO + // process + return nullptr; + auto SummaryIter = llvm::find_if( + SummaryList->second, + [&](const std::unique_ptr<GlobalValueSummary> &Summary) { + return Summary->modulePath() == ExportModulePath; + }); + if (SummaryIter == SummaryList->second.end()) + return nullptr; + return SummaryIter->get(); + }; + + auto *Summary = FindGlobalSummaryInModule(GUID); + if (!Summary) + return; + // We found it in the current module, mark as exported + ExportList.insert(GUID); + + auto GVS = dyn_cast<GlobalVarSummary>(Summary); + if (!GVS) + return; + // FunctionImportGlobalProcessing::doPromoteLocalToGlobal() will always + // trigger importing the initializer for `constant unnamed addr` globals that + // are referenced. We conservatively export all the referenced symbols for + // every global to work around this, so that the ExportList is accurate. + // FIXME: with an "isConstant" flag in the summary we could be more targeted.
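// Worked example, not from the patch, of the import budget used by the
// functions below: with the defaults above (-import-instr-limit=100 and
// -import-instr-evolution-factor=0.7), callees of the module's own functions
// qualify when their summary instruction count is at most 100. Functions
// reached through an already-imported callee are judged against a threshold
// scaled by 0.7 at each level (100 -> ~70 -> ~49 -> ...), so the import
// frontier narrows as the DFS descends the call graph.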
+ for (auto &Ref : GVS->refs()) { + auto GUID = Ref.getGUID(); + auto *RefSummary = FindGlobalSummaryInModule(GUID); + if (RefSummary) + // Found a ref in the current module, mark it as exported + ExportList.insert(GUID); } } -// Helper function: given a worklist and an index, will process all the worklist -// and decide what to import based on the summary information. -// -// Nothing is actually imported, functions are materialized in their source -// module and analyzed there. -// -// \p ModuleToFunctionsToImportMap is filled with the set of Function to import -// per Module. -static void GetImportList(Module &DestModule, - SmallVector<StringRef, 64> &Worklist, - StringSet<> &CalledFunctions, - std::map<StringRef, DenseSet<const GlobalValue *>> - &ModuleToFunctionsToImportMap, - const FunctionInfoIndex &Index, - ModuleLazyLoaderCache &ModuleLoaderCache) { - while (!Worklist.empty()) { - auto CalledFunctionName = Worklist.pop_back_val(); - DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for " - << CalledFunctionName << "\n"); - - // Try to get a summary for this function call. - auto InfoList = Index.findFunctionInfoList(CalledFunctionName); - if (InfoList == Index.end()) { - DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for " - << CalledFunctionName << " Ignoring.\n"); +using EdgeInfo = std::pair<const FunctionSummary *, unsigned /* Threshold */>; + +/// Compute the list of functions to import for a given caller. Mark these +/// imported functions and the symbols they reference in their source module as +/// exported from their source module. +static void computeImportForFunction( + const FunctionSummary &Summary, const ModuleSummaryIndex &Index, + unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries, + SmallVectorImpl<EdgeInfo> &Worklist, + FunctionImporter::ImportMapTy &ImportsForModule, + StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) { + for (auto &Edge : Summary.calls()) { + auto GUID = Edge.first.getGUID(); + DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n"); + + if (DefinedGVSummaries.count(GUID)) { + DEBUG(dbgs() << "ignored! Target already in destination module.\n"); continue; } - assert(!InfoList->second.empty() && "No summary, error at import?"); - - // Comdat can have multiple entries, FIXME: what do we do with them? - auto &Info = InfoList->second[0]; - assert(Info && "Nullptr in list, error importing summaries?\n"); - - auto *Summary = Info->functionSummary(); - if (!Summary) { - // FIXME: in case we are lazyloading summaries, we can do it now. - DEBUG(dbgs() << DestModule.getModuleIdentifier() - << ": Missing summary for " << CalledFunctionName - << ", error at import?\n"); - llvm_unreachable("Missing summary"); - } - if (Summary->instCount() > ImportInstrLimit) { - DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of " - << CalledFunctionName << " with " << Summary->instCount() - << " instructions (limit " << ImportInstrLimit << ")\n"); + auto *CalleeSummary = selectCallee(GUID, Threshold, Index); + if (!CalleeSummary) { + DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n"); continue; } - - // Get the module path from the summary. - auto ModuleIdentifier = Summary->modulePath(); - DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing " - << CalledFunctionName << " from " << ModuleIdentifier << "\n"); - - auto &SrcModule = ModuleLoaderCache(ModuleIdentifier); - - // The function that we will import! 
- GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName); - - if (!SGV) { - // The destination module is referencing function using their renamed name - // when importing a function that was originally local in the source - // module. The source module we have might not have been renamed so we try - // to remove the suffix added during the renaming to recover the original - // name in the source module. - std::pair<StringRef, StringRef> Split = - CalledFunctionName.split(".llvm."); - SGV = SrcModule.getNamedValue(Split.first); - assert(SGV && "Can't find function to import in source module"); + // "Resolve" the summary, traversing aliases. + const FunctionSummary *ResolvedCalleeSummary; + if (isa<AliasSummary>(CalleeSummary)) { + ResolvedCalleeSummary = cast<FunctionSummary>( + &cast<AliasSummary>(CalleeSummary)->getAliasee()); + assert( + GlobalValue::isLinkOnceODRLinkage(ResolvedCalleeSummary->linkage()) && + "Unexpected alias to a non-linkonceODR in import list"); + } else + ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary); + + assert(ResolvedCalleeSummary->instCount() <= Threshold && + "selectCallee() didn't honor the threshold"); + + auto ExportModulePath = ResolvedCalleeSummary->modulePath(); + auto &ProcessedThreshold = ImportsForModule[ExportModulePath][GUID]; + /// Since the traversal of the call graph is DFS, we can revisit a function + /// a second time with a higher threshold. In this case, it is added back to + /// the worklist with the new threshold. + if (ProcessedThreshold && ProcessedThreshold >= Threshold) { + DEBUG(dbgs() << "ignored! Target was already seen with Threshold " + << ProcessedThreshold << "\n"); + continue; } - if (!SGV) { - report_fatal_error(Twine("Can't load function '") + CalledFunctionName + - "' in Module '" + SrcModule.getModuleIdentifier() + - "', error in the summary?\n"); + // Mark this function as imported in this module, with the current Threshold + ProcessedThreshold = Threshold; + + // Make exports in the source module. + if (ExportLists) { + auto &ExportList = (*ExportLists)[ExportModulePath]; + ExportList.insert(GUID); + // Mark all functions and globals referenced by this function as exported + // to the outside if they are defined in the same source module. + for (auto &Edge : ResolvedCalleeSummary->calls()) { + auto CalleeGUID = Edge.first.getGUID(); + exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList); + } + for (auto &Ref : ResolvedCalleeSummary->refs()) { + auto GUID = Ref.getGUID(); + exportGlobalInModule(Index, ExportModulePath, GUID, ExportList); + } } - Function *F = dyn_cast<Function>(SGV); - if (!F && isa<GlobalAlias>(SGV)) { - auto *SGA = dyn_cast<GlobalAlias>(SGV); - F = dyn_cast<Function>(SGA->getBaseObject()); - CalledFunctionName = F->getName(); - } - assert(F && "Imported Function is ... not a Function"); - - // We cannot import weak_any functions/aliases without possibly affecting - // the order they are seen and selected by the linker, changing program - // semantics. - if (SGV->hasWeakAnyLinkage()) { - DEBUG(dbgs() << DestModule.getModuleIdentifier() - << ": Ignoring import request for weak-any " - << (isa<Function>(SGV) ? "function " : "alias ") - << CalledFunctionName << " from " - << SrcModule.getModuleIdentifier() << "\n"); + // Insert the newly imported function into the worklist. + Worklist.push_back(std::make_pair(ResolvedCalleeSummary, Threshold)); + }
the list of symbols referenced from /// another module (that may require promotion). +static void ComputeImportForModule( + const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index, + FunctionImporter::ImportMapTy &ImportsForModule, + StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) { + // Worklist contains the list of functions imported in this module, for which + // we will analyze the callees and may import further down the callgraph. + SmallVector<EdgeInfo, 128> Worklist; + + // Populate the worklist with the imports for the functions in the current + // module. + for (auto &GVSummary : DefinedGVSummaries) { + auto *Summary = GVSummary.second; + if (auto *AS = dyn_cast<AliasSummary>(Summary)) + Summary = &AS->getAliasee(); + auto *FuncSummary = dyn_cast<FunctionSummary>(Summary); + if (!FuncSummary) + // Skip import for global variables continue; - } + DEBUG(dbgs() << "Initialize import for " << GVSummary.first << "\n"); + computeImportForFunction(*FuncSummary, Index, ImportInstrLimit, + DefinedGVSummaries, Worklist, ImportsForModule, + ExportLists); + } - // Add the function to the import list - auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()]; - Entry.insert(F); + while (!Worklist.empty()) { + auto FuncInfo = Worklist.pop_back_val(); + auto *Summary = FuncInfo.first; + auto Threshold = FuncInfo.second; // Process the newly imported functions and add callees to the worklist. - F->materialize(); - findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist); + // Adjust the threshold + Threshold = Threshold * ImportInstrFactor; + + computeImportForFunction(*Summary, Index, Threshold, DefinedGVSummaries, + Worklist, ImportsForModule, ExportLists); } } +} // anonymous namespace + +/// Compute all the imports and exports for every module using the Index. +void llvm::ComputeCrossModuleImport( + const ModuleSummaryIndex &Index, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + StringMap<FunctionImporter::ImportMapTy> &ImportLists, + StringMap<FunctionImporter::ExportSetTy> &ExportLists) { + // For each module that has functions defined, compute the import/export lists. + for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { + auto &ImportsForModule = ImportLists[DefinedGVSummaries.first()]; + DEBUG(dbgs() << "Computing import for Module '" + << DefinedGVSummaries.first() << "'\n"); + ComputeImportForModule(DefinedGVSummaries.second, Index, ImportsForModule, + &ExportLists); + } + +#ifndef NDEBUG + DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size() + << " modules:\n"); + for (auto &ModuleImports : ImportLists) { + auto ModName = ModuleImports.first(); + auto &Exports = ExportLists[ModName]; + DEBUG(dbgs() << "* Module " << ModName << " exports " << Exports.size() + << " functions. Imports from " << ModuleImports.second.size() + << " modules.\n"); + for (auto &Src : ModuleImports.second) { + auto SrcModName = Src.first(); + DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from " + << SrcModName << "\n"); + } + } +#endif +} + +/// Compute all the imports for the given module in the Index. +void llvm::ComputeCrossModuleImportForModule( + StringRef ModulePath, const ModuleSummaryIndex &Index, + FunctionImporter::ImportMapTy &ImportList) { + + // Collect the list of functions this module defines. + // GUID -> Summary + GVSummaryMapTy FunctionSummaryMap; + Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap); + + // Compute the import list for this module.
+ DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n"); + ComputeImportForModule(FunctionSummaryMap, Index, ImportList); + +#ifndef NDEBUG + DEBUG(dbgs() << "* Module " << ModulePath << " imports from " + << ImportList.size() << " modules.\n"); + for (auto &Src : ImportList) { + auto SrcModName = Src.first(); + DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from " + << SrcModName << "\n"); + } +#endif +} + +/// Compute the set of summaries needed for a ThinLTO backend compilation of +/// \p ModulePath. +void llvm::gatherImportedSummariesForModule( + StringRef ModulePath, + const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, + const StringMap<FunctionImporter::ImportMapTy> &ImportLists, + std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) { + // Include all summaries from the importing module. + ModuleToSummariesForIndex[ModulePath] = + ModuleToDefinedGVSummaries.lookup(ModulePath); + auto ModuleImports = ImportLists.find(ModulePath); + if (ModuleImports != ImportLists.end()) { + // Include summaries for imports. + for (auto &ILI : ModuleImports->second) { + auto &SummariesForIndex = ModuleToSummariesForIndex[ILI.first()]; + const auto &DefinedGVSummaries = + ModuleToDefinedGVSummaries.lookup(ILI.first()); + for (auto &GI : ILI.second) { + const auto &DS = DefinedGVSummaries.find(GI.first); + assert(DS != DefinedGVSummaries.end() && + "Expected a defined summary for imported global value"); + SummariesForIndex[GI.first] = DS->second; + } + } + } +} + +/// Emit the files \p ModulePath will import from into \p OutputFilename. +std::error_code llvm::EmitImportsFiles( + StringRef ModulePath, StringRef OutputFilename, + const StringMap<FunctionImporter::ImportMapTy> &ImportLists) { + auto ModuleImports = ImportLists.find(ModulePath); + std::error_code EC; + raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None); + if (EC) + return EC; + if (ModuleImports != ImportLists.end()) + for (auto &ILI : ModuleImports->second) + ImportsOS << ILI.first() << "\n"; + return std::error_code(); +} + +/// Fixup WeakForLinker linkages in \p TheModule based on summary analysis. +void llvm::thinLTOResolveWeakForLinkerModule( + Module &TheModule, const GVSummaryMapTy &DefinedGlobals) { + auto updateLinkage = [&](GlobalValue &GV) { + if (!GlobalValue::isWeakForLinker(GV.getLinkage())) + return; + // See if the global summary analysis computed a new resolved linkage. + const auto &GS = DefinedGlobals.find(GV.getGUID()); + if (GS == DefinedGlobals.end()) + return; + auto NewLinkage = GS->second->linkage(); + if (NewLinkage == GV.getLinkage()) + return; + DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from " + << GV.getLinkage() << " to " << NewLinkage << "\n"); + GV.setLinkage(NewLinkage); + }; + + // Process functions and global now + for (auto &GV : TheModule) + updateLinkage(GV); + for (auto &GV : TheModule.globals()) + updateLinkage(GV); + for (auto &GV : TheModule.aliases()) + updateLinkage(GV); +} + +/// Run internalization on \p TheModule based on symmary analysis. +void llvm::thinLTOInternalizeModule(Module &TheModule, + const GVSummaryMapTy &DefinedGlobals) { + // Parse inline ASM and collect the list of symbols that are not defined in + // the current module. 
+ StringSet<> AsmUndefinedRefs; + object::IRObjectFile::CollectAsmUndefinedRefs( + Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(), + [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { + if (Flags & object::BasicSymbolRef::SF_Undefined) + AsmUndefinedRefs.insert(Name); + }); + + // Declare a callback for the internalize pass that will ask for every + // candidate GlobalValue if it can be internalized or not. + auto MustPreserveGV = [&](const GlobalValue &GV) -> bool { + // Can't be internalized if referenced in inline asm. + if (AsmUndefinedRefs.count(GV.getName())) + return true; + + // Lookup the linkage recorded in the summaries during global analysis. + const auto &GS = DefinedGlobals.find(GV.getGUID()); + GlobalValue::LinkageTypes Linkage; + if (GS == DefinedGlobals.end()) { + // Must have been promoted (possibly conservatively). Find original + // name so that we can access the correct summary and see if it can + // be internalized again. + // FIXME: Eventually we should control promotion instead of promoting + // and internalizing again. + StringRef OrigName = + ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName()); + std::string OrigId = GlobalValue::getGlobalIdentifier( + OrigName, GlobalValue::InternalLinkage, + TheModule.getSourceFileName()); + const auto &GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId)); + if (GS == DefinedGlobals.end()) { + // Also check the original non-promoted non-globalized name. In some + // cases a preempted weak value is linked in as a local copy because + // it is referenced by an alias (IRLinker::linkGlobalValueProto). + // In that case, since it was originally not a local value, it was + // recorded in the index using the original name. + // FIXME: This may not be needed once PR27866 is fixed. + const auto &GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName)); + assert(GS != DefinedGlobals.end()); + Linkage = GS->second->linkage(); + } else { + Linkage = GS->second->linkage(); + } + } else + Linkage = GS->second->linkage(); + return !GlobalValue::isLocalLinkage(Linkage); + }; + + // FIXME: See if we can just internalize directly here via linkage changes + // based on the index, rather than invoking internalizeModule. + llvm::internalizeModule(TheModule, MustPreserveGV); +} + // Automatically import functions in Module \p DestModule based on the summaries // index. // -// The current implementation imports every called functions that exists in the -// summaries index. -bool FunctionImporter::importFunctions(Module &DestModule) { +bool FunctionImporter::importFunctions( + Module &DestModule, const FunctionImporter::ImportMapTy &ImportList, + bool ForceImportReferencedDiscardableSymbols) { DEBUG(dbgs() << "Starting import for Module " << DestModule.getModuleIdentifier() << "\n"); unsigned ImportedCount = 0; - /// First step is collecting the called external functions. - StringSet<> CalledFunctions; - SmallVector<StringRef, 64> Worklist; - for (auto &F : DestModule) { - if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone)) - continue; - findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist); - } - if (Worklist.empty()) - return false; - - /// Second step: for every call to an external function, try to import it. 
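// Not from the patch: a condensed sketch of how a client drives the importer
// end to end with the new interface. It mirrors FunctionImportPass::
// runOnModule() further down; the local names are illustrative.
//
//   FunctionImporter::ImportMapTy ImportList;
//   ComputeCrossModuleImportForModule(M.getModuleIdentifier(), Index,
//                                     ImportList);
//   auto Loader = [&](StringRef Identifier) {
//     return loadFile(Identifier, M.getContext());
//   };
//   FunctionImporter Importer(Index, Loader);
//   Importer.importFunctions(M, ImportList,
//                            /*ForceImportReferencedDiscardableSymbols=*/true);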
- // Linker that will be used for importing function Linker TheLinker(DestModule); - - // Map of Module -> List of Function to import from the Module - std::map<StringRef, DenseSet<const GlobalValue *>> - ModuleToFunctionsToImportMap; - - // Analyze the summaries and get the list of functions to import by - // populating ModuleToFunctionsToImportMap - ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader); - GetImportList(DestModule, Worklist, CalledFunctions, - ModuleToFunctionsToImportMap, Index, ModuleLoaderCache); - assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList"); - - StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>> - ModuleToTempMDValsMap; - // Do the actual import of functions now, one Module at a time - for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) { + std::set<StringRef> ModuleNameOrderedList; + for (auto &FunctionsToImportPerModule : ImportList) { + ModuleNameOrderedList.insert(FunctionsToImportPerModule.first()); + } + for (auto &Name : ModuleNameOrderedList) { // Get the module for the import - auto &FunctionsToImport = FunctionsToImportPerModule.second; - std::unique_ptr<Module> SrcModule = - ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first); + const auto &FunctionsToImportPerModule = ImportList.find(Name); + assert(FunctionsToImportPerModule != ImportList.end()); + std::unique_ptr<Module> SrcModule = ModuleLoader(Name); assert(&DestModule.getContext() == &SrcModule->getContext() && "Context mismatch"); - // Save the mapping of value ids to temporary metadata created when - // importing this function. If we have already imported from this module, - // add new temporary metadata to the existing mapping. - auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()]; - if (!TempMDVals) - TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>(); + // If modules were created with lazy metadata loading, materialize it + // now, before linking it (otherwise this will be a noop). + SrcModule->materializeMetadata(); + UpgradeDebugInfo(*SrcModule); + + auto &ImportGUIDs = FunctionsToImportPerModule->second; + // Find the globals to import + DenseSet<const GlobalValue *> GlobalsToImport; + for (Function &F : *SrcModule) { + if (!F.hasName()) + continue; + auto GUID = F.getGUID(); + auto Import = ImportGUIDs.count(GUID); + DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing function " << GUID + << " " << F.getName() << " from " + << SrcModule->getSourceFileName() << "\n"); + if (Import) { + F.materialize(); + if (EnableImportMetadata) { + // Add 'thinlto_src_module' metadata for statistics and debugging. + F.setMetadata( + "thinlto_src_module", + llvm::MDNode::get( + DestModule.getContext(), + {llvm::MDString::get(DestModule.getContext(), + SrcModule->getSourceFileName())})); + } + GlobalsToImport.insert(&F); + } + } + for (GlobalVariable &GV : SrcModule->globals()) { + if (!GV.hasName()) + continue; + auto GUID = GV.getGUID(); + auto Import = ImportGUIDs.count(GUID); + DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing global " << GUID + << " " << GV.getName() << " from " + << SrcModule->getSourceFileName() << "\n"); + if (Import) { + GV.materialize(); + GlobalsToImport.insert(&GV); + } + } + for (GlobalAlias &GA : SrcModule->aliases()) { + if (!GA.hasName()) + continue; + auto GUID = GA.getGUID(); + auto Import = ImportGUIDs.count(GUID); + DEBUG(dbgs() << (Import ? 
"Is" : "Not") << " importing alias " << GUID + << " " << GA.getName() << " from " + << SrcModule->getSourceFileName() << "\n"); + if (Import) { + // Alias can't point to "available_externally". However when we import + // linkOnceODR the linkage does not change. So we import the alias + // and aliasee only in this case. This has been handled by + // computeImportForFunction() + GlobalObject *GO = GA.getBaseObject(); + assert(GO->hasLinkOnceODRLinkage() && + "Unexpected alias to a non-linkonceODR in import list"); +#ifndef NDEBUG + if (!GlobalsToImport.count(GO)) + DEBUG(dbgs() << " alias triggers importing aliasee " << GO->getGUID() + << " " << GO->getName() << " from " + << SrcModule->getSourceFileName() << "\n"); +#endif + GO->materialize(); + GlobalsToImport.insert(GO); + GA.materialize(); + GlobalsToImport.insert(&GA); + } + } // Link in the specified functions. - if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None, - &Index, &FunctionsToImport, TempMDVals.get())) + if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport)) + return true; + + if (PrintImports) { + for (const auto *GV : GlobalsToImport) + dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName() + << " from " << SrcModule->getSourceFileName() << "\n"; + } + + // Instruct the linker that the client will take care of linkonce resolution + unsigned Flags = Linker::Flags::None; + if (!ForceImportReferencedDiscardableSymbols) + Flags |= Linker::Flags::DontForceLinkLinkonceODR; + + if (TheLinker.linkInModule(std::move(SrcModule), Flags, &GlobalsToImport)) report_fatal_error("Function Import: link error"); - ImportedCount += FunctionsToImport.size(); + ImportedCount += GlobalsToImport.size(); } - // Now link in metadata for all modules from which we imported functions. - for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME : - ModuleToTempMDValsMap) { - // Load the specified source module. - auto &SrcModule = ModuleLoaderCache(SME.getKey()); - // The modules were created with lazy metadata loading. Materialize it - // now, before linking it. - SrcModule.materializeMetadata(); - UpgradeDebugInfo(SrcModule); - - // Link in all necessary metadata from this module. - if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get())) - return false; - } + NumImported += ImportedCount; DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module " << DestModule.getModuleIdentifier() << "\n"); @@ -355,11 +696,11 @@ static void diagnosticHandler(const DiagnosticInfo &DI) { OS << '\n'; } -/// Parse the function index out of an IR file and return the function +/// Parse the summary index out of an IR file and return the summary /// index object if found, or nullptr if not. 
-static std::unique_ptr<FunctionInfoIndex> -getFunctionIndexForFile(StringRef Path, std::string &Error, - DiagnosticHandlerFunction DiagnosticHandler) { +static std::unique_ptr<ModuleSummaryIndex> getModuleSummaryIndexForFile( + StringRef Path, std::string &Error, + const DiagnosticHandlerFunction &DiagnosticHandler) { std::unique_ptr<MemoryBuffer> Buffer; ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = MemoryBuffer::getFile(Path); @@ -368,9 +709,9 @@ getFunctionIndexForFile(StringRef Path, std::string &Error, return nullptr; } Buffer = std::move(BufferOrErr.get()); - ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr = - object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(), - DiagnosticHandler); + ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = + object::ModuleSummaryIndexObjectFile::create(Buffer->getMemBufferRef(), + DiagnosticHandler); if (std::error_code EC = ObjOrErr.getError()) { Error = EC.message(); return nullptr; @@ -381,32 +722,34 @@ getFunctionIndexForFile(StringRef Path, std::string &Error, namespace { /// Pass that performs cross-module function import provided a summary file. class FunctionImportPass : public ModulePass { - /// Optional function summary index to use for importing, otherwise + /// Optional module summary index to use for importing, otherwise /// the summary-file option must be specified. - const FunctionInfoIndex *Index; + const ModuleSummaryIndex *Index; public: /// Pass identification, replacement for typeid static char ID; /// Specify pass name for debug output - const char *getPassName() const override { - return "Function Importing"; - } + const char *getPassName() const override { return "Function Importing"; } - explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr) + explicit FunctionImportPass(const ModuleSummaryIndex *Index = nullptr) : ModulePass(ID), Index(Index) {} bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + if (SummaryFile.empty() && !Index) report_fatal_error("error: -function-import requires -summary-file or " "file from frontend\n"); - std::unique_ptr<FunctionInfoIndex> IndexPtr; + std::unique_ptr<ModuleSummaryIndex> IndexPtr; if (!SummaryFile.empty()) { if (Index) report_fatal_error("error: -summary-file and index from frontend\n"); std::string Error; - IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler); + IndexPtr = + getModuleSummaryIndexForFile(SummaryFile, Error, diagnosticHandler); if (!IndexPtr) { errs() << "Error loading file '" << SummaryFile << "': " << Error << "\n"; @@ -415,9 +758,14 @@ public: Index = IndexPtr.get(); } - // First we need to promote to global scope and rename any local values that + // First step is collecting the import list. + FunctionImporter::ImportMapTy ImportList; + ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index, + ImportList); + + // Next we need to promote to global scope and rename any local values that // are potentially exported to other modules. 
- if (renameModuleForThinLTO(M, Index)) { + if (renameModuleForThinLTO(M, *Index, nullptr)) { errs() << "Error renaming module\n"; return false; } @@ -427,7 +775,8 @@ public: return loadFile(Identifier, M.getContext()); }; FunctionImporter Importer(*Index, ModuleLoader); - return Importer.importFunctions(M); + return Importer.importFunctions( + M, ImportList, !DontForceImportReferencedDiscardableSymbols); } }; } // anonymous namespace @@ -439,7 +788,7 @@ INITIALIZE_PASS_END(FunctionImportPass, "function-import", "Summary Based Function Import", false, false) namespace llvm { -Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) { +Pass *createFunctionImportPass(const ModuleSummaryIndex *Index = nullptr) { return new FunctionImportPass(Index); } } diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 9b276ed28e2e0..4c74698a1b619 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -15,15 +15,16 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Pass.h" #include <unordered_map> using namespace llvm; @@ -31,32 +32,41 @@ using namespace llvm; STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); +STATISTIC(NumIFuncs, "Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); namespace { - struct GlobalDCE : public ModulePass { + class GlobalDCELegacyPass : public ModulePass { + public: static char ID; // Pass identification, replacement for typeid - GlobalDCE() : ModulePass(ID) { - initializeGlobalDCEPass(*PassRegistry::getPassRegistry()); + GlobalDCELegacyPass() : ModulePass(ID) { + initializeGlobalDCELegacyPassPass(*PassRegistry::getPassRegistry()); } // run - Do the GlobalDCE pass on the specified module, optionally updating // the specified callgraph to reflect the changes. // - bool runOnModule(Module &M) override; + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + + ModuleAnalysisManager DummyMAM; + auto PA = Impl.run(M, DummyMAM); + return !PA.areAllPreserved(); + } private: - SmallPtrSet<GlobalValue*, 32> AliveGlobals; - SmallPtrSet<Constant *, 8> SeenConstants; - std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; + GlobalDCEPass Impl; + }; +} - /// GlobalIsNeeded - mark the specific global value as needed, and - /// recursively mark anything that it uses as also needed. - void GlobalIsNeeded(GlobalValue *GV); - void MarkUsedGlobalsAsNeeded(Constant *C); +char GlobalDCELegacyPass::ID = 0; +INITIALIZE_PASS(GlobalDCELegacyPass, "globaldce", + "Dead Global Elimination", false, false) - bool RemoveUnusedGlobalValue(GlobalValue &GV); - }; +// Public interface to the GlobalDCEPass. +ModulePass *llvm::createGlobalDCEPass() { + return new GlobalDCELegacyPass(); } /// Returns true if F contains only a single "ret" instruction. 
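// Not from the patch: the ifunc support added in the hunks below reuses the
// two-phase deletion pattern already applied to functions and aliases: first
// sever the references held by dead entries so use counts drop, then erase
// the entries themselves. Condensed from the new code:
//
//   std::vector<GlobalIFunc *> DeadIFuncs;
//   for (GlobalIFunc &GIF : M.ifuncs())
//     if (!AliveGlobals.count(&GIF)) {
//       DeadIFuncs.push_back(&GIF);
//       GIF.setResolver(nullptr);    // phase 1: drop the resolver reference
//     }
//   for (GlobalIFunc *GIF : DeadIFuncs)
//     M.getIFuncList().erase(GIF);   // phase 2: delete the dead ifuncs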
@@ -68,13 +78,7 @@ static bool isEmptyFunction(Function *F) { return RI.getReturnValue() == nullptr; } -char GlobalDCE::ID = 0; -INITIALIZE_PASS(GlobalDCE, "globaldce", - "Dead Global Elimination", false, false) - -ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } - -bool GlobalDCE::runOnModule(Module &M) { +PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) { bool Changed = false; // Remove empty functions from the global ctors list. @@ -92,21 +96,14 @@ bool GlobalDCE::runOnModule(Module &M) { ComdatMembers.insert(std::make_pair(C, &GA)); // Loop over the module, adding globals which are obviously necessary. - for (Function &F : M) { - Changed |= RemoveUnusedGlobalValue(F); - // Functions with external linkage are needed if they have a body - if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage()) - if (!F.isDiscardableIfUnused()) - GlobalIsNeeded(&F); - } - - for (GlobalVariable &GV : M.globals()) { - Changed |= RemoveUnusedGlobalValue(GV); + for (GlobalObject &GO : M.global_objects()) { + Changed |= RemoveUnusedGlobalValue(GO); + // Functions with external linkage are needed if they have a body. // Externally visible & appending globals are needed, if they have an // initializer. - if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage()) - if (!GV.isDiscardableIfUnused()) - GlobalIsNeeded(&GV); + if (!GO.isDeclaration() && !GO.hasAvailableExternallyLinkage()) + if (!GO.isDiscardableIfUnused()) + GlobalIsNeeded(&GO); } for (GlobalAlias &GA : M.aliases()) { @@ -116,6 +113,13 @@ bool GlobalDCE::runOnModule(Module &M) { GlobalIsNeeded(&GA); } + for (GlobalIFunc &GIF : M.ifuncs()) { + Changed |= RemoveUnusedGlobalValue(GIF); + // Externally visible ifuncs are needed. + if (!GIF.isDiscardableIfUnused()) + GlobalIsNeeded(&GIF); + } + // Now that all globals which are needed are in the AliveGlobals set, we loop // through the program, deleting those which are not alive. // @@ -150,6 +154,14 @@ bool GlobalDCE::runOnModule(Module &M) { GA.setAliasee(nullptr); } + // The third pass drops targets of ifuncs which are dead... + std::vector<GlobalIFunc*> DeadIFuncs; + for (GlobalIFunc &GIF : M.ifuncs()) + if (!AliveGlobals.count(&GIF)) { + DeadIFuncs.push_back(&GIF); + GIF.setResolver(nullptr); + } + if (!DeadFunctions.empty()) { // Now that all interferences have been dropped, delete the actual objects // themselves. @@ -180,17 +192,29 @@ bool GlobalDCE::runOnModule(Module &M) { Changed = true; } + // Now delete any dead ifuncs. + if (!DeadIFuncs.empty()) { + for (GlobalIFunc *GIF : DeadIFuncs) { + RemoveUnusedGlobalValue(*GIF); + M.getIFuncList().erase(GIF); + } + NumIFuncs += DeadIFuncs.size(); + Changed = true; + } + // Make sure that all memory is released AliveGlobals.clear(); SeenConstants.clear(); ComdatMembers.clear(); - return Changed; + if (Changed) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); } /// GlobalIsNeeded - mark the specific global value as needed, and /// recursively mark anything that it uses as also needed. -void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { +void GlobalDCEPass::GlobalIsNeeded(GlobalValue *G) { // If the global is already in the set, no need to reprocess it. if (!AliveGlobals.insert(G).second) return; @@ -205,9 +229,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { // referenced by the initializer to the alive set.
if (GV->hasInitializer()) MarkUsedGlobalsAsNeeded(GV->getInitializer()); - } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) { - // The target of a global alias is needed. - MarkUsedGlobalsAsNeeded(GA->getAliasee()); + } else if (GlobalIndirectSymbol *GIS = dyn_cast<GlobalIndirectSymbol>(G)) { + // The target of a global alias or ifunc is needed. + MarkUsedGlobalsAsNeeded(GIS->getIndirectSymbol()); } else { // Otherwise this must be a function object. We have to scan the body of // the function looking for constants and global values which are used as @@ -228,7 +252,7 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { } } -void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { +void GlobalDCEPass::MarkUsedGlobalsAsNeeded(Constant *C) { if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return GlobalIsNeeded(GV); @@ -248,7 +272,7 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { // so, nuke it. This will reduce the reference count on the global value, which // might make it deader. // -bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) { +bool GlobalDCEPass::RemoveUnusedGlobalValue(GlobalValue &GV) { if (GV.use_empty()) return false; GV.removeDeadConstantUsers(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index fd7736905fe84..310c29275faf2 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/GlobalOpt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -40,11 +40,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/Transforms/Utils/Evaluator.h" #include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> -#include <deque> using namespace llvm; #define DEBUG_TYPE "globalopt" @@ -65,46 +65,6 @@ STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); -namespace { - struct GlobalOpt : public ModulePass { - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - } - static char ID; // Pass identification, replacement for typeid - GlobalOpt() : ModulePass(ID) { - initializeGlobalOptPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override; - - private: - bool OptimizeFunctions(Module &M); - bool OptimizeGlobalVars(Module &M); - bool OptimizeGlobalAliases(Module &M); - bool deleteIfDead(GlobalValue &GV); - bool processGlobal(GlobalValue &GV); - bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS); - bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); - - bool isPointerValueDeadOnEntryToFunction(const Function *F, - GlobalValue *GV); - - TargetLibraryInfo *TLI; - SmallSet<const Comdat *, 8> NotDiscardableComdats; - }; -} - -char GlobalOpt::ID = 0; -INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", - "Global Variable Optimizer", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(GlobalOpt, 
"globalopt", - "Global Variable Optimizer", false, false) - -ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } - /// Is this global variable possibly used by a leak checker as a root? If so, /// we might not really want to eliminate the stores to it. static bool isLeakCheckerRoot(GlobalVariable *GV) { @@ -120,7 +80,7 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { return false; SmallVector<Type *, 4> Types; - Types.push_back(cast<PointerType>(GV->getType())->getElementType()); + Types.push_back(GV->getValueType()); unsigned Limit = 20; do { @@ -329,7 +289,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // we already know what the result of any load from that GEP is. // TODO: Handle splats. if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) - SubInit = Constant::getNullValue(GEP->getType()->getElementType()); + SubInit = Constant::getNullValue(GEP->getResultElementType()); } Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI); @@ -475,7 +435,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (!GlobalUsersSafeToSRA(GV)) return nullptr; - assert(GV->hasLocalLinkage() && !GV->isConstant()); + assert(GV->hasLocalLinkage()); Constant *Init = GV->getInitializer(); Type *Ty = Init->getType(); @@ -499,6 +459,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); NGV->setExternallyInitialized(GV->isExternallyInitialized()); + NGV->copyAttributesFrom(GV); Globals.push_back(NGV); NewGlobals.push_back(NGV); @@ -533,6 +494,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); NGV->setExternallyInitialized(GV->isExternallyInitialized()); + NGV->copyAttributesFrom(GV); Globals.push_back(NGV); NewGlobals.push_back(NGV); @@ -867,9 +829,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, } Constant *RepValue = NewGV; - if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, - GV->getType()->getElementType()); + if (NewGV->getType() != GV->getValueType()) + RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType()); // If there is a comparison against null, we will insert a global bool to // keep track of whether the global was initialized yet or not. 
@@ -1283,6 +1244,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, std::vector<Value*> FieldGlobals; std::vector<Value*> FieldMallocs; + SmallVector<OperandBundleDef, 1> OpBundles; + CI->getOperandBundlesAsDefs(OpBundles); + unsigned AS = GV->getType()->getPointerAddressSpace(); for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ Type *FieldTy = STy->getElementType(FieldNo); @@ -1292,6 +1256,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), nullptr, GV->getThreadLocalMode()); + NGV->copyAttributesFrom(GV); FieldGlobals.push_back(NGV); unsigned TypeSize = DL.getTypeAllocSize(FieldTy); @@ -1300,7 +1265,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Type *IntPtrTy = DL.getIntPtrType(CI->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, nullptr, + NElems, OpBundles, nullptr, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); @@ -1359,7 +1324,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Cmp, NullPtrBlock); // Fill in FreeBlock. - CallInst::CreateFree(GVVal, BI); + CallInst::CreateFree(GVVal, OpBundles, BI); new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], FreeBlock); BranchInst::Create(NextBlock, FreeBlock); @@ -1397,8 +1362,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Insert a store of null into each global. for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); - Constant *Null = Constant::getNullValue(PT->getElementType()); + Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType(); + Constant *Null = Constant::getNullValue(ValTy); new StoreInst(Null, FieldGlobals[i], SI); } // Erase the original store. @@ -1500,7 +1465,7 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, // into multiple malloc'd arrays, one for each field. This is basically // SRoA for malloc'd memory. - if (Ordering != NotAtomic) + if (Ordering != AtomicOrdering::NotAtomic) return false; // If this is an allocation of a fixed size array of structs, analyze as a @@ -1525,9 +1490,11 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); - Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, - AllocSize, NumElements, - nullptr, CI->getName()); + SmallVector<OperandBundleDef, 1> OpBundles; + CI->getOperandBundlesAsDefs(OpBundles); + Instruction *Malloc = + CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, + OpBundles, nullptr, CI->getName()); Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); @@ -1583,7 +1550,7 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// boolean and select between the two values whenever it is used. This exposes /// the values to other scalar optimizations. 
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { - Type *GVElType = GV->getType()->getElementType(); + Type *GVElType = GV->getValueType(); // If GVElType is already i1, it is already shrunk. If the type of the GV is // an FP value, pointer or vector, don't do this optimization because a select @@ -1611,6 +1578,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GV->getName()+".b", GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); + NewGV->copyAttributesFrom(GV); GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); Constant *InitVal = GV->getInitializer(); @@ -1679,7 +1647,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { return true; } -bool GlobalOpt::deleteIfDead(GlobalValue &GV) { +static bool deleteIfDead(GlobalValue &GV, + SmallSet<const Comdat *, 8> &NotDiscardableComdats) { GV.removeDeadConstantUsers(); if (!GV.isDiscardableIfUnused()) @@ -1703,36 +1672,9 @@ bool GlobalOpt::deleteIfDead(GlobalValue &GV) { return true; } -/// Analyze the specified global variable and optimize it if possible. If we -/// make a change, return true. -bool GlobalOpt::processGlobal(GlobalValue &GV) { - // Do more involved optimizations if the global is internal. - if (!GV.hasLocalLinkage()) - return false; - - GlobalStatus GS; - - if (GlobalStatus::analyzeGlobal(&GV, GS)) - return false; - - bool Changed = false; - if (!GS.IsCompared && !GV.hasUnnamedAddr()) { - GV.setUnnamedAddr(true); - NumUnnamed++; - Changed = true; - } - - auto *GVar = dyn_cast<GlobalVariable>(&GV); - if (!GVar) - return Changed; - - if (GVar->isConstant() || !GVar->hasInitializer()) - return Changed; - - return processInternalGlobal(GVar, GS) || Changed; -} - -bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) { +static bool isPointerValueDeadOnEntryToFunction( + const Function *F, GlobalValue *GV, + function_ref<DominatorTree &(Function &)> LookupDomTree) { // Find all uses of GV. We expect them all to be in F, and if we can't // identify any of the uses we bail out. // @@ -1776,8 +1718,7 @@ bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalVal // of them are known not to depend on the value of the global at the function // entry point. We do this by ensuring that every load is dominated by at // least one store. - auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F)) - .getDomTree(); + auto &DT = LookupDomTree(*const_cast<Function *>(F)); // The below check is quadratic. Check we're not going to do too many tests. // FIXME: Even though this will always have worst-case quadratic time, we @@ -1866,8 +1807,9 @@ static void makeAllConstantUsesInstructions(Constant *C) { /// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, - const GlobalStatus &GS) { +static bool processInternalGlobal( + GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI, + function_ref<DominatorTree &(Function &)> LookupDomTree) { auto &DL = GV->getParent()->getDataLayout(); // If this is a first class global and has only one accessing function and // this function is non-recursive, we replace the global with a local alloca @@ -1879,16 +1821,17 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, // If the global is in different address space, don't bring it to stack. 
if (!GS.HasMultipleAccessingFunctions && GS.AccessingFunction && - GV->getType()->getElementType()->isSingleValueType() && + GV->getValueType()->isSingleValueType() && GV->getType()->getAddressSpace() == 0 && !GV->isExternallyInitialized() && allNonInstructionUsersCanBeMadeInstructions(GV) && GS.AccessingFunction->doesNotRecurse() && - isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) { + isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV, + LookupDomTree)) { DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction ->getEntryBlock().begin()); - Type *ElemTy = GV->getType()->getElementType(); + Type *ElemTy = GV->getValueType(); // FIXME: Pass Global's alignment when globals have alignment AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr, GV->getName(), &FirstI); @@ -1896,7 +1839,7 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, new StoreInst(GV->getInitializer(), Alloca, &FirstI); makeAllConstantUsesInstructions(GV); - + GV->replaceAllUsesWith(Alloca); GV->eraseFromParent(); ++NumLocalized; @@ -1926,7 +1869,8 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, } return Changed; - } else if (GS.StoredType <= GlobalStatus::InitializerStored) { + } + if (GS.StoredType <= GlobalStatus::InitializerStored) { DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); GV->setConstant(true); @@ -1939,15 +1883,18 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, << "all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; + return true; } + // Fall through to the next check; see if we can optimize further. ++NumMarked; - return true; - } else if (!GV->getInitializer()->getType()->isSingleValueType()) { + } + if (!GV->getInitializer()->getType()->isSingleValueType()) { const DataLayout &DL = GV->getParent()->getDataLayout(); if (SRAGlobal(GV, DL)) return true; - } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { + } + if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the // initializer to be the stored value, then delete all stores to the @@ -1978,7 +1925,7 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, // Otherwise, if the global was not a boolean, we can shrink it to be a // boolean. if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) { - if (GS.Ordering == NotAtomic) { + if (GS.Ordering == AtomicOrdering::NotAtomic) { if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { ++NumShrunkToBool; return true; @@ -1990,6 +1937,44 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, return false; } +/// Analyze the specified global variable and optimize it if possible. If we +/// make a change, return true. +static bool +processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, + function_ref<DominatorTree &(Function &)> LookupDomTree) { + if (GV.getName().startswith("llvm.")) + return false; + + GlobalStatus GS; + + if (GlobalStatus::analyzeGlobal(&GV, GS)) + return false; + + bool Changed = false; + if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) { + auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global + : GlobalValue::UnnamedAddr::Local; + if (NewUnnamedAddr != GV.getUnnamedAddr()) { + GV.setUnnamedAddr(NewUnnamedAddr); + NumUnnamed++; + Changed = true; + } + } + + // Do more involved optimizations if the global is internal. 
+ if (!GV.hasLocalLinkage()) + return Changed; + + auto *GVar = dyn_cast<GlobalVariable>(&GV); + if (!GVar) + return Changed; + + if (GVar->isConstant() || !GVar->hasInitializer()) + return Changed; + + return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed; +} + /// Walk all of the direct calls of the specified function, changing them to /// FastCC. static void ChangeCalleesToFastCall(Function *F) { @@ -2034,7 +2019,10 @@ static bool isProfitableToMakeFastCC(Function *F) { return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; } -bool GlobalOpt::OptimizeFunctions(Module &M) { +static bool +OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, + function_ref<DominatorTree &(Function &)> LookupDomTree, + SmallSet<const Comdat *, 8> &NotDiscardableComdats) { bool Changed = false; // Optimize functions. for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { @@ -2043,12 +2031,12 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); - if (deleteIfDead(*F)) { + if (deleteIfDead(*F, NotDiscardableComdats)) { Changed = true; continue; } - Changed |= processGlobal(*F); + Changed |= processGlobal(*F, TLI, LookupDomTree); if (!F->hasLocalLinkage()) continue; @@ -2075,7 +2063,10 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { return Changed; } -bool GlobalOpt::OptimizeGlobalVars(Module &M) { +static bool +OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, + function_ref<DominatorTree &(Function &)> LookupDomTree, + SmallSet<const Comdat *, 8> &NotDiscardableComdats) { bool Changed = false; for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); @@ -2093,148 +2084,16 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { GV->setInitializer(New); } - if (deleteIfDead(*GV)) { + if (deleteIfDead(*GV, NotDiscardableComdats)) { Changed = true; continue; } - Changed |= processGlobal(*GV); + Changed |= processGlobal(*GV, TLI, LookupDomTree); } return Changed; } -static inline bool -isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant *> &SimpleConstants, - const DataLayout &DL); - -/// Return true if the specified constant can be handled by the code generator. -/// We don't want to generate something like: -/// void *X = &X/42; -/// because the code generator doesn't have a relocation that can handle that. -/// -/// This function should be called if C was not found (but just got inserted) -/// in SimpleConstants to avoid having to rescan the same constants all the -/// time. -static bool -isSimpleEnoughValueToCommitHelper(Constant *C, - SmallPtrSetImpl<Constant *> &SimpleConstants, - const DataLayout &DL) { - // Simple global addresses are supported, do not allow dllimport or - // thread-local globals. - if (auto *GV = dyn_cast<GlobalValue>(C)) - return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); - - // Simple integer, undef, constant aggregate zero, etc are all supported. - if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) - return true; - - // Aggregate values are safe if all their elements are. - if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || - isa<ConstantVector>(C)) { - for (Value *Op : C->operands()) - if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL)) - return false; - return true; - } - - // We don't know exactly what relocations are allowed in constant expressions, - // so we allow &global+constantoffset, which is safe and uniformly supported - // across targets. 
- ConstantExpr *CE = cast<ConstantExpr>(C); - switch (CE->getOpcode()) { - case Instruction::BitCast: - // Bitcast is fine if the casted value is fine. - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); - - case Instruction::IntToPtr: - case Instruction::PtrToInt: - // int <=> ptr is fine if the int type is the same size as the - // pointer type. - if (DL.getTypeSizeInBits(CE->getType()) != - DL.getTypeSizeInBits(CE->getOperand(0)->getType())) - return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); - - // GEP is fine if it is simple + constant offset. - case Instruction::GetElementPtr: - for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) - if (!isa<ConstantInt>(CE->getOperand(i))) - return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); - - case Instruction::Add: - // We allow simple+cst. - if (!isa<ConstantInt>(CE->getOperand(1))) - return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); - } - return false; -} - -static inline bool -isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant *> &SimpleConstants, - const DataLayout &DL) { - // If we already checked this constant, we win. - if (!SimpleConstants.insert(C).second) - return true; - // Check the constant. - return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); -} - - -/// Return true if this constant is simple enough for us to understand. In -/// particular, if it is a cast to anything other than from one pointer type to -/// another pointer type, we punt. We basically just support direct accesses to -/// globals and GEP's of globals. This should be kept up to date with -/// CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C) { - // Conservatively, avoid aggregate types. This is because we don't - // want to worry about them partially overlapping other stores. - if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) - return false; - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - // Do not allow weak/*_odr/linkonce linkage or external globals. - return GV->hasUniqueInitializer(); - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - // Handle a constantexpr gep. - if (CE->getOpcode() == Instruction::GetElementPtr && - isa<GlobalVariable>(CE->getOperand(0)) && - cast<GEPOperator>(CE)->isInBounds()) { - GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. - if (!GV->hasUniqueInitializer()) - return false; - - // The first index must be zero. - ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin())); - if (!CI || !CI->isZero()) return false; - - // The remaining indices must be compile-time known integers within the - // notional bounds of the corresponding static array types. - if (!CE->isGEPWithNoNotionalOverIndexing()) - return false; - - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); - - // A constantexpr bitcast from a pointer to another pointer is a no-op, - // and we know how to evaluate it by moving the bitcast from the pointer - // operand to the value operand. - } else if (CE->getOpcode() == Instruction::BitCast && - isa<GlobalVariable>(CE->getOperand(0))) { - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. 
- return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); - } - } - - return false; -} - /// Evaluate a piece of a constantexpr store into a global initializer. This /// returns 'Init' modified to reflect 'Val' stored into it. At this point, the /// GEP operands of Addr [0, OpNo) have been stepped into. @@ -2298,533 +2157,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); } -namespace { - -/// This class evaluates LLVM IR, producing the Constant representing each SSA -/// instruction. Changes to global variables are stored in a mapping that can -/// be iterated over after the evaluation is complete. Once an evaluation call -/// fails, the evaluation object should not be reused. -class Evaluator { -public: - Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) - : DL(DL), TLI(TLI) { - ValueStack.emplace_back(); - } - - ~Evaluator() { - for (auto &Tmp : AllocaTmps) - // If there are still users of the alloca, the program is doing something - // silly, e.g. storing the address of the alloca somewhere and using it - // later. Since this is undefined, we'll just make it be null. - if (!Tmp->use_empty()) - Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); - } - - /// Evaluate a call to function F, returning true if successful, false if we - /// can't evaluate it. ActualArgs contains the formal arguments for the - /// function. - bool EvaluateFunction(Function *F, Constant *&RetVal, - const SmallVectorImpl<Constant*> &ActualArgs); - - /// Evaluate all instructions in block BB, returning true if successful, false - /// if we can't evaluate it. NewBB returns the next BB that control flows - /// into, or null upon return. - bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB); - - Constant *getVal(Value *V) { - if (Constant *CV = dyn_cast<Constant>(V)) return CV; - Constant *R = ValueStack.back().lookup(V); - assert(R && "Reference to an uncomputed value!"); - return R; - } - - void setVal(Value *V, Constant *C) { - ValueStack.back()[V] = C; - } - - const DenseMap<Constant*, Constant*> &getMutatedMemory() const { - return MutatedMemory; - } - - const SmallPtrSetImpl<GlobalVariable*> &getInvariants() const { - return Invariants; - } - -private: - Constant *ComputeLoadResult(Constant *P); - - /// As we compute SSA register values, we store their contents here. The back - /// of the deque contains the current function and the stack contains the - /// values in the calling frames. - std::deque<DenseMap<Value*, Constant*>> ValueStack; - - /// This is used to detect recursion. In pathological situations we could hit - /// exponential behavior, but at least there is nothing unbounded. - SmallVector<Function*, 4> CallStack; - - /// For each store we execute, we update this map. Loads check this to get - /// the most up-to-date value. If evaluation is successful, this state is - /// committed to the process. - DenseMap<Constant*, Constant*> MutatedMemory; - - /// To 'execute' an alloca, we create a temporary global variable to represent - /// its body. This vector is needed so we can delete the temporary globals - /// when we are done. - SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps; - - /// These global variables have been marked invariant by the static - /// constructor. - SmallPtrSet<GlobalVariable*, 8> Invariants; - - /// These are constants we have checked and know to be simple enough to live - /// in a static initializer of a global. 
- SmallPtrSet<Constant*, 8> SimpleConstants; - - const DataLayout &DL; - const TargetLibraryInfo *TLI; -}; - -} // anonymous namespace - -/// Return the value that would be computed by a load from P after the stores -/// reflected by 'memory' have been performed. If we can't decide, return null. -Constant *Evaluator::ComputeLoadResult(Constant *P) { - // If this memory location has been recently stored, use the stored value: it - // is the most up-to-date. - DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P); - if (I != MutatedMemory.end()) return I->second; - - // Access it. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { - if (GV->hasDefinitiveInitializer()) - return GV->getInitializer(); - return nullptr; - } - - // Handle a constantexpr getelementptr. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) - if (CE->getOpcode() == Instruction::GetElementPtr && - isa<GlobalVariable>(CE->getOperand(0))) { - GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - if (GV->hasDefinitiveInitializer()) - return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); - } - - return nullptr; // don't know how to evaluate. -} - -/// Evaluate all instructions in block BB, returning true if successful, false -/// if we can't evaluate it. NewBB returns the next BB that control flows into, -/// or null upon return. -bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, - BasicBlock *&NextBB) { - // This is the main evaluation loop. - while (1) { - Constant *InstResult = nullptr; - - DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); - - if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { - if (!SI->isSimple()) { - DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); - return false; // no volatile/atomic accesses. - } - Constant *Ptr = getVal(SI->getOperand(1)); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { - DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); - Ptr = ConstantFoldConstantExpression(CE, DL, TLI); - DEBUG(dbgs() << "; To: " << *Ptr << "\n"); - } - if (!isSimpleEnoughPointerToCommit(Ptr)) { - // If this is too complex for us to commit, reject it. - DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); - return false; - } - - Constant *Val = getVal(SI->getOperand(0)); - - // If this might be too difficult for the backend to handle (e.g. the addr - // of one global variable divided by another) then we can't commit it. - if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { - DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val - << "\n"); - return false; - } - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { - if (CE->getOpcode() == Instruction::BitCast) { - DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); - // If we're evaluating a store through a bitcast, then we need - // to pull the bitcast off the pointer type and push it onto the - // stored value. - Ptr = CE->getOperand(0); - - Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); - - // In order to push the bitcast onto the stored value, a bitcast - // from NewTy to Val's type must be legal. If it's not, we can try - // introspecting NewTy to find a legal conversion. - while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { - // If NewTy is a struct, we can convert the pointer to the struct - // into a pointer to its first member. - // FIXME: This could be extended to support arrays as well. 
- if (StructType *STy = dyn_cast<StructType>(NewTy)) { - NewTy = STy->getTypeAtIndex(0U); - - IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); - Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); - Constant * const IdxList[] = {IdxZero, IdxZero}; - - Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) - Ptr = ConstantFoldConstantExpression(CE, DL, TLI); - - // If we can't improve the situation by introspecting NewTy, - // we have to give up. - } else { - DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " - "evaluate.\n"); - return false; - } - } - - // If we found compatible types, go ahead and push the bitcast - // onto the stored value. - Val = ConstantExpr::getBitCast(Val, NewTy); - - DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); - } - } - - MutatedMemory[Ptr] = Val; - } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { - InstResult = ConstantExpr::get(BO->getOpcode(), - getVal(BO->getOperand(0)), - getVal(BO->getOperand(1))); - DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult - << "\n"); - } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { - InstResult = ConstantExpr::getCompare(CI->getPredicate(), - getVal(CI->getOperand(0)), - getVal(CI->getOperand(1))); - DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult - << "\n"); - } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { - InstResult = ConstantExpr::getCast(CI->getOpcode(), - getVal(CI->getOperand(0)), - CI->getType()); - DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult - << "\n"); - } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { - InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), - getVal(SI->getOperand(1)), - getVal(SI->getOperand(2))); - DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult - << "\n"); - } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { - InstResult = ConstantExpr::getExtractValue( - getVal(EVI->getAggregateOperand()), EVI->getIndices()); - DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult - << "\n"); - } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { - InstResult = ConstantExpr::getInsertValue( - getVal(IVI->getAggregateOperand()), - getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); - DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult - << "\n"); - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { - Constant *P = getVal(GEP->getOperand(0)); - SmallVector<Constant*, 8> GEPOps; - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); - i != e; ++i) - GEPOps.push_back(getVal(*i)); - InstResult = - ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, - cast<GEPOperator>(GEP)->isInBounds()); - DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult - << "\n"); - } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - - if (!LI->isSimple()) { - DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); - return false; // no volatile/atomic accesses. - } - - Constant *Ptr = getVal(LI->getOperand(0)); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { - Ptr = ConstantFoldConstantExpression(CE, DL, TLI); - DEBUG(dbgs() << "Found a constant pointer expression, constant " - "folding: " << *Ptr << "\n"); - } - InstResult = ComputeLoadResult(Ptr); - if (!InstResult) { - DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." 
- "\n"); - return false; // Could not evaluate load. - } - - DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { - if (AI->isArrayAllocation()) { - DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); - return false; // Cannot handle array allocs. - } - Type *Ty = AI->getType()->getElementType(); - AllocaTmps.push_back( - make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, - UndefValue::get(Ty), AI->getName())); - InstResult = AllocaTmps.back().get(); - DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); - } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { - CallSite CS(&*CurInst); - - // Debug info can safely be ignored here. - if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { - DEBUG(dbgs() << "Ignoring debug info.\n"); - ++CurInst; - continue; - } - - // Cannot handle inline asm. - if (isa<InlineAsm>(CS.getCalledValue())) { - DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); - return false; - } - - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { - if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { - if (MSI->isVolatile()) { - DEBUG(dbgs() << "Can not optimize a volatile memset " << - "intrinsic.\n"); - return false; - } - Constant *Ptr = getVal(MSI->getDest()); - Constant *Val = getVal(MSI->getValue()); - Constant *DestVal = ComputeLoadResult(getVal(Ptr)); - if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { - // This memset is a no-op. - DEBUG(dbgs() << "Ignoring no-op memset.\n"); - ++CurInst; - continue; - } - } - - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); - ++CurInst; - continue; - } - - if (II->getIntrinsicID() == Intrinsic::invariant_start) { - // We don't insert an entry into Values, as it doesn't have a - // meaningful return value. - if (!II->use_empty()) { - DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); - return false; - } - ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); - Value *PtrArg = getVal(II->getArgOperand(1)); - Value *Ptr = PtrArg->stripPointerCasts(); - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { - Type *ElemTy = cast<PointerType>(GV->getType())->getElementType(); - if (!Size->isAllOnesValue() && - Size->getValue().getLimitedValue() >= - DL.getTypeStoreSize(ElemTy)) { - Invariants.insert(GV); - DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV - << "\n"); - } else { - DEBUG(dbgs() << "Found a global var, but can not treat it as an " - "invariant.\n"); - } - } - // Continue even if we do nothing. - ++CurInst; - continue; - } else if (II->getIntrinsicID() == Intrinsic::assume) { - DEBUG(dbgs() << "Skipping assume intrinsic.\n"); - ++CurInst; - continue; - } - - DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); - return false; - } - - // Resolve function pointers. - Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); - if (!Callee || Callee->mayBeOverridden()) { - DEBUG(dbgs() << "Can not resolve function pointer.\n"); - return false; // Cannot resolve. - } - - SmallVector<Constant*, 8> Formals; - for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) - Formals.push_back(getVal(*i)); - - if (Callee->isDeclaration()) { - // If this is a function we can constant fold, do it. 
- if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { - InstResult = C; - DEBUG(dbgs() << "Constant folded function call. Result: " << - *InstResult << "\n"); - } else { - DEBUG(dbgs() << "Can not constant fold function call.\n"); - return false; - } - } else { - if (Callee->getFunctionType()->isVarArg()) { - DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); - return false; - } - - Constant *RetVal = nullptr; - // Execute the call, if successful, use the return value. - ValueStack.emplace_back(); - if (!EvaluateFunction(Callee, RetVal, Formals)) { - DEBUG(dbgs() << "Failed to evaluate function.\n"); - return false; - } - ValueStack.pop_back(); - InstResult = RetVal; - - if (InstResult) { - DEBUG(dbgs() << "Successfully evaluated function. Result: " << - InstResult << "\n\n"); - } else { - DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); - } - } - } else if (isa<TerminatorInst>(CurInst)) { - DEBUG(dbgs() << "Found a terminator instruction.\n"); - - if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { - if (BI->isUnconditional()) { - NextBB = BI->getSuccessor(0); - } else { - ConstantInt *Cond = - dyn_cast<ConstantInt>(getVal(BI->getCondition())); - if (!Cond) return false; // Cannot determine. - - NextBB = BI->getSuccessor(!Cond->getZExtValue()); - } - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { - ConstantInt *Val = - dyn_cast<ConstantInt>(getVal(SI->getCondition())); - if (!Val) return false; // Cannot determine. - NextBB = SI->findCaseValue(Val).getCaseSuccessor(); - } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { - Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); - if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) - NextBB = BA->getBasicBlock(); - else - return false; // Cannot determine. - } else if (isa<ReturnInst>(CurInst)) { - NextBB = nullptr; - } else { - // invoke, unwind, resume, unreachable. - DEBUG(dbgs() << "Can not handle terminator."); - return false; // Cannot handle this terminator. - } - - // We succeeded at evaluating this block! - DEBUG(dbgs() << "Successfully evaluated block.\n"); - return true; - } else { - // Did not know how to evaluate this! - DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." - "\n"); - return false; - } - - if (!CurInst->use_empty()) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) - InstResult = ConstantFoldConstantExpression(CE, DL, TLI); - - setVal(&*CurInst, InstResult); - } - - // If we just processed an invoke, we finished evaluating the block. - if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { - NextBB = II->getNormalDest(); - DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); - return true; - } - - // Advance program counter. - ++CurInst; - } -} - -/// Evaluate a call to function F, returning true if successful, false if we -/// can't evaluate it. ActualArgs contains the formal arguments for the -/// function. -bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, - const SmallVectorImpl<Constant*> &ActualArgs) { - // Check to see if this function is already executing (recursion). If so, - // bail out. TODO: we might want to accept limited recursion. - if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) - return false; - - CallStack.push_back(F); - - // Initialize arguments to the incoming values specified. 
- unsigned ArgNo = 0; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; - ++AI, ++ArgNo) - setVal(&*AI, ActualArgs[ArgNo]); - - // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, - // we can only evaluate any one basic block at most once. This set keeps - // track of what we have executed so we can detect recursive cases etc. - SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; - - // CurBB - The current basic block we're evaluating. - BasicBlock *CurBB = &F->front(); - - BasicBlock::iterator CurInst = CurBB->begin(); - - while (1) { - BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. - DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); - - if (!EvaluateBlock(CurInst, NextBB)) - return false; - - if (!NextBB) { - // Successfully running until there's no next block means that we found - // the return. Fill it the return value and pop the call stack. - ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator()); - if (RI->getNumOperands()) - RetVal = getVal(RI->getOperand(0)); - CallStack.pop_back(); - return true; - } - - // Okay, we succeeded in evaluating this control flow. See if we have - // executed the new block before. If so, we have a looping function, - // which we cannot evaluate in reasonable time. - if (!ExecutedBlocks.insert(NextBB).second) - return false; // looped! - - // Okay, we have never been in this block before. Check to see if there - // are any PHI nodes. If so, evaluate them with information about where - // we came from. - PHINode *PN = nullptr; - for (CurInst = NextBB->begin(); - (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) - setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB))); - - // Advance to the next block. - CurBB = NextBB; - } -} - /// Evaluate static constructors in the function, if we can. Return true if we /// can, false otherwise. static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + TargetLibraryInfo *TLI) { // Call the function. 
Evaluator Eval(DL, TLI); Constant *RetValDummy; @@ -2838,10 +2174,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" << F->getName() << "' to " << Eval.getMutatedMemory().size() << " stores.\n"); - for (DenseMap<Constant*, Constant*>::const_iterator I = - Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end(); - I != E; ++I) - CommitValueTo(I->second, I->first); + for (const auto &I : Eval.getMutatedMemory()) + CommitValueTo(I.second, I.first); for (GlobalVariable *GV : Eval.getInvariants()) GV->setConstant(true); } @@ -2850,8 +2184,9 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, } static int compareNames(Constant *const *A, Constant *const *B) { - return (*A)->stripPointerCasts()->getName().compare( - (*B)->stripPointerCasts()->getName()); + Value *AStripped = (*A)->stripPointerCastsNoFollowAliases(); + Value *BStripped = (*B)->stripPointerCastsNoFollowAliases(); + return AStripped->getName().compare(BStripped->getName()); } static void setUsedInitializer(GlobalVariable &V, @@ -2995,7 +2330,9 @@ static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, return true; } -bool GlobalOpt::OptimizeGlobalAliases(Module &M) { +static bool +OptimizeGlobalAliases(Module &M, + SmallSet<const Comdat *, 8> &NotDiscardableComdats) { bool Changed = false; LLVMUsed Used(M); @@ -3010,13 +2347,13 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) J->setLinkage(GlobalValue::InternalLinkage); - if (deleteIfDead(*J)) { + if (deleteIfDead(*J, NotDiscardableComdats)) { Changed = true; continue; } // If the aliasee may change at link time, nothing can be done - bail out. - if (J->mayBeOverridden()) + if (J->isInterposable()) continue; Constant *Aliasee = J->getAliasee(); @@ -3064,23 +2401,16 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { } static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::cxa_atexit)) + LibFunc::Func F = LibFunc::cxa_atexit; + if (!TLI->has(F)) return nullptr; - Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit)); - + Function *Fn = M.getFunction(TLI->getName(F)); if (!Fn) return nullptr; - FunctionType *FTy = Fn->getFunctionType(); - - // Checking that the function has the right return type, the right number of - // parameters and that they all have pointer types should be enough. - if (!FTy->getReturnType()->isIntegerTy() || - FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) + // Make sure that the function has the correct prototype. 
+ if (!TLI->getLibFunc(*Fn, F) || F != LibFunc::cxa_atexit) return nullptr; return Fn; @@ -3132,7 +2462,7 @@ static bool cxxDtorIsEmpty(const Function &Fn, return false; } -bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { +static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { /// Itanium C++ ABI p3.3.5: /// /// After constructing a global (or local static) object, that will require @@ -3179,12 +2509,11 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { return Changed; } -bool GlobalOpt::runOnModule(Module &M) { +static bool optimizeGlobalsInModule( + Module &M, const DataLayout &DL, TargetLibraryInfo *TLI, + function_ref<DominatorTree &(Function &)> LookupDomTree) { + SmallSet<const Comdat *, 8> NotDiscardableComdats; bool Changed = false; - - auto &DL = M.getDataLayout(); - TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -3204,7 +2533,8 @@ bool GlobalOpt::runOnModule(Module &M) { NotDiscardableComdats.insert(C); // Delete functions that are trivially dead, ccc -> fastcc - LocalChange |= OptimizeFunctions(M); + LocalChange |= + OptimizeFunctions(M, TLI, LookupDomTree, NotDiscardableComdats); // Optimize global_ctors list. LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { @@ -3212,10 +2542,11 @@ bool GlobalOpt::runOnModule(Module &M) { }); // Optimize non-address-taken globals. - LocalChange |= OptimizeGlobalVars(M); + LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree, + NotDiscardableComdats); // Resolve aliases, when possible. - LocalChange |= OptimizeGlobalAliases(M); + LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); // Try to remove trivial global destructors if they are not removed // already. 
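The hunks above have rewritten GlobalOpt's workers (OptimizeFunctions, OptimizeGlobalVars, processInternalGlobal, ...) to take a function_ref<DominatorTree &(Function &)> callback instead of calling getAnalysis<> internally; the hunk that follows exploits this so the same optimizeGlobalsInModule body can be driven from either pass manager. A small self-contained sketch of the callback idiom; countReachableBlocks is an illustrative placeholder, not part of this diff:

    #include "llvm/ADT/STLExtras.h" // llvm::function_ref
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // A pass-manager-agnostic worker: it obtains a DominatorTree through the
    // callback and never cares which pass manager computed it.
    static unsigned
    countReachableBlocks(Module &M,
                         function_ref<DominatorTree &(Function &)> LookupDomTree) {
      unsigned N = 0;
      for (Function &F : M) {
        if (F.isDeclaration())
          continue;
        DominatorTree &DT = LookupDomTree(F);
        // The entry block dominates exactly the reachable blocks.
        for (BasicBlock &BB : F)
          if (DT.dominates(&F.getEntryBlock(), &BB))
            ++N;
      }
      return N;
    }

In the hunk below, GlobalOptPass::run builds the callback from the FunctionAnalysisManager proxy (FAM.getResult<DominatorTreeAnalysis>(F)), while the legacy pass builds it from getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); the shared worker code never changes.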
@@ -3232,3 +2563,53 @@ bool GlobalOpt::runOnModule(Module &M) { return Changed; } +PreservedAnalyses GlobalOptPass::run(Module &M, AnalysisManager<Module> &AM) { + auto &DL = M.getDataLayout(); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(M); + auto &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ + return FAM.getResult<DominatorTreeAnalysis>(F); + }; + if (!optimizeGlobalsInModule(M, DL, &TLI, LookupDomTree)) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} + +namespace { +struct GlobalOptLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + GlobalOptLegacyPass() : ModulePass(ID) { + initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + + auto &DL = M.getDataLayout(); + auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + auto LookupDomTree = [this](Function &F) -> DominatorTree & { + return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); + }; + return optimizeGlobalsInModule(M, DL, TLI, LookupDomTree); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + } +}; +} + +char GlobalOptLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt", + "Global Variable Optimizer", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt", + "Global Variable Optimizer", false, false) + +ModulePass *llvm::createGlobalOptimizerPass() { + return new GlobalOptLegacyPass(); +} diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index af541d1552545..916135e33cd50 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -41,44 +41,14 @@ namespace { } bool runOnModule(Module &M) override; - private: - bool PropagateConstantsIntoArguments(Function &F); - bool PropagateConstantReturn(Function &F); }; } -char IPCP::ID = 0; -INITIALIZE_PASS(IPCP, "ipconstprop", - "Interprocedural constant propagation", false, false) - -ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } - -bool IPCP::runOnModule(Module &M) { - bool Changed = false; - bool LocalChange = true; - - // FIXME: instead of using smart algorithms, we just iterate until we stop - // making changes. - while (LocalChange) { - LocalChange = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration()) { - // Delete any klingons. - I->removeDeadConstantUsers(); - if (I->hasLocalLinkage()) - LocalChange |= PropagateConstantsIntoArguments(*I); - Changed |= PropagateConstantReturn(*I); - } - Changed |= LocalChange; - } - return Changed; -} - /// PropagateConstantsIntoArguments - Look at all uses of the specified /// function. If all uses are direct call sites, and all pass a particular /// constant in for an argument, propagate that constant in as the argument. /// -bool IPCP::PropagateConstantsIntoArguments(Function &F) { +static bool PropagateConstantsIntoArguments(Function &F) { if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit. 
// For each argument, keep track of its constant value and whether it is a @@ -157,13 +127,14 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { // Additionally if a function always returns one of its arguments directly, // callers will be updated to use the value they pass in directly instead of // using the return value. -bool IPCP::PropagateConstantReturn(Function &F) { +static bool PropagateConstantReturn(Function &F) { if (F.getReturnType()->isVoidTy()) return false; // No return value. - // If this function could be overridden later in the link stage, we can't - // propagate information about its results into callers. - if (F.mayBeOverridden()) + // We can infer and propagate the return value only when we know that the + // definition we'll get at link time is *exactly* the definition we see now. + // For more details, see GlobalValue::mayBeDerefined. + if (!F.isDefinitionExact()) return false; // Check to see if this function returns a constant. @@ -176,8 +147,8 @@ bool IPCP::PropagateConstantReturn(Function &F) { RetVals.push_back(UndefValue::get(F.getReturnType())); unsigned NumNonConstant = 0; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + for (BasicBlock &BB : F) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) { for (unsigned i = 0, e = RetVals.size(); i != e; ++i) { // Already found conflicting return values? Value *RV = RetVals[i]; @@ -277,3 +248,33 @@ bool IPCP::PropagateConstantReturn(Function &F) { if (MadeChange) ++NumReturnValProped; return MadeChange; } + +char IPCP::ID = 0; +INITIALIZE_PASS(IPCP, "ipconstprop", + "Interprocedural constant propagation", false, false) + +ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } + +bool IPCP::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + bool Changed = false; + bool LocalChange = true; + + // FIXME: instead of using smart algorithms, we just iterate until we stop + // making changes. + while (LocalChange) { + LocalChange = false; + for (Function &F : M) + if (!F.isDeclaration()) { + // Delete any klingons. 
+ F.removeDeadConstantUsers(); + if (F.hasLocalLinkage()) + LocalChange |= PropagateConstantsIntoArguments(F); + Changed |= PropagateConstantReturn(F); + } + Changed |= LocalChange; + } + return Changed; +} diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 89629cf06e083..3507eba81b2f5 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -18,31 +18,32 @@ #include "llvm/InitializePasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { initializeArgPromotionPass(Registry); - initializeConstantMergePass(Registry); + initializeConstantMergeLegacyPassPass(Registry); initializeCrossDSOCFIPass(Registry); initializeDAEPass(Registry); initializeDAHPass(Registry); initializeForceFunctionAttrsLegacyPassPass(Registry); - initializeGlobalDCEPass(Registry); - initializeGlobalOptPass(Registry); + initializeGlobalDCELegacyPassPass(Registry); + initializeGlobalOptLegacyPassPass(Registry); initializeIPCPPass(Registry); initializeAlwaysInlinerPass(Registry); initializeSimpleInlinerPass(Registry); initializeInferFunctionAttrsLegacyPassPass(Registry); - initializeInternalizePassPass(Registry); + initializeInternalizeLegacyPassPass(Registry); initializeLoopExtractorPass(Registry); initializeBlockExtractorPassPass(Registry); initializeSingleLoopExtractorPass(Registry); - initializeLowerBitSetsPass(Registry); + initializeLowerTypeTestsPass(Registry); initializeMergeFunctionsPass(Registry); - initializePartialInlinerPass(Registry); - initializePostOrderFunctionAttrsPass(Registry); - initializeReversePostOrderFunctionAttrsPass(Registry); + initializePartialInlinerLegacyPassPass(Registry); + initializePostOrderFunctionAttrsLegacyPassPass(Registry); + initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry); initializePruneEHPass(Registry); initializeStripDeadPrototypesLegacyPassPass(Registry); initializeStripSymbolsPass(Registry); @@ -50,9 +51,10 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); - initializeEliminateAvailableExternallyPass(Registry); - initializeSampleProfileLoaderPass(Registry); + initializeEliminateAvailableExternallyLegacyPassPass(Registry); + initializeSampleProfileLoaderLegacyPassPass(Registry); initializeFunctionImportPassPass(Registry); + initializeWholeProgramDevirtPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { @@ -72,7 +74,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { } void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createPostOrderFunctionAttrsPass()); + unwrap(PM)->add(createPostOrderFunctionAttrsLegacyPass()); } void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { @@ -104,10 +106,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { } void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { - std::vector<const char *> Export; - if (AllButMain) - Export.push_back("main"); - unwrap(PM)->add(createInternalizePass(Export)); + auto PreserveMain = [=](const GlobalValue &GV) { + return AllButMain && GV.getName() == "main"; + }; + unwrap(PM)->add(createInternalizePass(PreserveMain)); } void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp index 4295a7595c29f..ab2d2bd8b02a6 100644 
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/InferFunctionAttrs.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/Function.h" @@ -16,937 +15,27 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; #define DEBUG_TYPE "inferattrs" -STATISTIC(NumReadNone, "Number of functions inferred as readnone"); -STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); -STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly"); -STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); -STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); -STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); -STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); -STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns"); - -static bool setDoesNotAccessMemory(Function &F) { - if (F.doesNotAccessMemory()) - return false; - F.setDoesNotAccessMemory(); - ++NumReadNone; - return true; -} - -static bool setOnlyReadsMemory(Function &F) { - if (F.onlyReadsMemory()) - return false; - F.setOnlyReadsMemory(); - ++NumReadOnly; - return true; -} - -static bool setOnlyAccessesArgMemory(Function &F) { - if (F.onlyAccessesArgMemory()) - return false; - F.setOnlyAccessesArgMemory (); - ++NumArgMemOnly; - return true; -} - - -static bool setDoesNotThrow(Function &F) { - if (F.doesNotThrow()) - return false; - F.setDoesNotThrow(); - ++NumNoUnwind; - return true; -} - -static bool setDoesNotCapture(Function &F, unsigned n) { - if (F.doesNotCapture(n)) - return false; - F.setDoesNotCapture(n); - ++NumNoCapture; - return true; -} - -static bool setOnlyReadsMemory(Function &F, unsigned n) { - if (F.onlyReadsMemory(n)) - return false; - F.setOnlyReadsMemory(n); - ++NumReadOnlyArg; - return true; -} - -static bool setDoesNotAlias(Function &F, unsigned n) { - if (F.doesNotAlias(n)) - return false; - F.setDoesNotAlias(n); - ++NumNoAlias; - return true; -} - -static bool setNonNull(Function &F, unsigned n) { - assert((n != AttributeSet::ReturnIndex || - F.getReturnType()->isPointerTy()) && - "nonnull applies only to pointers"); - if (F.getAttributes().hasAttribute(n, Attribute::NonNull)) - return false; - F.addAttribute(n, Attribute::NonNull); - ++NumNonNull; - return true; -} - -/// Analyze the name and prototype of the given function and set any applicable -/// attributes. -/// -/// Returns true if any attributes were set and false otherwise. 
-static bool inferPrototypeAttributes(Function &F, - const TargetLibraryInfo &TLI) { - if (F.hasFnAttribute(Attribute::OptimizeNone)) - return false; - - FunctionType *FTy = F.getFunctionType(); - LibFunc::Func TheLibFunc; - if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc))) - return false; - - bool Changed = false; - switch (TheLibFunc) { - case LibFunc::strlen: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::strchr: - case LibFunc::strrchr: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - return false; - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc::strtol: - case LibFunc::strtod: - case LibFunc::strtof: - case LibFunc::strtoul: - case LibFunc::strtoll: - case LibFunc::strtold: - case LibFunc::strtoull: - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::strcpy: - case LibFunc::stpcpy: - case LibFunc::strcat: - case LibFunc::strncat: - case LibFunc::strncpy: - case LibFunc::stpncpy: - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::strxfrm: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::strcmp: // 0,1 - case LibFunc::strspn: // 0,1 - case LibFunc::strncmp: // 0,1 - case LibFunc::strcspn: // 0,1 - case LibFunc::strcoll: // 0,1 - case LibFunc::strcasecmp: // 0,1 - case LibFunc::strncasecmp: // - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::strstr: - case LibFunc::strpbrk: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::strtok: - case LibFunc::strtok_r: - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::scanf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::setbuf: - case LibFunc::setvbuf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::strdup: - case LibFunc::strndup: - if (FTy->getNumParams() < 1 || 
!FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::stat: - case LibFunc::statvfs: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::sscanf: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::sprintf: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::snprintf: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); - return Changed; - case LibFunc::setitimer: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::system: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "system" is a valid pthread cancellation point. 
- Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::malloc: - if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - return Changed; - case LibFunc::memcmp: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::memchr: - case LibFunc::memrchr: - if (FTy->getNumParams() != 3) - return false; - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc::modf: - case LibFunc::modff: - case LibFunc::modfl: - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::memcpy: - case LibFunc::memccpy: - case LibFunc::memmove: - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::memalign: - if (!FTy->getReturnType()->isPointerTy()) - return false; - Changed |= setDoesNotAlias(F, 0); - return Changed; - case LibFunc::mkdir: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::mktime: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::realloc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::read: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "read" is a valid pthread cancellation point. 
- Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::rewind: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::rmdir: - case LibFunc::remove: - case LibFunc::realpath: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::rename: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::readlink: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::write: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "write" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::bcopy: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::bcmp: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::bzero: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::calloc: - if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - return Changed; - case LibFunc::chmod: - case LibFunc::chown: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::ctermid: - case LibFunc::clearerr: - case LibFunc::closedir: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::atoi: - case LibFunc::atol: - case LibFunc::atof: - case LibFunc::atoll: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::access: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= 
setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::fopen: - if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::fdopen: - if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::feof: - case LibFunc::free: - case LibFunc::fseek: - case LibFunc::ftell: - case LibFunc::fgetc: - case LibFunc::fseeko: - case LibFunc::ftello: - case LibFunc::fileno: - case LibFunc::fflush: - case LibFunc::fclose: - case LibFunc::fsetpos: - case LibFunc::flockfile: - case LibFunc::funlockfile: - case LibFunc::ftrylockfile: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::ferror: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F); - return Changed; - case LibFunc::fputc: - case LibFunc::fstat: - case LibFunc::frexp: - case LibFunc::frexpf: - case LibFunc::frexpl: - case LibFunc::fstatvfs: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::fgets: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 3); - return Changed; - case LibFunc::fread: - if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); - return Changed; - case LibFunc::fwrite: - if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); - return Changed; - case LibFunc::fputs: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::fscanf: - case LibFunc::fprintf: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::fgetpos: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= 
setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::getc: - case LibFunc::getlogin_r: - case LibFunc::getc_unlocked: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::getenv: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::gets: - case LibFunc::getchar: - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc::getitimer: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::getpwnam: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::ungetc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::uname: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::unlink: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::unsetenv: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::utime: - case LibFunc::utimes: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::putc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::puts: - case LibFunc::printf: - case LibFunc::perror: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::pread: - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "pread" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::pwrite: - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "pwrite" is a valid pthread cancellation point. 
- Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::putchar: - Changed |= setDoesNotThrow(F); - return Changed; - case LibFunc::popen: - if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::pclose: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::vscanf: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::vsscanf: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::vfscanf: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::valloc: - if (!FTy->getReturnType()->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - return Changed; - case LibFunc::vprintf: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::vfprintf: - case LibFunc::vsprintf: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::vsnprintf: - if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); - return Changed; - case LibFunc::open: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "open" is a valid pthread cancellation point. 
- Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::opendir: - if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::tmpfile: - if (!FTy->getReturnType()->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - return Changed; - case LibFunc::times: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::htonl: - case LibFunc::htons: - case LibFunc::ntohl: - case LibFunc::ntohs: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAccessMemory(F); - return Changed; - case LibFunc::lstat: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::lchown: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::qsort: - if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) - return false; - // May throw; places call through function pointer. - Changed |= setDoesNotCapture(F, 4); - return Changed; - case LibFunc::dunder_strdup: - case LibFunc::dunder_strndup: - if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::dunder_strtok_r: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::under_IO_getc: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::under_IO_putc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::dunder_isoc99_scanf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::stat64: - case LibFunc::lstat64: - case LibFunc::statvfs64: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::dunder_isoc99_sscanf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return 
false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::fopen64: - if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - case LibFunc::fseeko64: - case LibFunc::ftello64: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - return Changed; - case LibFunc::tmpfile64: - if (!FTy->getReturnType()->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - return Changed; - case LibFunc::fstat64: - case LibFunc::fstatvfs64: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::open64: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); - return Changed; - case LibFunc::gettimeofday: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - // Currently some platforms have the restrict keyword on the arguments to - // gettimeofday. To be conservative, do not add noalias to gettimeofday's - // arguments. - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - return Changed; - - case LibFunc::Znwj: // new(unsigned int) - case LibFunc::Znwm: // new(unsigned long) - case LibFunc::Znaj: // new[](unsigned int) - case LibFunc::Znam: // new[](unsigned long) - case LibFunc::msvc_new_int: // new(unsigned int) - case LibFunc::msvc_new_longlong: // new(unsigned long long) - case LibFunc::msvc_new_array_int: // new[](unsigned int) - case LibFunc::msvc_new_array_longlong: // new[](unsigned long long) - if (FTy->getNumParams() != 1) - return false; - // Operator new always returns a nonnull noalias pointer - Changed |= setNonNull(F, AttributeSet::ReturnIndex); - Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex); - return Changed; - - //TODO: add LibFunc entries for: - //case LibFunc::memset_pattern4: - //case LibFunc::memset_pattern8: - case LibFunc::memset_pattern16: - if (FTy->isVarArg() || FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1)) || - !isa<IntegerType>(FTy->getParamType(2))) - return false; - - Changed |= setOnlyAccessesArgMemory(F); - Changed |= setOnlyReadsMemory(F, 2); - return Changed; - - default: - // FIXME: It'd be really nice to cover all the library functions we're - // aware of here. - return false; - } -} - static bool inferAllPrototypeAttributes(Module &M, const TargetLibraryInfo &TLI) { bool Changed = false; for (Function &F : M.functions()) - // We only infer things using the prototype if the definition isn't around - // to analyze directly. 
- if (F.isDeclaration()) - Changed |= inferPrototypeAttributes(F, TLI); + // We only infer things using the prototype and the name; we don't need + // definitions. + if (F.isDeclaration() && !F.hasFnAttribute((Attribute::OptimizeNone))) + Changed |= inferLibFuncAttributes(F, TLI); return Changed; } PreservedAnalyses InferFunctionAttrsPass::run(Module &M, - AnalysisManager<Module> *AM) { - auto &TLI = AM->getResult<TargetLibraryAnalysis>(M); + AnalysisManager<Module> &AM) { + auto &TLI = AM.getResult<TargetLibraryAnalysis>(M); if (!inferAllPrototypeAttributes(M, TLI)) // If we didn't infer anything, preserve all analyses. @@ -970,6 +59,9 @@ struct InferFunctionAttrsLegacyPass : public ModulePass { } bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); return inferAllPrototypeAttributes(M, TLI); } diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 1704bfea0b86a..cb1ab95ec2af1 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" @@ -37,16 +38,17 @@ namespace { class AlwaysInliner : public Inliner { public: - // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) { + AlwaysInliner() : Inliner(ID, /*InsertLifetime*/ true) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } - AlwaysInliner(bool InsertLifetime) - : Inliner(ID, -2000000000, InsertLifetime) { + AlwaysInliner(bool InsertLifetime) : Inliner(ID, InsertLifetime) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } + /// Main run interface method. We override here to avoid calling skipSCC(). + bool runOnSCC(CallGraphSCC &SCC) override { return inlineCalls(SCC); } + static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) override; @@ -64,6 +66,7 @@ INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 45609f891ed87..2aa650bd219dc 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" @@ -38,14 +39,20 @@ namespace { /// inliner pass and the always inliner pass. The two passes use different cost /// analyses to determine when to inline. class SimpleInliner : public Inliner { + // This field is populated based on one of the following: + // * optimization or size-optimization levels, + // * the --inline-threshold flag, or + // * a user specified value. 
+ int DefaultThreshold; public: - SimpleInliner() : Inliner(ID) { + SimpleInliner() + : Inliner(ID), DefaultThreshold(llvm::getDefaultInlineThreshold()) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } - SimpleInliner(int Threshold) - : Inliner(ID, Threshold, /*InsertLifetime*/ true) { + explicit SimpleInliner(int Threshold) + : Inliner(ID), DefaultThreshold(Threshold) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } @@ -54,7 +61,7 @@ public: InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); - return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT); + return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, PSI); } bool runOnSCC(CallGraphSCC &SCC) override; @@ -64,17 +71,6 @@ private: TargetTransformInfoWrapperPass *TTIWP; }; -static int computeThresholdFromOptLevels(unsigned OptLevel, - unsigned SizeOptLevel) { - if (OptLevel > 2) - return 275; - if (SizeOptLevel == 1) // -Os - return 75; - if (SizeOptLevel == 2) // -Oz - return 25; - return 225; -} - } // end anonymous namespace char SimpleInliner::ID = 0; @@ -82,6 +78,7 @@ INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(SimpleInliner, "inline", @@ -96,7 +93,7 @@ Pass *llvm::createFunctionInliningPass(int Threshold) { Pass *llvm::createFunctionInliningPass(unsigned OptLevel, unsigned SizeOptLevel) { return new SimpleInliner( - computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); + llvm::computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); } bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index bbe5f8761d5f1..79535ca497803 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -21,6 +20,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -28,9 +28,9 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -47,40 +47,19 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together"); // if those would be more profitable and blocked inline steps. 
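The static computeThresholdFromOptLevels deleted above is now expected to come from the shared llvm:: namespace. For reference, the mapping the removed helper encoded, restated under the assumption that the shared version keeps the same constants:

// -O3 raises the inline threshold, -Os/-Oz lower it, and everything
// else gets the long-standing default of 225.
static int thresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel) {
  if (OptLevel > 2)      // -O3
    return 275;
  if (SizeOptLevel == 1) // -Os
    return 75;
  if (SizeOptLevel == 2) // -Oz
    return 25;
  return 225;            // default (-O1/-O2)
}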
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); -static cl::opt<int> -InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, - cl::desc("Control the amount of inlining to perform (default = 225)")); - -static cl::opt<int> -HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), - cl::desc("Threshold for inlining functions with inline hint")); - -// We instroduce this threshold to help performance of instrumentation based -// PGO before we actually hook up inliner with analysis passes such as BPI and -// BFI. -static cl::opt<int> -ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225), - cl::desc("Threshold for inlining functions with cold attribute")); - -// Threshold to use when optsize is specified (and there is no -inline-limit). -const int OptSizeThreshold = 75; +Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} -Inliner::Inliner(char &ID) - : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) { -} - -Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) - : CallGraphSCCPass(ID), - InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit - : Threshold), - InsertLifetime(InsertLifetime) {} +Inliner::Inliner(char &ID, bool InsertLifetime) + : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + getAAResultsAnalysisUsage(AU); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -243,67 +222,6 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, return true; } -unsigned Inliner::getInlineThreshold(CallSite CS) const { - int Threshold = InlineThreshold; // -inline-threshold or else selected by - // overall opt level - - // If -inline-threshold is not given, listen to the optsize attribute when it - // would decrease the threshold. - Function *Caller = CS.getCaller(); - bool OptSize = Caller && !Caller->isDeclaration() && - // FIXME: Use Function::optForSize(). - Caller->hasFnAttribute(Attribute::OptimizeForSize); - if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && - OptSizeThreshold < Threshold) - Threshold = OptSizeThreshold; - - Function *Callee = CS.getCalledFunction(); - if (!Callee || Callee->isDeclaration()) - return Threshold; - - // If profile information is available, use that to adjust threshold of hot - // and cold functions. - // FIXME: The heuristic used below for determining hotness and coldness are - // based on preliminary SPEC tuning and may not be optimal. Replace this with - // a well-tuned heuristic based on *callsite* hotness and not callee hotness. - uint64_t FunctionCount = 0, MaxFunctionCount = 0; - bool HasPGOCounts = false; - if (Callee->getEntryCount() && - Callee->getParent()->getMaximumFunctionCount()) { - HasPGOCounts = true; - FunctionCount = Callee->getEntryCount().getValue(); - MaxFunctionCount = - Callee->getParent()->getMaximumFunctionCount().getValue(); - } - - // Listen to the inlinehint attribute or profile based hotness information - // when it would increase the threshold and the caller does not need to - // minimize its size. 
- bool InlineHint = - Callee->hasFnAttribute(Attribute::InlineHint) || - (HasPGOCounts && - FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)); - if (InlineHint && HintThreshold > Threshold && - !Caller->hasFnAttribute(Attribute::MinSize)) - Threshold = HintThreshold; - - // Listen to the cold attribute or profile based coldness information - // when it would decrease the threshold. - bool ColdCallee = - Callee->hasFnAttribute(Attribute::Cold) || - (HasPGOCounts && - FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)); - // Command line argument for InlineLimit will override the default - // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, - // do not use the default cold threshold even if it is smaller. - if ((InlineLimit.getNumOccurrences() == 0 || - ColdThreshold.getNumOccurrences() > 0) && ColdCallee && - ColdThreshold < Threshold) - Threshold = ColdThreshold; - - return Threshold; -} - static void emitAnalysis(CallSite CS, const Twine &Msg) { Function *Caller = CS.getCaller(); LLVMContext &Ctx = Caller->getContext(); @@ -311,6 +229,76 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) { emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); } +bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, + int &TotalSecondaryCost) { + + // For now we only handle local or inline functions. + if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) + return false; + // Try to detect the case where the current inlining candidate caller (call + // it B) is a static or linkonce-ODR function and is an inlining candidate + // elsewhere, and the current candidate callee (call it C) is large enough + // that inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B into + // its callers. + // + // This only applies to static and linkonce-ODR functions because those are + // expected to be available for inlining in the translation units where they + // are used. Thus we will always have the opportunity to make local inlining + // decisions. Importantly the linkonce-ODR linkage covers inline functions + // and templates in C++. + // + // FIXME: All of this logic should be sunk into getInlineCost. It relies on + // the internal implementation of the inline cost metrics rather than + // treating them as truly abstract units etc. + TotalSecondaryCost = 0; + // The candidate cost to be imposed upon the current function. + int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); + // This bool tracks what happens if we do NOT inline C into B. + bool callerWillBeRemoved = Caller->hasLocalLinkage(); + // This bool tracks what happens if we DO inline C into B. + bool inliningPreventsSomeOuterInline = false; + for (User *U : Caller->users()) { + CallSite CS2(U); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. Such references will prevent the caller + // from being removed. + if (!CS2 || CS2.getCalledFunction() != Caller) { + callerWillBeRemoved = false; + continue; + } + + InlineCost IC2 = getInlineCost(CS2); + ++NumCallerCallersAnalyzed; + if (!IC2) { + callerWillBeRemoved = false; + continue; + } + if (IC2.isAlways()) + continue; + + // See if inlining or original callsite would erase the cost delta of + // this callsite. We subtract off the penalty for the call instruction, + // which we would be deleting. 
+ if (IC2.getCostDelta() <= CandidateCost) { + inliningPreventsSomeOuterInline = true; + TotalSecondaryCost += IC2.getCost(); + } + } + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. + if (callerWillBeRemoved && !Caller->use_empty()) + TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; + + if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) + return true; + + return false; +} + /// Return true if the inliner should attempt to inline at the given CallSite. bool Inliner::shouldInline(CallSite CS) { InlineCost IC = getInlineCost(CS); @@ -342,77 +330,17 @@ bool Inliner::shouldInline(CallSite CS) { Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } - - // Try to detect the case where the current inlining candidate caller (call - // it B) is a static or linkonce-ODR function and is an inlining candidate - // elsewhere, and the current candidate callee (call it C) is large enough - // that inlining it into B would make B too big to inline later. In these - // circumstances it may be best not to inline C into B, but to inline B into - // its callers. - // - // This only applies to static and linkonce-ODR functions because those are - // expected to be available for inlining in the translation units where they - // are used. Thus we will always have the opportunity to make local inlining - // decisions. Importantly the linkonce-ODR linkage covers inline functions - // and templates in C++. - // - // FIXME: All of this logic should be sunk into getInlineCost. It relies on - // the internal implementation of the inline cost metrics rather than - // treating them as truly abstract units etc. - if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) { - int TotalSecondaryCost = 0; - // The candidate cost to be imposed upon the current function. - int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); - // This bool tracks what happens if we do NOT inline C into B. - bool callerWillBeRemoved = Caller->hasLocalLinkage(); - // This bool tracks what happens if we DO inline C into B. - bool inliningPreventsSomeOuterInline = false; - for (User *U : Caller->users()) { - CallSite CS2(U); - - // If this isn't a call to Caller (it could be some other sort - // of reference) skip it. Such references will prevent the caller - // from being removed. - if (!CS2 || CS2.getCalledFunction() != Caller) { - callerWillBeRemoved = false; - continue; - } - InlineCost IC2 = getInlineCost(CS2); - ++NumCallerCallersAnalyzed; - if (!IC2) { - callerWillBeRemoved = false; - continue; - } - if (IC2.isAlways()) - continue; - - // See if inlining or original callsite would erase the cost delta of - // this callsite. We subtract off the penalty for the call instruction, - // which we would be deleting. - if (IC2.getCostDelta() <= CandidateCost) { - inliningPreventsSomeOuterInline = true; - TotalSecondaryCost += IC2.getCost(); - } - } - // If all outer calls to Caller would get inlined, the cost for the last - // one is set very low by getInlineCost, in anticipation that Caller will - // be removed entirely. We did not account for this above unless there - // is only one caller of Caller. 
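The shouldBeDeferred helper added above weighs the cost of inlining the callee C into caller B against the outer inlines of B that doing so would block. A simplified, self-contained restatement of that arithmetic follows; it deliberately drops the linkage checks and the last-call-to-static bonus, and treats an outer call's cost and cost delta as the same number. shouldDefer is a hypothetical name, not LLVM API:

#include <vector>

static bool shouldDefer(int CalleeCost, int CallPenalty,
                        const std::vector<int> &OuterCallCosts) {
  // Cost the candidate would impose on the caller if inlined, minus the
  // call instruction we would be deleting.
  int CandidateCost = CalleeCost - (CallPenalty + 1);
  int TotalSecondaryCost = 0;
  bool PreventsOuterInline = false;
  for (int OuterCost : OuterCallCosts) {
    if (OuterCost <= CandidateCost) { // this outer inline would be blocked
      PreventsOuterInline = true;
      TotalSecondaryCost += OuterCost;
    }
  }
  // Defer only when the blocked outer inlines are, in total, cheaper
  // than inlining the callee here.
  return PreventsOuterInline && TotalSecondaryCost < CalleeCost;
}

Illustrative numbers: if inlining C into B costs 300 and B has two outer callsites costing 120 and 90 that fall below CandidateCost, then TotalSecondaryCost = 210 < 300 and the inline of C is deferred in favor of inlining B outward.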
- if (callerWillBeRemoved && !Caller->use_empty()) - TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; - - if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) { - DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << - " Cost = " << IC.getCost() << - ", outer Cost = " << TotalSecondaryCost << '\n'); - emitAnalysis( - CS, Twine("Not inlining. Cost of inlining " + - CS.getCalledFunction()->getName() + - " increases the cost of inlining " + - CS.getCaller()->getName() + " in other contexts")); - return false; - } + int TotalSecondaryCost = 0; + if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost)) { + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() + << " Cost = " << IC.getCost() + << ", outer Cost = " << TotalSecondaryCost << '\n'); + emitAnalysis(CS, Twine("Not inlining. Cost of inlining " + + CS.getCalledFunction()->getName() + + " increases the cost of inlining " + + CS.getCaller()->getName() + " in other contexts")); + return false; } DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() @@ -440,8 +368,15 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, } bool Inliner::runOnSCC(CallGraphSCC &SCC) { + if (skipSCC(SCC)) + return false; + return inlineCalls(SCC); +} + +bool Inliner::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); ACT = &getAnalysis<AssumptionCacheTracker>(); + PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule()); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); SmallPtrSet<Function*, 8> SCCFunctions; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 21bb5d000bc76..8c5c6f77077c0 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// // // This pass loops over all of the functions and variables in the input module. -// If the function or variable is not in the list of external names given to -// the pass it is marked as internal. +// If the function or variable does not need to be preserved according to the +// client supplied callback, it is marked as internal. // // This transformation would not be legal in a regular compilation, but it gets // extra information from the linker about what is safe. @@ -19,98 +19,77 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/Internalize.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include <fstream> #include <set> using namespace llvm; #define DEBUG_TYPE "internalize" -STATISTIC(NumAliases , "Number of aliases internalized"); +STATISTIC(NumAliases, "Number of aliases internalized"); STATISTIC(NumFunctions, "Number of functions internalized"); -STATISTIC(NumGlobals , "Number of global vars internalized"); +STATISTIC(NumGlobals, "Number of global vars internalized"); // APIFile - A file which contains a list of symbols that should not be marked // external. 
static cl::opt<std::string> -APIFile("internalize-public-api-file", cl::value_desc("filename"), - cl::desc("A file containing list of symbol names to preserve")); + APIFile("internalize-public-api-file", cl::value_desc("filename"), + cl::desc("A file containing list of symbol names to preserve")); // APIList - A list of symbols that should not be marked internal. static cl::list<std::string> -APIList("internalize-public-api-list", cl::value_desc("list"), - cl::desc("A list of symbol names to preserve"), - cl::CommaSeparated); + APIList("internalize-public-api-list", cl::value_desc("list"), + cl::desc("A list of symbol names to preserve"), cl::CommaSeparated); namespace { - class InternalizePass : public ModulePass { - std::set<std::string> ExternalNames; - public: - static char ID; // Pass identification, replacement for typeid - explicit InternalizePass(); - explicit InternalizePass(ArrayRef<const char *> ExportList); - void LoadFile(const char *Filename); - bool maybeInternalize(GlobalValue &GV, - const std::set<const Comdat *> &ExternalComdats); - void checkComdatVisibility(GlobalValue &GV, - std::set<const Comdat *> &ExternalComdats); - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addPreserved<CallGraphWrapperPass>(); - } - }; -} // end anonymous namespace - -char InternalizePass::ID = 0; -INITIALIZE_PASS(InternalizePass, "internalize", - "Internalize Global Symbols", false, false) - -InternalizePass::InternalizePass() : ModulePass(ID) { - initializeInternalizePassPass(*PassRegistry::getPassRegistry()); - if (!APIFile.empty()) // If a filename is specified, use it. - LoadFile(APIFile.c_str()); - ExternalNames.insert(APIList.begin(), APIList.end()); -} - -InternalizePass::InternalizePass(ArrayRef<const char *> ExportList) - : ModulePass(ID) { - initializeInternalizePassPass(*PassRegistry::getPassRegistry()); - for(ArrayRef<const char *>::const_iterator itr = ExportList.begin(); - itr != ExportList.end(); itr++) { - ExternalNames.insert(*itr); +// Helper to load an API list to preserve from file and expose it as a functor +// for internalization. +class PreserveAPIList { +public: + PreserveAPIList() { + if (!APIFile.empty()) + LoadFile(APIFile); + ExternalNames.insert(APIList.begin(), APIList.end()); } -} -void InternalizePass::LoadFile(const char *Filename) { - // Load the APIFile... - std::ifstream In(Filename); - if (!In.good()) { - errs() << "WARNING: Internalize couldn't load file '" << Filename - << "'! Continuing as if it's empty.\n"; - return; // Just continue as if the file were empty + bool operator()(const GlobalValue &GV) { + return ExternalNames.count(GV.getName()); } - while (In) { - std::string Symbol; - In >> Symbol; - if (!Symbol.empty()) - ExternalNames.insert(Symbol); + +private: + // Contains the set of symbols loaded from file + StringSet<> ExternalNames; + + void LoadFile(StringRef Filename) { + // Load the APIFile... + std::ifstream In(Filename.data()); + if (!In.good()) { + errs() << "WARNING: Internalize couldn't load file '" << Filename + << "'! 
Continuing as if it's empty.\n"; + return; // Just continue as if the file were empty + } + while (In) { + std::string Symbol; + In >> Symbol; + if (!Symbol.empty()) + ExternalNames.insert(Symbol); + } } -} +}; +} // end anonymous namespace -static bool isExternallyVisible(const GlobalValue &GV, - const std::set<std::string> &ExternalNames) { +bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) { // Function must be defined here if (GV.isDeclaration()) return true; @@ -123,15 +102,17 @@ static bool isExternallyVisible(const GlobalValue &GV, if (GV.hasDLLExportStorageClass()) return true; - // Marked to keep external? - if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName())) + // Already local, has nothing to do. + if (GV.hasLocalLinkage()) + return false; + + // Check some special cases + if (AlwaysPreserved.count(GV.getName())) return true; - return false; + return MustPreserveGV(GV); } -// Internalize GV if it is possible to do so, i.e. it is not externally visible -// and is not a member of an externally visible comdat. bool InternalizePass::maybeInternalize( GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) { if (Comdat *C = GV.getComdat()) { @@ -148,7 +129,7 @@ bool InternalizePass::maybeInternalize( if (GV.hasLocalLinkage()) return false; - if (isExternallyVisible(GV, ExternalNames)) + if (shouldPreserveGV(GV)) return false; } @@ -165,13 +146,12 @@ void InternalizePass::checkComdatVisibility( if (!C) return; - if (isExternallyVisible(GV, ExternalNames)) + if (shouldPreserveGV(GV)) ExternalComdats.insert(C); } -bool InternalizePass::runOnModule(Module &M) { - CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>(); - CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; +bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) { + bool Changed = false; CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; SmallPtrSet<GlobalValue *, 8> Used; @@ -198,13 +178,14 @@ bool InternalizePass::runOnModule(Module &M) { // conservative, we internalize symbols in llvm.compiler.used, but we // keep llvm.compiler.used so that the symbol is not deleted by llvm. for (GlobalValue *V : Used) { - ExternalNames.insert(V->getName()); + AlwaysPreserved.insert(V->getName()); } // Mark all functions not in the api as internal. for (Function &I : M) { if (!maybeInternalize(I, ExternalComdats)) continue; + Changed = true; if (ExternalNode) // Remove a callgraph edge from the external node to this function. @@ -217,53 +198,97 @@ bool InternalizePass::runOnModule(Module &M) { // Never internalize the llvm.used symbol. It is used to implement // attribute((used)). // FIXME: Shouldn't this just filter on llvm.metadata section?? - ExternalNames.insert("llvm.used"); - ExternalNames.insert("llvm.compiler.used"); + AlwaysPreserved.insert("llvm.used"); + AlwaysPreserved.insert("llvm.compiler.used"); // Never internalize anchors used by the machine module info, else the info // won't find them. (see MachineModuleInfo.) - ExternalNames.insert("llvm.global_ctors"); - ExternalNames.insert("llvm.global_dtors"); - ExternalNames.insert("llvm.global.annotations"); + AlwaysPreserved.insert("llvm.global_ctors"); + AlwaysPreserved.insert("llvm.global_dtors"); + AlwaysPreserved.insert("llvm.global.annotations"); // Never internalize symbols code-gen inserts. // FIXME: We should probably add this (and the __stack_chk_guard) via some // type of call-back in CodeGen. 
- ExternalNames.insert("__stack_chk_fail"); - ExternalNames.insert("__stack_chk_guard"); + AlwaysPreserved.insert("__stack_chk_fail"); + AlwaysPreserved.insert("__stack_chk_guard"); // Mark all global variables with initializers that are not in the api as // internal as well. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (!maybeInternalize(*I, ExternalComdats)) + for (auto &GV : M.globals()) { + if (!maybeInternalize(GV, ExternalComdats)) continue; + Changed = true; ++NumGlobals; - DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n"); } // Mark all aliases that are not in the api as internal as well. - for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) { - if (!maybeInternalize(*I, ExternalComdats)) + for (auto &GA : M.aliases()) { + if (!maybeInternalize(GA, ExternalComdats)) continue; + Changed = true; ++NumAliases; - DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n"); } - // We do not keep track of whether this pass changed the module because - // it adds unnecessary complexity: - // 1) This pass will generally be near the start of the pass pipeline, so - // there will be no analyses to invalidate. - // 2) This pass will most likely end up changing the module and it isn't worth - // worrying about optimizing the case where the module is unchanged. - return true; + return Changed; } -ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } +InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {} + +PreservedAnalyses InternalizePass::run(Module &M, AnalysisManager<Module> &AM) { + if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M))) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve<CallGraphAnalysis>(); + return PA; +} + +namespace { +class InternalizeLegacyPass : public ModulePass { + // Client supplied callback to control wheter a symbol must be preserved. + std::function<bool(const GlobalValue &)> MustPreserveGV; + +public: + static char ID; // Pass identification, replacement for typeid + + InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {} + + InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV) + : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) { + initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + + CallGraphWrapperPass *CGPass = + getAnalysisIfAvailable<CallGraphWrapperPass>(); + CallGraph *CG = CGPass ? 
&CGPass->getCallGraph() : nullptr; + return internalizeModule(M, MustPreserveGV, CG); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<CallGraphWrapperPass>(); + } +}; +} + +char InternalizeLegacyPass::ID = 0; +INITIALIZE_PASS(InternalizeLegacyPass, "internalize", + "Internalize Global Symbols", false, false) + +ModulePass *llvm::createInternalizePass() { + return new InternalizeLegacyPass(); +} -ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) { - return new InternalizePass(ExportList); +ModulePass *llvm::createInternalizePass( + std::function<bool(const GlobalValue &)> MustPreserveGV) { + return new InternalizeLegacyPass(std::move(MustPreserveGV)); } diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index b5410f5f77577..bc3df98d504ca 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = Analysis Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize +required_libraries = Analysis Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 3c6a7bb7a17ab..f898c3b5a9358 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -81,7 +81,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { - if (skipOptnoneFunction(L)) + if (skipLoop(L)) return false; // Only visit top-level loops. @@ -249,6 +249,9 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) { } bool BlockExtractorPass::runOnModule(Module &M) { + if (skipModule(M)) + return false; + std::set<BasicBlock*> TranslatedBlocksToNotExtract; for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) { BasicBlock *BB = BlocksToNotExtract[i]; @@ -272,15 +275,13 @@ bool BlockExtractorPass::runOnModule(Module &M) { std::string &FuncName = BlocksToNotExtractByName.back().first; std::string &BlockName = BlocksToNotExtractByName.back().second; - for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { - Function &F = *FI; + for (Function &F : M) { if (F.getName() != FuncName) continue; - for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { - BasicBlock &BB = *BI; + for (BasicBlock &BB : F) { if (BB.getName() != BlockName) continue; - TranslatedBlocksToNotExtract.insert(&*BI); + TranslatedBlocksToNotExtract.insert(&BB); } } @@ -290,18 +291,18 @@ bool BlockExtractorPass::runOnModule(Module &M) { // Now that we know which blocks to not extract, figure out which ones we WANT // to extract. 
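The Internalize rewrite above swaps the old ExternalNames string set for a client-supplied predicate. A minimal sketch of driving the new createInternalizePass overload through the legacy pass manager; the keep-only-main policy is purely illustrative and not part of this patch:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;

// Internalize every symbol in M except "main". The lambda plays the role the
// removed ExternalNames set used to play: returning true preserves a symbol.
static void internalizeAllButMain(Module &M) {
  legacy::PassManager PM;
  PM.add(createInternalizePass(
      [](const GlobalValue &GV) { return GV.getName() == "main"; }));
  PM.run(M);
}
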
std::vector<BasicBlock*> BlocksToExtract; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - SplitLandingPadPreds(&*F); - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - if (!TranslatedBlocksToNotExtract.count(&*BB)) - BlocksToExtract.push_back(&*BB); + for (Function &F : M) { + SplitLandingPadPreds(&F); + for (BasicBlock &BB : F) + if (!TranslatedBlocksToNotExtract.count(&BB)) + BlocksToExtract.push_back(&BB); } - for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) { + for (BasicBlock *BlockToExtract : BlocksToExtract) { SmallVector<BasicBlock*, 2> BlocksToExtractVec; - BlocksToExtractVec.push_back(BlocksToExtract[i]); + BlocksToExtractVec.push_back(BlockToExtract); if (const InvokeInst *II = - dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator())) + dyn_cast<InvokeInst>(BlockToExtract->getTerminator())) BlocksToExtractVec.push_back(II->getUnwindDest()); CodeExtractor(BlocksToExtractVec).extractCodeRegion(); } diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 7b515745c3122..36089f0a88018 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1,4 +1,4 @@ -//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===// +//===-- LowerTypeTests.cpp - type metadata lowering pass ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic. -// See http://llvm.org/docs/LangRef.html#bitsets for more information. +// This pass lowers type metadata and calls to the llvm.type.test intrinsic. +// See http://llvm.org/docs/TypeMetadata.html for more information. // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO/LowerBitSets.h" +#include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Statistic.h" @@ -33,17 +33,18 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +using namespace lowertypetests; -#define DEBUG_TYPE "lowerbitsets" +#define DEBUG_TYPE "lowertypetests" STATISTIC(ByteArraySizeBits, "Byte array size in bits"); STATISTIC(ByteArraySizeBytes, "Byte array size in bytes"); STATISTIC(NumByteArraysCreated, "Number of byte arrays created"); -STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered"); -STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets"); +STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered"); +STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers"); static cl::opt<bool> AvoidReuse( - "lowerbitsets-avoid-reuse", + "lowertypetests-avoid-reuse", cl::desc("Try to avoid reuse of byte array addresses using aliases"), cl::Hidden, cl::init(true)); @@ -203,10 +204,10 @@ struct ByteArrayInfo { Constant *Mask; }; -struct LowerBitSets : public ModulePass { +struct LowerTypeTests : public ModulePass { static char ID; - LowerBitSets() : ModulePass(ID) { - initializeLowerBitSetsPass(*PassRegistry::getPassRegistry()); + LowerTypeTests() : ModulePass(ID) { + initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } Module *M; @@ -221,105 +222,68 @@ struct LowerBitSets : public ModulePass { IntegerType *Int64Ty; IntegerType *IntPtrTy; - // The llvm.bitsets named metadata. 
- NamedMDNode *BitSetNM; - - // Mapping from bitset identifiers to the call sites that test them. - DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites; + // Mapping from type identifiers to the call sites that test them. + DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites; std::vector<ByteArrayInfo> ByteArrayInfos; BitSetInfo - buildBitSet(Metadata *BitSet, + buildBitSet(Metadata *TypeId, const DenseMap<GlobalObject *, uint64_t> &GlobalLayout); ByteArrayInfo *createByteArray(BitSetInfo &BSI); void allocateByteArrays(); Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI, Value *BitOffset); - void lowerBitSetCalls(ArrayRef<Metadata *> BitSets, - Constant *CombinedGlobalAddr, - const DenseMap<GlobalObject *, uint64_t> &GlobalLayout); + void + lowerTypeTestCalls(ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout); Value * lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, Constant *CombinedGlobal, const DenseMap<GlobalObject *, uint64_t> &GlobalLayout); - void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets, + void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalVariable *> Globals); unsigned getJumpTableEntrySize(); Type *getJumpTableEntryType(); Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest, unsigned Distance); - void verifyBitSetMDNode(MDNode *Op); - void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, + void verifyTypeMDNode(GlobalObject *GO, MDNode *Type); + void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds, ArrayRef<Function *> Functions); - void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets, + void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalObject *> Globals); - bool buildBitSets(); - bool eraseBitSetMetadata(); - - bool doInitialization(Module &M) override; + bool lower(); bool runOnModule(Module &M) override; }; } // anonymous namespace -INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets", - "Lower bitset metadata", false, false) -INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets", - "Lower bitset metadata", false, false) -char LowerBitSets::ID = 0; - -ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; } - -bool LowerBitSets::doInitialization(Module &Mod) { - M = &Mod; - const DataLayout &DL = Mod.getDataLayout(); - - Triple TargetTriple(M->getTargetTriple()); - LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); - Arch = TargetTriple.getArch(); - ObjectFormat = TargetTriple.getObjectFormat(); +INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, + false) +char LowerTypeTests::ID = 0; - Int1Ty = Type::getInt1Ty(M->getContext()); - Int8Ty = Type::getInt8Ty(M->getContext()); - Int32Ty = Type::getInt32Ty(M->getContext()); - Int32PtrTy = PointerType::getUnqual(Int32Ty); - Int64Ty = Type::getInt64Ty(M->getContext()); - IntPtrTy = DL.getIntPtrType(M->getContext(), 0); +ModulePass *llvm::createLowerTypeTestsPass() { return new LowerTypeTests; } - BitSetNM = M->getNamedMetadata("llvm.bitsets"); - - BitSetTestCallSites.clear(); - - return false; -} - -/// Build a bit set for BitSet using the object layouts in +/// Build a bit set for TypeId using the object layouts in /// GlobalLayout. 
-BitSetInfo LowerBitSets::buildBitSet( - Metadata *BitSet, +BitSetInfo LowerTypeTests::buildBitSet( + Metadata *TypeId, const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { BitSetBuilder BSB; - // Compute the byte offset of each element of this bitset. - if (BitSetNM) { - for (MDNode *Op : BitSetNM->operands()) { - if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) - continue; - Constant *OpConst = - cast<ConstantAsMetadata>(Op->getOperand(1))->getValue(); - if (auto GA = dyn_cast<GlobalAlias>(OpConst)) - OpConst = GA->getAliasee(); - auto OpGlobal = dyn_cast<GlobalObject>(OpConst); - if (!OpGlobal) + // Compute the byte offset of each address associated with this type + // identifier. + SmallVector<MDNode *, 2> Types; + for (auto &GlobalAndOffset : GlobalLayout) { + Types.clear(); + GlobalAndOffset.first->getMetadata(LLVMContext::MD_type, Types); + for (MDNode *Type : Types) { + if (Type->getOperand(1) != TypeId) continue; uint64_t Offset = - cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2)) + cast<ConstantInt>(cast<ConstantAsMetadata>(Type->getOperand(0)) ->getValue())->getZExtValue(); - - Offset += GlobalLayout.find(OpGlobal)->second; - - BSB.addOffset(Offset); + BSB.addOffset(GlobalAndOffset.second + Offset); } } @@ -341,7 +305,7 @@ static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); } -ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) { +ByteArrayInfo *LowerTypeTests::createByteArray(BitSetInfo &BSI) { // Create globals to stand in for byte arrays and masks. These never actually // get initialized, we RAUW and erase them later in allocateByteArrays() once // we know the offset and mask to use. @@ -360,7 +324,7 @@ ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) { return BAI; } -void LowerBitSets::allocateByteArrays() { +void LowerTypeTests::allocateByteArrays() { std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(), [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { return BAI1.BitSize > BAI2.BitSize; @@ -413,8 +377,8 @@ void LowerBitSets::allocateByteArrays() { /// Build a test that bit BitOffset is set in BSI, where /// BitSetGlobal is a global containing the bits in BSI. -Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, - ByteArrayInfo *&BAI, Value *BitOffset) { +Value *LowerTypeTests::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, + ByteArrayInfo *&BAI, Value *BitOffset) { if (BSI.BitSize <= 64) { // If the bit set is sufficiently small, we can avoid a load by bit testing // a constant. @@ -454,9 +418,9 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, } } -/// Lower a llvm.bitset.test call to its implementation. Returns the value to +/// Lower a llvm.type.test call to its implementation. Returns the value to /// replace the call with. -Value *LowerBitSets::lowerBitSetCall( +Value *LowerTypeTests::lowerBitSetCall( CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, Constant *CombinedGlobalIntAddr, const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { @@ -524,10 +488,10 @@ Value *LowerBitSets::lowerBitSetCall( return P; } -/// Given a disjoint set of bitsets and globals, layout the globals, build the -/// bit sets and lower the llvm.bitset.test calls. 
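Under the new scheme the member offsets come from per-global !type attachments of the form { offset, type identifier } rather than from the llvm.bitsets named metadata. Once collected, the offsets are folded into a (ByteOffset, AlignLog2, BitSize) triple plus a bit vector. A standalone, simplified sketch of that folding, mirroring what the in-tree BitSetBuilder does (SimpleBitSet is a stand-in type, not part of the patch):

#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

// Fold a non-empty set of byte offsets into a compact description where
// Bits[i] is set iff (ByteOffset + (i << AlignLog2)) is a member offset.
struct SimpleBitSet {
  uint64_t ByteOffset;  // smallest member offset
  unsigned AlignLog2;   // log2 of largest power of two dividing all deltas
  std::vector<bool> Bits;
};

static SimpleBitSet buildSimpleBitSet(const std::set<uint64_t> &Offsets) {
  assert(!Offsets.empty() && "need at least one member");
  SimpleBitSet BS;
  BS.ByteOffset = *Offsets.begin();
  uint64_t Mask = 0;
  for (uint64_t O : Offsets)
    Mask |= O - BS.ByteOffset;  // OR of deltas; low zero bits give alignment
  BS.AlignLog2 = 0;
  while (Mask && !(Mask & 1)) {
    ++BS.AlignLog2;
    Mask >>= 1;
  }
  uint64_t MaxDelta = *Offsets.rbegin() - BS.ByteOffset;
  BS.Bits.assign((MaxDelta >> BS.AlignLog2) + 1, false);
  for (uint64_t O : Offsets)
    BS.Bits[(O - BS.ByteOffset) >> BS.AlignLog2] = true;
  return BS;
}

For example, offsets {0, 256, 384} fold to ByteOffset 0, AlignLog2 7 and the four-entry pattern 1011: alignment, not raw byte distance, determines the bit-vector size.
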
-void LowerBitSets::buildBitSetsFromGlobalVariables( - ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) { +/// Given a disjoint set of type identifiers and globals, lay out the globals, +/// build the bit sets and lower the llvm.type.test calls. +void LowerTypeTests::buildBitSetsFromGlobalVariables( + ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalVariable *> Globals) { // Build a new global with the combined contents of the referenced globals. // This global is a struct whose even-indexed elements contain the original // contents of the referenced globals and whose odd-indexed elements contain @@ -544,7 +508,7 @@ void LowerBitSets::buildBitSetsFromGlobalVariables( // Cap at 128 was found experimentally to have a good data/instruction // overhead tradeoff. if (Padding > 128) - Padding = RoundUpToAlignment(InitSize, 128) - InitSize; + Padding = alignTo(InitSize, 128) - InitSize; GlobalInits.push_back( ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); @@ -565,7 +529,7 @@ void LowerBitSets::buildBitSetsFromGlobalVariables( // Multiply by 2 to account for padding elements. GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2); - lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout); + lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout); // Build aliases pointing to offsets into the combined global for each // global from which we built the combined global, and replace references @@ -591,19 +555,19 @@ void LowerBitSets::buildBitSetsFromGlobalVariables( } } -void LowerBitSets::lowerBitSetCalls( - ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr, +void LowerTypeTests::lowerTypeTestCalls( + ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr, const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { Constant *CombinedGlobalIntAddr = ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy); - // For each bitset in this disjoint set... - for (Metadata *BS : BitSets) { + // For each type identifier in this disjoint set... + for (Metadata *TypeId : TypeIds) { // Build the bitset. - BitSetInfo BSI = buildBitSet(BS, GlobalLayout); + BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout); DEBUG({ - if (auto BSS = dyn_cast<MDString>(BS)) - dbgs() << BSS->getString() << ": "; + if (auto MDS = dyn_cast<MDString>(TypeId)) + dbgs() << MDS->getString() << ": "; else dbgs() << "<unnamed>: "; BSI.print(dbgs()); @@ -611,9 +575,9 @@ void LowerBitSets::lowerBitSetCalls( ByteArrayInfo *BAI = nullptr; - // Lower each call to llvm.bitset.test for this bitset. - for (CallInst *CI : BitSetTestCallSites[BS]) { - ++NumBitSetCallsLowered; + // Lower each call to llvm.type.test for this type identifier. 
+ for (CallInst *CI : TypeTestCallSites[TypeId]) { + ++NumTypeTestCallsLowered; Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout); CI->replaceAllUsesWith(Lowered); @@ -622,39 +586,32 @@ void LowerBitSets::lowerBitSetCalls( } } -void LowerBitSets::verifyBitSetMDNode(MDNode *Op) { - if (Op->getNumOperands() != 3) +void LowerTypeTests::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { + if (Type->getNumOperands() != 2) report_fatal_error( - "All operands of llvm.bitsets metadata must have 3 elements"); - if (!Op->getOperand(1)) - return; - - auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1)); - if (!OpConstMD) - report_fatal_error("Bit set element must be a constant"); - auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue()); - if (!OpGlobal) - return; + "All operands of type metadata must have 2 elements"); - if (OpGlobal->isThreadLocal()) + if (GO->isThreadLocal()) report_fatal_error("Bit set element may not be thread-local"); - if (OpGlobal->hasSection()) - report_fatal_error("Bit set element may not have an explicit section"); + if (isa<GlobalVariable>(GO) && GO->hasSection()) + report_fatal_error( + "A member of a type identifier may not have an explicit section"); - if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker()) - report_fatal_error("Bit set global var element must be a definition"); + if (isa<GlobalVariable>(GO) && GO->isDeclarationForLinker()) + report_fatal_error( + "A global var member of a type identifier must be a definition"); - auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2)); + auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0)); if (!OffsetConstMD) - report_fatal_error("Bit set element offset must be a constant"); + report_fatal_error("Type offset must be a constant"); auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue()); if (!OffsetInt) - report_fatal_error("Bit set element offset must be an integer constant"); + report_fatal_error("Type offset must be an integer constant"); } static const unsigned kX86JumpTableEntrySize = 8; -unsigned LowerBitSets::getJumpTableEntrySize() { +unsigned LowerTypeTests::getJumpTableEntrySize() { if (Arch != Triple::x86 && Arch != Triple::x86_64) report_fatal_error("Unsupported architecture for jump tables"); @@ -665,8 +622,9 @@ unsigned LowerBitSets::getJumpTableEntrySize() { // consists of an instruction sequence containing a relative branch to Dest. The // constant will be laid out at address Src+(Len*Distance) where Len is the // target-specific jump table entry size. -Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest, - unsigned Distance) { +Constant *LowerTypeTests::createJumpTableEntry(GlobalObject *Src, + Function *Dest, + unsigned Distance) { if (Arch != Triple::x86 && Arch != Triple::x86_64) report_fatal_error("Unsupported architecture for jump tables"); @@ -693,7 +651,7 @@ Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest, return ConstantStruct::getAnon(Fields, /*Packed=*/true); } -Type *LowerBitSets::getJumpTableEntryType() { +Type *LowerTypeTests::getJumpTableEntryType() { if (Arch != Triple::x86 && Arch != Triple::x86_64) report_fatal_error("Unsupported architecture for jump tables"); @@ -702,10 +660,10 @@ Type *LowerBitSets::getJumpTableEntryType() { /*Packed=*/true); } -/// Given a disjoint set of bitsets and functions, build a jump table for the -/// functions, build the bit sets and lower the llvm.bitset.test calls. 
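For a bit set small enough to fit in 64 bits, lowerBitSetCall avoids the byte-array load entirely and the lowered call reduces to pointer arithmetic against constants. A plain-C++ sketch of the shape of that fast path, not the emitted IR; the parameters correspond to the folded layout sketched earlier:

#include <cstdint>

// Does Ptr point at a member of the set (Base, AlignLog2, BitSize, Bits)?
static bool typeTest(uint64_t Ptr, uint64_t Base, unsigned AlignLog2,
                     uint64_t BitSize, uint64_t Bits) {
  uint64_t PtrOffset = Ptr - Base;
  // Rotate right rather than shift right: a misaligned pointer leaves its
  // low bits in the high positions, so it fails the range check below.
  uint64_t BitOffset =
      AlignLog2 == 0
          ? PtrOffset
          : (PtrOffset >> AlignLog2) | (PtrOffset << (64 - AlignLog2));
  if (BitOffset >= BitSize)  // out of range, misaligned, or both
    return false;
  return (Bits >> BitOffset) & 1;
}
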
-void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, - ArrayRef<Function *> Functions) { +/// Given a disjoint set of type identifiers and functions, build a jump table +/// for the functions, build the bit sets and lower the llvm.type.test calls. +void LowerTypeTests::buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds, + ArrayRef<Function *> Functions) { // Unlike the global bitset builder, the function bitset builder cannot // re-arrange functions in a particular order and base its calculations on the // layout of the functions' entry points, as we have no idea how large a @@ -719,8 +677,7 @@ void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, // verification done inside the module. // // In more concrete terms, suppose we have three functions f, g, h which are - // members of a single bitset, and a function foo that returns their - // addresses: + // of the same type, and a function foo that returns their addresses: // // f: // mov 0, %eax @@ -803,7 +760,7 @@ void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, JumpTable->setSection(ObjectFormat == Triple::MachO ? "__TEXT,__text,regular,pure_instructions" : ".text"); - lowerBitSetCalls(BitSets, JumpTable, GlobalLayout); + lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); // Build aliases pointing to offsets into the jump table, and replace // references to the original functions with references to the aliases. @@ -838,39 +795,32 @@ void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, ConstantArray::get(JumpTableType, JumpTableEntries)); } -void LowerBitSets::buildBitSetsFromDisjointSet( - ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) { - llvm::DenseMap<Metadata *, uint64_t> BitSetIndices; - llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices; - for (unsigned I = 0; I != BitSets.size(); ++I) - BitSetIndices[BitSets[I]] = I; - for (unsigned I = 0; I != Globals.size(); ++I) - GlobalIndices[Globals[I]] = I; - - // For each bitset, build a set of indices that refer to globals referenced by - // the bitset. - std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size()); - if (BitSetNM) { - for (MDNode *Op : BitSetNM->operands()) { - // Op = { bitset name, global, offset } - if (!Op->getOperand(1)) - continue; - auto I = BitSetIndices.find(Op->getOperand(0)); - if (I == BitSetIndices.end()) - continue; - - auto OpGlobal = dyn_cast<GlobalObject>( - cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); - if (!OpGlobal) - continue; - BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]); +void LowerTypeTests::buildBitSetsFromDisjointSet( + ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalObject *> Globals) { + llvm::DenseMap<Metadata *, uint64_t> TypeIdIndices; + for (unsigned I = 0; I != TypeIds.size(); ++I) + TypeIdIndices[TypeIds[I]] = I; + + // For each type identifier, build a set of indices that refer to members of + // the type identifier. + std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size()); + SmallVector<MDNode *, 2> Types; + unsigned GlobalIndex = 0; + for (GlobalObject *GO : Globals) { + Types.clear(); + GO->getMetadata(LLVMContext::MD_type, Types); + for (MDNode *Type : Types) { + // Type = { offset, type identifier } + unsigned TypeIdIndex = TypeIdIndices[Type->getOperand(1)]; + TypeMembers[TypeIdIndex].insert(GlobalIndex); } + GlobalIndex++; } // Order the sets of indices by size. The GlobalLayoutBuilder works best // when given small index sets first. 
std::stable_sort( - BitSetMembers.begin(), BitSetMembers.end(), + TypeMembers.begin(), TypeMembers.end(), [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) { return O1.size() < O2.size(); }); @@ -879,7 +829,7 @@ void LowerBitSets::buildBitSetsFromDisjointSet( // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as // close together as possible. GlobalLayoutBuilder GLB(Globals.size()); - for (auto &&MemSet : BitSetMembers) + for (auto &&MemSet : TypeMembers) GLB.addFragment(MemSet); // Build the bitsets from this disjoint set. @@ -891,13 +841,13 @@ void LowerBitSets::buildBitSetsFromDisjointSet( for (auto &&Offset : F) { auto GV = dyn_cast<GlobalVariable>(Globals[Offset]); if (!GV) - report_fatal_error( - "Bit set may not contain both global variables and functions"); + report_fatal_error("Type identifier may not contain both global " + "variables and functions"); *OGI++ = GV; } } - buildBitSetsFromGlobalVariables(BitSets, OrderedGVs); + buildBitSetsFromGlobalVariables(TypeIds, OrderedGVs); } else { // Build a vector of functions with the computed layout. std::vector<Function *> OrderedFns(Globals.size()); @@ -906,102 +856,97 @@ void LowerBitSets::buildBitSetsFromDisjointSet( for (auto &&Offset : F) { auto Fn = dyn_cast<Function>(Globals[Offset]); if (!Fn) - report_fatal_error( - "Bit set may not contain both global variables and functions"); + report_fatal_error("Type identifier may not contain both global " + "variables and functions"); *OFI++ = Fn; } } - buildBitSetsFromFunctions(BitSets, OrderedFns); + buildBitSetsFromFunctions(TypeIds, OrderedFns); } } -/// Lower all bit sets in this module. -bool LowerBitSets::buildBitSets() { - Function *BitSetTestFunc = - M->getFunction(Intrinsic::getName(Intrinsic::bitset_test)); - if (!BitSetTestFunc) +/// Lower all type tests in this module. +bool LowerTypeTests::lower() { + Function *TypeTestFunc = + M->getFunction(Intrinsic::getName(Intrinsic::type_test)); + if (!TypeTestFunc || TypeTestFunc->use_empty()) return false; - // Equivalence class set containing bitsets and the globals they reference. - // This is used to partition the set of bitsets in the module into disjoint - // sets. + // Equivalence class set containing type identifiers and the globals that + // reference them. This is used to partition the set of type identifiers in + // the module into disjoint sets. typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>> GlobalClassesTy; GlobalClassesTy GlobalClasses; - // Verify the bitset metadata and build a mapping from bitset identifiers to - // their last observed index in BitSetNM. This will used later to - // deterministically order the list of bitset identifiers. - llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices; - if (BitSetNM) { - for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) { - MDNode *Op = BitSetNM->getOperand(I); - verifyBitSetMDNode(Op); - BitSetIdIndices[Op->getOperand(0)] = I; + // Verify the type metadata and build a mapping from type identifiers to their + // last observed index in the list of globals. This will be used later to + // deterministically order the list of type identifiers. 
+ llvm::DenseMap<Metadata *, unsigned> TypeIdIndices; + unsigned I = 0; + SmallVector<MDNode *, 2> Types; + for (GlobalObject &GO : M->global_objects()) { + Types.clear(); + GO.getMetadata(LLVMContext::MD_type, Types); + for (MDNode *Type : Types) { + verifyTypeMDNode(&GO, Type); + TypeIdIndices[cast<MDNode>(Type)->getOperand(1)] = ++I; } } - for (const Use &U : BitSetTestFunc->uses()) { + for (const Use &U : TypeTestFunc->uses()) { auto CI = cast<CallInst>(U.getUser()); auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1)); if (!BitSetMDVal) report_fatal_error( - "Second argument of llvm.bitset.test must be metadata"); + "Second argument of llvm.type.test must be metadata"); auto BitSet = BitSetMDVal->getMetadata(); - // Add the call site to the list of call sites for this bit set. We also use - // BitSetTestCallSites to keep track of whether we have seen this bit set - // before. If we have, we don't need to re-add the referenced globals to the - // equivalence class. - std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, - bool> Ins = - BitSetTestCallSites.insert( + // Add the call site to the list of call sites for this type identifier. We + // also use TypeTestCallSites to keep track of whether we have seen this + // type identifier before. If we have, we don't need to re-add the + // referenced globals to the equivalence class. + std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, bool> + Ins = TypeTestCallSites.insert( std::make_pair(BitSet, std::vector<CallInst *>())); Ins.first->second.push_back(CI); if (!Ins.second) continue; - // Add the bitset to the equivalence class. + // Add the type identifier to the equivalence class. GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet); GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI); - if (!BitSetNM) - continue; - - // Add the referenced globals to the bitset's equivalence class. - for (MDNode *Op : BitSetNM->operands()) { - if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) - continue; - - auto OpGlobal = dyn_cast<GlobalObject>( - cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); - if (!OpGlobal) - continue; - - CurSet = GlobalClasses.unionSets( - CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal))); + // Add the referenced globals to the type identifier's equivalence class. + for (GlobalObject &GO : M->global_objects()) { + Types.clear(); + GO.getMetadata(LLVMContext::MD_type, Types); + for (MDNode *Type : Types) + if (Type->getOperand(1) == BitSet) + CurSet = GlobalClasses.unionSets( + CurSet, GlobalClasses.findLeader(GlobalClasses.insert(&GO))); } } if (GlobalClasses.empty()) return false; - // Build a list of disjoint sets ordered by their maximum BitSetNM index - // for determinism. + // Build a list of disjoint sets ordered by their maximum global index for + // determinism. 
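The EquivalenceClasses container used in lower() behaves like a classic union-find: each type identifier is united with every global that carries it, so identifiers that never share a member land in different disjoint sets, and each set can later get its own combined global or jump table. A minimal standalone equivalent, for intuition only:

#include <numeric>
#include <vector>

// Minimal union-find standing in for llvm::EquivalenceClasses.
struct UnionFind {
  std::vector<unsigned> Parent;
  explicit UnionFind(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0);  // everyone starts alone
  }
  unsigned find(unsigned X) {
    while (Parent[X] != X)
      X = Parent[X] = Parent[Parent[X]];  // path halving
    return X;
  }
  void unite(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
};
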
std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets; for (GlobalClassesTy::iterator I = GlobalClasses.begin(), E = GlobalClasses.end(); I != E; ++I) { if (!I->isLeader()) continue; - ++NumBitSetDisjointSets; + ++NumTypeIdDisjointSets; unsigned MaxIndex = 0; for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); MI != GlobalClasses.member_end(); ++MI) { if ((*MI).is<Metadata *>()) - MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]); + MaxIndex = std::max(MaxIndex, TypeIdIndices[MI->get<Metadata *>()]); } Sets.emplace_back(I, MaxIndex); } @@ -1013,26 +958,26 @@ bool LowerBitSets::buildBitSets() { // For each disjoint set we found... for (const auto &S : Sets) { - // Build the list of bitsets in this disjoint set. - std::vector<Metadata *> BitSets; + // Build the list of type identifiers in this disjoint set. + std::vector<Metadata *> TypeIds; std::vector<GlobalObject *> Globals; for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(S.first); MI != GlobalClasses.member_end(); ++MI) { if ((*MI).is<Metadata *>()) - BitSets.push_back(MI->get<Metadata *>()); + TypeIds.push_back(MI->get<Metadata *>()); else Globals.push_back(MI->get<GlobalObject *>()); } - // Order bitsets by BitSetNM index for determinism. This ordering is stable - // as there is a one-to-one mapping between metadata and indices. - std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) { - return BitSetIdIndices[M1] < BitSetIdIndices[M2]; + // Order type identifiers by global index for determinism. This ordering is + // stable as there is a one-to-one mapping between metadata and indices. + std::sort(TypeIds.begin(), TypeIds.end(), [&](Metadata *M1, Metadata *M2) { + return TypeIdIndices[M1] < TypeIdIndices[M2]; }); - // Lower the bitsets in this disjoint set. - buildBitSetsFromDisjointSet(BitSets, Globals); + // Build bitsets for this disjoint set. + buildBitSetsFromDisjointSet(TypeIds, Globals); } allocateByteArrays(); @@ -1040,16 +985,36 @@ bool LowerBitSets::buildBitSets() { return true; } -bool LowerBitSets::eraseBitSetMetadata() { - if (!BitSetNM) - return false; +// Initialization helper shared by the old and the new PM. 
+static void init(LowerTypeTests *LTT, Module &M) { + LTT->M = &M; + const DataLayout &DL = M.getDataLayout(); + Triple TargetTriple(M.getTargetTriple()); + LTT->LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); + LTT->Arch = TargetTriple.getArch(); + LTT->ObjectFormat = TargetTriple.getObjectFormat(); + LTT->Int1Ty = Type::getInt1Ty(M.getContext()); + LTT->Int8Ty = Type::getInt8Ty(M.getContext()); + LTT->Int32Ty = Type::getInt32Ty(M.getContext()); + LTT->Int32PtrTy = PointerType::getUnqual(LTT->Int32Ty); + LTT->Int64Ty = Type::getInt64Ty(M.getContext()); + LTT->IntPtrTy = DL.getIntPtrType(M.getContext(), 0); + LTT->TypeTestCallSites.clear(); +} - M->eraseNamedMetadata(BitSetNM); - return true; +bool LowerTypeTests::runOnModule(Module &M) { + if (skipModule(M)) + return false; + init(this, M); + return lower(); } -bool LowerBitSets::runOnModule(Module &M) { - bool Changed = buildBitSets(); - Changed |= eraseBitSetMetadata(); - return Changed; +PreservedAnalyses LowerTypeTestsPass::run(Module &M, + AnalysisManager<Module> &AM) { + LowerTypeTests Impl; + init(&Impl, M); + bool Changed = Impl.lower(); + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); } diff --git a/lib/Transforms/IPO/Makefile b/lib/Transforms/IPO/Makefile deleted file mode 100644 index 5c42374139aaa..0000000000000 --- a/lib/Transforms/IPO/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Transforms/IPO/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMipo -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 8a209a18c540e..fe653a75ddb5a 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -89,13 +89,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Hashing.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -112,6 +109,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" #include <vector> using namespace llvm; @@ -189,7 +187,7 @@ public: private: /// Test whether two basic blocks have equivalent behaviour. - int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR); + int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const; /// Constants comparison. /// Its analog to lexicographical comparison between hypothetical numbers @@ -293,11 +291,11 @@ private: /// look at their particular properties (bit-width for vectors, and /// address space for pointers). /// If these properties are equal - compare their contents. - int cmpConstants(const Constant *L, const Constant *R); + int cmpConstants(const Constant *L, const Constant *R) const; /// Compares two global values by number. Uses the GlobalNumbersState to /// identify the same gobals across function calls. 
- int cmpGlobalValues(GlobalValue *L, GlobalValue *R); + int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const; /// Assign or look up previously assigned numbers for the two values, and /// return whether the numbers are equal. Numbers are assigned in the order @@ -317,11 +315,11 @@ private: /// then left value is greater. /// In another words, we compare serial numbers, for more details /// see comments for sn_mapL and sn_mapR. - int cmpValues(const Value *L, const Value *R); + int cmpValues(const Value *L, const Value *R) const; /// Compare two Instructions for equivalence, similar to - /// Instruction::isSameOperationAs but with modifications to the type - /// comparison. + /// Instruction::isSameOperationAs. + /// /// Stages are listed in "most significant stage first" order: /// On each stage below, we do comparison between some left and right /// operation parts. If parts are non-equal, we assign parts comparison @@ -339,8 +337,9 @@ private: /// For example, for Load it would be: /// 6.1.Load: volatile (as boolean flag) /// 6.2.Load: alignment (as integer numbers) - /// 6.3.Load: synch-scope (as integer numbers) - /// 6.4.Load: range metadata (as integer numbers) + /// 6.3.Load: ordering (as underlying enum class value) + /// 6.4.Load: synch-scope (as integer numbers) + /// 6.5.Load: range metadata (as integer ranges) /// On this stage its better to see the code, since its not more than 10-15 /// strings for particular instruction, and could change sometimes. int cmpOperations(const Instruction *L, const Instruction *R) const; @@ -353,8 +352,9 @@ private: /// 3. Pointer operand type (using cmpType method). /// 4. Number of operands. /// 5. Compare operands, using cmpValues method. - int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR); - int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) { + int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const; + int cmpGEPs(const GetElementPtrInst *GEPL, + const GetElementPtrInst *GEPR) const { return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR)); } @@ -401,12 +401,13 @@ private: int cmpTypes(Type *TyL, Type *TyR) const; int cmpNumbers(uint64_t L, uint64_t R) const; + int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const; int cmpAPInts(const APInt &L, const APInt &R) const; int cmpAPFloats(const APFloat &L, const APFloat &R) const; int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; int cmpMem(StringRef L, StringRef R) const; int cmpAttrs(const AttributeSet L, const AttributeSet R) const; - int cmpRangeMetadata(const MDNode* L, const MDNode* R) const; + int cmpRangeMetadata(const MDNode *L, const MDNode *R) const; int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const; // The two functions undergoing comparison. @@ -445,7 +446,7 @@ private: /// But, we are still not able to compare operands of PHI nodes, since those /// could be operands from further BBs we didn't scan yet. /// So it's impossible to use dominance properties in general. 
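The sn_mapL/sn_mapR scheme described above amounts to numbering each function's values by order of first appearance and comparing the numbers; two functions then match when their k-th distinct values occur in the same positions, whatever those values are named. A sketch with std::string standing in for const Value *:

#include <map>
#include <string>

// First sighting of V assigns it the next serial number; later sightings
// return the stored one. emplace() leaves existing entries untouched, and
// its arguments are evaluated before insertion, so size() is the new number.
static int serialNumber(std::map<std::string, int> &SNMap,
                        const std::string &V) {
  return SNMap.emplace(V, (int)SNMap.size()).first->second;
}

// cmpValues(L, R) then reduces to an integer comparison of
// serialNumber(sn_mapL, L) against serialNumber(sn_mapR, R).
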
- DenseMap<const Value*, int> sn_mapL, sn_mapR; + mutable DenseMap<const Value*, int> sn_mapL, sn_mapR; // The global state we will use GlobalNumberState* GlobalNumbers; @@ -477,6 +478,12 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { return 0; } +int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const { + if ((int)L < (int)R) return -1; + if ((int)L > (int)R) return 1; + return 0; +} + int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) return Res; @@ -538,8 +545,8 @@ int FunctionComparator::cmpAttrs(const AttributeSet L, return 0; } -int FunctionComparator::cmpRangeMetadata(const MDNode* L, - const MDNode* R) const { +int FunctionComparator::cmpRangeMetadata(const MDNode *L, + const MDNode *R) const { if (L == R) return 0; if (!L) @@ -547,7 +554,7 @@ int FunctionComparator::cmpRangeMetadata(const MDNode* L, if (!R) return 1; // Range metadata is a sequence of numbers. Make sure they are the same - // sequence. + // sequence. // TODO: Note that as this is metadata, it is possible to drop and/or merge // this data when considering functions to merge. Thus this comparison would // return 0 (i.e. equivalent), but merging would become more complicated @@ -557,8 +564,8 @@ int FunctionComparator::cmpRangeMetadata(const MDNode* L, if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) return Res; for (size_t I = 0; I < L->getNumOperands(); ++I) { - ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); - ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); + ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); + ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue())) return Res; } @@ -596,7 +603,8 @@ int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, /// type. /// 2. Compare constant contents. /// For more details see declaration comments. -int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { +int FunctionComparator::cmpConstants(const Constant *L, + const Constant *R) const { Type *TyL = L->getType(); Type *TyR = R->getType(); @@ -793,7 +801,7 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { } } -int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) { +int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const { return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R)); } @@ -898,9 +906,9 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { int FunctionComparator::cmpOperations(const Instruction *L, const Instruction *R) const { // Differences from Instruction::isSameOperationAs: - // * replace type comparison with calls to isEquivalentType. - // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top - // * because of the above, we don't test for the tail bit on calls later on + // * replace type comparison with calls to cmpTypes. + // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top. + // * because of the above, we don't test for the tail bit on calls later on. 
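The new cmpOrderings overload exists because AtomicOrdering is now a scoped enumeration, which no longer converts implicitly to the uint64_t that cmpNumbers takes. Every per-field hop in the comparator uses the same chained lexicographic idiom; a sketch with an illustrative two-field record:

#include <cstdint>

struct Rec { uint64_t Opcode, NumOperands; };  // fields are illustrative

static int cmpNumbers(uint64_t L, uint64_t R) {
  return L < R ? -1 : (L > R ? 1 : 0);
}

// Compare field by field, most significant first; the first nonzero result
// decides, mirroring the "if (int Res = ...) return Res;" chains below.
static int cmpRecs(const Rec &L, const Rec &R) {
  if (int Res = cmpNumbers(L.Opcode, R.Opcode))
    return Res;
  return cmpNumbers(L.NumOperands, R.NumOperands);
}
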
if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode())) return Res; @@ -914,15 +922,6 @@ int FunctionComparator::cmpOperations(const Instruction *L, R->getRawSubclassOptionalData())) return Res; - if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) { - if (int Res = cmpTypes(AI->getAllocatedType(), - cast<AllocaInst>(R)->getAllocatedType())) - return Res; - if (int Res = - cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment())) - return Res; - } - // We have two instructions of identical opcode and #operands. Check to see // if all operands are the same type for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { @@ -932,6 +931,12 @@ int FunctionComparator::cmpOperations(const Instruction *L, } // Check special state that is a part of some instructions. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) { + if (int Res = cmpTypes(AI->getAllocatedType(), + cast<AllocaInst>(R)->getAllocatedType())) + return Res; + return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()); + } if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) return Res; @@ -939,7 +944,7 @@ int FunctionComparator::cmpOperations(const Instruction *L, cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment())) return Res; if (int Res = - cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) + cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) return Res; if (int Res = cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) @@ -955,7 +960,7 @@ int FunctionComparator::cmpOperations(const Instruction *L, cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment())) return Res; if (int Res = - cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) + cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) return Res; return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); } @@ -996,6 +1001,7 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(LIndices[i], RIndices[i])) return Res; } + return 0; } if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { ArrayRef<unsigned> LIndices = EVI->getIndices(); @@ -1009,11 +1015,10 @@ int FunctionComparator::cmpOperations(const Instruction *L, } if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { if (int Res = - cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) + cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) return Res; return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); } - if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { if (int Res = cmpNumbers(CXI->isVolatile(), cast<AtomicCmpXchgInst>(R)->isVolatile())) @@ -1021,11 +1026,13 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(CXI->isWeak(), cast<AtomicCmpXchgInst>(R)->isWeak())) return Res; - if (int Res = cmpNumbers(CXI->getSuccessOrdering(), - cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) + if (int Res = + cmpOrderings(CXI->getSuccessOrdering(), + cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) return Res; - if (int Res = cmpNumbers(CXI->getFailureOrdering(), - cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) + if (int Res = + cmpOrderings(CXI->getFailureOrdering(), + cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) return Res; return cmpNumbers(CXI->getSynchScope(), cast<AtomicCmpXchgInst>(R)->getSynchScope()); @@ -1037,19 +1044,30 @@ int 
FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(RMWI->isVolatile(), cast<AtomicRMWInst>(R)->isVolatile())) return Res; - if (int Res = cmpNumbers(RMWI->getOrdering(), + if (int Res = cmpOrderings(RMWI->getOrdering(), cast<AtomicRMWInst>(R)->getOrdering())) return Res; return cmpNumbers(RMWI->getSynchScope(), cast<AtomicRMWInst>(R)->getSynchScope()); } + if (const PHINode *PNL = dyn_cast<PHINode>(L)) { + const PHINode *PNR = cast<PHINode>(R); + // Ensure that in addition to the incoming values being identical + // (checked by the caller of this function), the incoming blocks + // are also identical. + for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) { + if (int Res = + cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i))) + return Res; + } + } return 0; } // Determine whether two GEP operations perform the same underlying arithmetic. // Read method declaration comments for more details. int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, - const GEPOperator *GEPR) { + const GEPOperator *GEPR) const { unsigned int ASL = GEPL->getPointerAddressSpace(); unsigned int ASR = GEPR->getPointerAddressSpace(); @@ -1106,7 +1124,7 @@ int FunctionComparator::cmpInlineAsm(const InlineAsm *L, /// this is the first time the values are seen, they're added to the mapping so /// that we will detect mismatches on next use. /// See comments in declaration for more details. -int FunctionComparator::cmpValues(const Value *L, const Value *R) { +int FunctionComparator::cmpValues(const Value *L, const Value *R) const { // Catch self-reference case. if (L == FnL) { if (R == FnR) @@ -1149,7 +1167,7 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { } // Test whether two basic blocks have equivalent behaviour. int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, - const BasicBlock *BBR) { + const BasicBlock *BBR) const { BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); @@ -1186,7 +1204,8 @@ int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, } } - ++InstL, ++InstR; + ++InstL; + ++InstR; } while (InstL != InstLE && InstR != InstRE); if (InstL != InstLE && InstR == InstRE) @@ -1249,7 +1268,7 @@ int FunctionComparator::compare() { // functions, then takes each block from each terminator in order. As an // artifact, this also means that unreachable blocks are ignored. SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs; - SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1. + SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1. FnLBBs.push_back(&FnL->getEntryBlock()); FnRBBs.push_back(&FnR->getEntryBlock()); @@ -1517,6 +1536,9 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { } bool MergeFunctions::runOnModule(Module &M) { + if (skipModule(M)) + return false; + bool Changed = false; // All functions in the module, ordered by hash. Functions with a unique @@ -1555,28 +1577,12 @@ bool MergeFunctions::runOnModule(Module &M) { DEBUG(dbgs() << "size of module: " << M.size() << '\n'); DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); - // Insert only strong functions and merge them. Strong function merging - // always deletes one of them. 
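runOnModule above orders candidates by a cheap structural hash before any deep comparison, so only functions whose hashes collide ever reach FunctionComparator. A rough standalone sketch of that pre-filter; Fn and hashOf are stand-ins, not the pass's types:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

struct Fn { uint64_t Id; };
static uint64_t hashOf(const Fn &F) { return F.Id % 7; }  // illustrative only

// Pair each function with its hash and sort; equal hashes become adjacent.
// A function whose hash matches neither neighbor can never merge and is
// skipped without running the expensive comparator.
static std::vector<std::pair<uint64_t, Fn *>>
bucketByHash(std::vector<Fn> &Fns) {
  std::vector<std::pair<uint64_t, Fn *>> Hashed;
  for (Fn &F : Fns)
    Hashed.emplace_back(hashOf(F), &F);
  std::stable_sort(Hashed.begin(), Hashed.end(),
                   [](const std::pair<uint64_t, Fn *> &A,
                      const std::pair<uint64_t, Fn *> &B) {
                     return A.first < B.first;
                   });
  return Hashed;
}
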
- for (std::vector<WeakVH>::iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - if (!*I) continue; - Function *F = cast<Function>(*I); - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && - !F->mayBeOverridden()) { - Changed |= insert(F); - } - } - - // Insert only weak functions and merge them. By doing these second we - // create thunks to the strong function when possible. When two weak - // functions are identical, we create a new strong function with two weak - // weak thunks to it which are identical but not mergable. - for (std::vector<WeakVH>::iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - if (!*I) continue; - Function *F = cast<Function>(*I); - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && - F->mayBeOverridden()) { + // Insert functions and merge them. + for (WeakVH &I : Worklist) { + if (!I) + continue; + Function *F = cast<Function>(I); + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) { Changed |= insert(F); } } @@ -1631,7 +1637,7 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { // Replace G with an alias to F if possible, or else a thunk to F. Deletes G. void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { - if (HasGlobalAliases && G->hasUnnamedAddr()) { + if (HasGlobalAliases && G->hasGlobalUnnamedAddr()) { if (G->hasExternalLinkage() || G->hasLocalLinkage() || G->hasWeakLinkage()) { writeAlias(F, G); @@ -1645,7 +1651,7 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { // Helper for writeThunk, // Selects proper bitcast operation, // but a bit simpler then CastInst::getCastOpcode. -static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) { +static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { Type *SrcTy = V->getType(); if (SrcTy->isStructTy()) { assert(DestTy->isStructTy()); @@ -1673,7 +1679,7 @@ static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) { // Replace G with a simple tail call to bitcast(F). Also replace direct uses // of G with bitcast(F). Deletes G. void MergeFunctions::writeThunk(Function *F, Function *G) { - if (!G->mayBeOverridden()) { + if (!G->isInterposable()) { // Redirect direct callers of G to F. replaceDirectCallers(G, F); } @@ -1688,7 +1694,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", G->getParent()); BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); - IRBuilder<false> Builder(BB); + IRBuilder<> Builder(BB); SmallVector<Value *, 16> Args; unsigned i = 0; @@ -1734,8 +1740,8 @@ void MergeFunctions::writeAlias(Function *F, Function *G) { // Merge two equivalent functions. Upon completion, Function G is deleted. void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { - if (F->mayBeOverridden()) { - assert(G->mayBeOverridden()); + if (F->isInterposable()) { + assert(G->isInterposable()); // Make them both thunks to the same internal function. Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", @@ -1816,20 +1822,16 @@ bool MergeFunctions::insert(Function *NewFunction) { // important when operating on more than one module independently to prevent // cycles of thunks calling each other when the modules are linked together. // - // When one function is weak and the other is strong there is an order imposed - // already. We process strong functions before weak functions. 
- if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) || - (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden())) - if (OldF.getFunc()->getName() > NewFunction->getName()) { - // Swap the two functions. - Function *F = OldF.getFunc(); - replaceFunctionInTree(*Result.first, NewFunction); - NewFunction = F; - assert(OldF.getFunc() != F && "Must have swapped the functions."); - } - - // Never thunk a strong function to a weak function. - assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden()); + // First of all, we process strong functions before weak functions. + if ((OldF.getFunc()->isInterposable() && !NewFunction->isInterposable()) || + (OldF.getFunc()->isInterposable() == NewFunction->isInterposable() && + OldF.getFunc()->getName() > NewFunction->getName())) { + // Swap the two functions. + Function *F = OldF.getFunc(); + replaceFunctionInTree(*Result.first, NewFunction); + NewFunction = F; + assert(OldF.getFunc() != F && "Must have swapped the functions."); + } DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " << NewFunction->getName() << '\n'); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 0c5c84bbccabe..49c44173491ec 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" using namespace llvm; @@ -28,27 +29,34 @@ using namespace llvm; STATISTIC(NumPartialInlined, "Number of functions partially inlined"); namespace { - struct PartialInliner : public ModulePass { - void getAnalysisUsage(AnalysisUsage &AU) const override { } - static char ID; // Pass identification, replacement for typeid - PartialInliner() : ModulePass(ID) { - initializePartialInlinerPass(*PassRegistry::getPassRegistry()); - } +struct PartialInlinerLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + PartialInlinerLegacyPass() : ModulePass(ID) { + initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); + } - bool runOnModule(Module& M) override; + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + ModuleAnalysisManager DummyMAM; + auto PA = Impl.run(M, DummyMAM); + return !PA.areAllPreserved(); + } - private: - Function* unswitchFunction(Function* F); +private: + PartialInlinerPass Impl; }; } -char PartialInliner::ID = 0; -INITIALIZE_PASS(PartialInliner, "partial-inliner", - "Partial Inliner", false, false) +char PartialInlinerLegacyPass::ID = 0; +INITIALIZE_PASS(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", + false, false) -ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } +ModulePass *llvm::createPartialInliningPass() { + return new PartialInlinerLegacyPass(); +} -Function* PartialInliner::unswitchFunction(Function* F) { +Function *PartialInlinerPass::unswitchFunction(Function *F) { // First, verify that this function is an unswitching candidate... 
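The folded condition above replaces the old two-pass strong-then-weak scheme with a single ordering: non-interposable (strong) definitions always win the canonical slot, and equal strength falls back to the name comparison so the choice stays deterministic across modules. The same test as a standalone predicate:

#include <string>

struct FnInfo {
  bool Interposable;  // weak: the definition may be replaced at link time
  std::string Name;
};

// True when New should become the canonical function in the tree: it is
// strictly stronger than Old, or equally strong with the smaller name.
static bool shouldSwap(const FnInfo &Old, const FnInfo &New) {
  return (Old.Interposable && !New.Interposable) ||
         (Old.Interposable == New.Interposable && Old.Name > New.Name);
}
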
BasicBlock *entryBlock = &F->front(); BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); @@ -71,10 +79,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; - Function* duplicateFunction = CloneFunction(F, VMap, - /*ModuleLevelChanges=*/false); + Function* duplicateFunction = CloneFunction(F, VMap); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); - F->getParent()->getFunctionList().push_back(duplicateFunction); BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]); BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]); @@ -112,11 +118,10 @@ Function* PartialInliner::unswitchFunction(Function* F) { // Gather up the blocks that we're going to extract. std::vector<BasicBlock*> toExtract; toExtract.push_back(newNonReturnBlock); - for (Function::iterator FI = duplicateFunction->begin(), - FE = duplicateFunction->end(); FI != FE; ++FI) - if (&*FI != newEntryBlock && &*FI != newReturnBlock && - &*FI != newNonReturnBlock) - toExtract.push_back(&*FI); + for (BasicBlock &BB : *duplicateFunction) + if (&BB != newEntryBlock && &BB != newReturnBlock && + &BB != newNonReturnBlock) + toExtract.push_back(&BB); // The CodeExtractor needs a dominator tree. DominatorTree DT; @@ -131,11 +136,10 @@ Function* PartialInliner::unswitchFunction(Function* F) { // Inline the top-level if test into all callers. std::vector<User *> Users(duplicateFunction->user_begin(), duplicateFunction->user_end()); - for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); - UI != UE; ++UI) - if (CallInst *CI = dyn_cast<CallInst>(*UI)) + for (User *User : Users) + if (CallInst *CI = dyn_cast<CallInst>(User)) InlineFunction(CI, IFI); - else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) + else if (InvokeInst *II = dyn_cast<InvokeInst>(User)) InlineFunction(II, IFI); // Ditch the duplicate, since we're done with it, and rewrite all remaining @@ -148,13 +152,13 @@ Function* PartialInliner::unswitchFunction(Function* F) { return extractedFunction; } -bool PartialInliner::runOnModule(Module& M) { +PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &) { std::vector<Function*> worklist; worklist.reserve(M.size()); - for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) - if (!FI->use_empty() && !FI->isDeclaration()) - worklist.push_back(&*FI); - + for (Function &F : M) + if (!F.use_empty() && !F.isDeclaration()) + worklist.push_back(&F); + bool changed = false; while (!worklist.empty()) { Function* currFunc = worklist.back(); @@ -178,6 +182,8 @@ bool PartialInliner::runOnModule(Module& M) { } } - - return changed; + + if (changed) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index faada9c2a7db6..cf5b76dc365be 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -16,23 +16,27 @@ #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include 
"llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Vectorize.h" using namespace llvm; @@ -58,10 +62,6 @@ static cl::opt<bool> ExtraVectorizerPasses( "extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization.")); -static cl::opt<bool> UseNewSROA("use-new-sroa", - cl::init(true), cl::Hidden, - cl::desc("Enable the new, experimental SROA pass")); - static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); @@ -80,9 +80,19 @@ RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " "vectorizer instead of before")); -static cl::opt<bool> UseCFLAA("use-cfl-aa", - cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental CFL alias analysis")); +// Experimental option to use CFL-AA +enum class CFLAAType { None, Steensgaard, Andersen, Both }; +static cl::opt<CFLAAType> + UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis"), + cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"), + clEnumValN(CFLAAType::Steensgaard, "steens", + "Enable unification-based CFL-AA"), + clEnumValN(CFLAAType::Andersen, "anders", + "Enable inclusion-based CFL-AA"), + clEnumValN(CFLAAType::Both, "both", + "Enable both variants of CFL-aa"), + clEnumValEnd)); static cl::opt<bool> EnableMLSM("mlsm", cl::init(true), cl::Hidden, @@ -92,25 +102,44 @@ static cl::opt<bool> EnableLoopInterchange( "enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopInterchange Pass")); -static cl::opt<bool> EnableLoopDistribute( - "enable-loop-distribute", cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental LoopDistribution Pass")); - static cl::opt<bool> EnableNonLTOGlobalsModRef( "enable-non-lto-gmr", cl::init(true), cl::Hidden, cl::desc( "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); static cl::opt<bool> EnableLoopLoadElim( - "enable-loop-load-elim", cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental LoopLoadElimination Pass")); + "enable-loop-load-elim", cl::init(true), cl::Hidden, + cl::desc("Enable the LoopLoadElimination Pass")); + +static cl::opt<std::string> RunPGOInstrGen( + "profile-generate", cl::init(""), cl::Hidden, + cl::desc("Enable generation phase of PGO instrumentation and specify the " + "path of profile data file")); + +static cl::opt<std::string> RunPGOInstrUse( + "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"), + cl::desc("Enable use phase of PGO instrumentation and specify the path " + "of profile data file")); + +static cl::opt<bool> UseLoopVersioningLICM( + "enable-loop-versioning-licm", cl::init(false), cl::Hidden, + cl::desc("Enable the experimental Loop Versioning LICM 
pass")); + +static cl::opt<bool> + DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, + cl::desc("Disable pre-instrumentation inliner")); + +static cl::opt<int> PreInlineThreshold( + "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + cl::desc("Control the amount of inlining in pre-instrumentation inliner " + "(default = 75)")); PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; - FunctionIndex = nullptr; + ModuleSummary = nullptr; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -123,6 +152,10 @@ PassManagerBuilder::PassManagerBuilder() { VerifyOutput = false; MergeFunctions = false; PrepareForLTO = false; + PGOInstrGen = RunPGOInstrGen; + PGOInstrUse = RunPGOInstrUse; + PrepareForThinLTO = false; + PerformThinLTO = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -137,11 +170,11 @@ static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, void PassManagerBuilder::addGlobalExtension( PassManagerBuilder::ExtensionPointTy Ty, PassManagerBuilder::ExtensionFn Fn) { - GlobalExtensions->push_back(std::make_pair(Ty, Fn)); + GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn))); } void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { - Extensions.push_back(std::make_pair(Ty, Fn)); + Extensions.push_back(std::make_pair(Ty, std::move(Fn))); } void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, @@ -156,15 +189,34 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, void PassManagerBuilder::addInitialAliasAnalysisPasses( legacy::PassManagerBase &PM) const { + switch (UseCFLAA) { + case CFLAAType::Steensgaard: + PM.add(createCFLSteensAAWrapperPass()); + break; + case CFLAAType::Andersen: + PM.add(createCFLAndersAAWrapperPass()); + break; + case CFLAAType::Both: + PM.add(createCFLSteensAAWrapperPass()); + PM.add(createCFLAndersAAWrapperPass()); + break; + default: + break; + } + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. - if (UseCFLAA) - PM.add(createCFLAAWrapperPass()); PM.add(createTypeBasedAAWrapperPass()); PM.add(createScopedNoAliasAAWrapperPass()); } +void PassManagerBuilder::addInstructionCombiningPass( + legacy::PassManagerBase &PM) const { + bool ExpensiveCombines = OptLevel > 2; + PM.add(createInstructionCombiningPass(ExpensiveCombines)); +} + void PassManagerBuilder::populateFunctionPassManager( legacy::FunctionPassManager &FPM) { addExtensionsToPM(EP_EarlyAsPossible, FPM); @@ -178,94 +230,50 @@ void PassManagerBuilder::populateFunctionPassManager( addInitialAliasAnalysisPasses(FPM); FPM.add(createCFGSimplificationPass()); - if (UseNewSROA) - FPM.add(createSROAPass()); - else - FPM.add(createScalarReplAggregatesPass()); + FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); + FPM.add(createGVNHoistPass()); FPM.add(createLowerExpectIntrinsicPass()); } -void PassManagerBuilder::populateModulePassManager( - legacy::PassManagerBase &MPM) { - // Allow forcing function attributes as a debugging and tuning aid. - MPM.add(createForceFunctionAttrsLegacyPass()); - - // If all optimizations are disabled, just run the always-inline pass and, - // if enabled, the function merging pass. - if (OptLevel == 0) { - if (Inliner) { - MPM.add(Inliner); - Inliner = nullptr; - } - - // FIXME: The BarrierNoopPass is a HACK! 
The inliner pass above implicitly - // creates a CGSCC pass manager, but we don't want to add extensions into - // that pass manager. To prevent this we insert a no-op module pass to reset - // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 - // builds. The function merging pass is - if (MergeFunctions) - MPM.add(createMergeFunctionsPass()); - else if (!GlobalExtensions->empty() || !Extensions.empty()) - MPM.add(createBarrierNoopPass()); - - addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); +// Do PGO instrumentation generation or use pass as the option specified. +void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { + if (PGOInstrGen.empty() && PGOInstrUse.empty()) return; - } - - // Add LibraryInfo if we have some. - if (LibraryInfo) - MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - - addInitialAliasAnalysisPasses(MPM); - - if (!DisableUnitAtATime) { - // Infer attributes about declarations if possible. - MPM.add(createInferFunctionAttrsLegacyPass()); - - addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); - - MPM.add(createIPSCCPPass()); // IP SCCP - MPM.add(createGlobalOptimizerPass()); // Optimize out global vars - // Promote any localized global vars - MPM.add(createPromoteMemoryToRegisterPass()); - - MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - - MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + // Perform the preinline and cleanup passes for O1 and above. + // And avoid doing them if optimizing for size. + if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) { + // Create preinline pass. + MPM.add(createFunctionInliningPass(PreInlineThreshold)); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } - - if (EnableNonLTOGlobalsModRef) - // We add a module alias analysis pass here. In part due to bugs in the - // analysis infrastructure this "works" in that the analysis stays alive - // for the entire SCC pass run below. - MPM.add(createGlobalsAAWrapperPass()); - - // Start of CallGraph SCC passes. - if (!DisableUnitAtATime) - MPM.add(createPruneEHPass()); // Remove dead EH info - if (Inliner) { - MPM.add(Inliner); - Inliner = nullptr; + if (!PGOInstrGen.empty()) { + MPM.add(createPGOInstrumentationGenLegacyPass()); + // Add the profile lowering pass. + InstrProfOptions Options; + Options.InstrProfileOutput = PGOInstrGen; + MPM.add(createInstrProfilingLegacyPass(Options)); } - if (!DisableUnitAtATime) - MPM.add(createPostOrderFunctionAttrsPass()); - if (OptLevel > 2) - MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args - + if (!PGOInstrUse.empty()) + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); +} +void PassManagerBuilder::addFunctionSimplificationPasses( + legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. - if (UseNewSROA) - MPM.add(createSROAPass()); - else - MPM.add(createScalarReplAggregatesPass(-1, false)); + MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. 
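addPGOInstrPasses above keys the whole PGO flow off two strings: a non-empty PGOInstrGen schedules the instrumentation-generation pass plus profile lowering with that output path, and a non-empty PGOInstrUse schedules the profile-use pass. A sketch of how a client would drive the two phases through the builder (both file names are placeholders):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    // Phase 1: instrument and lower profiling to the given output path.
    void buildGenPipeline(legacy::PassManager &MPM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.PGOInstrGen = "default.profraw"; // placeholder path
      PMB.populateModulePassManager(MPM);
    }

    // Phase 2: re-optimize using the merged profile data.
    void buildUsePipeline(legacy::PassManager &MPM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.PGOInstrUse = "default.profdata"; // placeholder path
      PMB.populateModulePassManager(MPM);
    }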
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Combine silly seq's + // Combine silly seq's + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createTailCallEliminationPass()); // Eliminate tail calls @@ -276,7 +284,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops @@ -303,7 +311,7 @@ void PassManagerBuilder::populateModulePassManager( // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); @@ -320,7 +328,7 @@ void PassManagerBuilder::populateModulePassManager( if (BBVectorize) { MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies @@ -338,18 +346,99 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Clean up after everything. + // Clean up after everything. + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); +} + +void PassManagerBuilder::populateModulePassManager( + legacy::PassManagerBase &MPM) { + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + + // If all optimizations are disabled, just run the always-inline pass and, + // if enabled, the function merging pass. + if (OptLevel == 0) { + addPGOInstrPasses(MPM); + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + } + + // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly + // creates a CGSCC pass manager, but we don't want to add extensions into + // that pass manager. To prevent this we insert a no-op module pass to reset + // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 + // builds. The function merging pass is + if (MergeFunctions) + MPM.add(createMergeFunctionsPass()); + else if (!GlobalExtensions->empty() || !Extensions.empty()) + MPM.add(createBarrierNoopPass()); + + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + return; + } + + // Add LibraryInfo if we have some. + if (LibraryInfo) + MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); + + addInitialAliasAnalysisPasses(MPM); + + if (!DisableUnitAtATime) { + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); + + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars. 
+ MPM.add(createPromoteMemoryToRegisterPass()); + + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + } + + if (!PerformThinLTO) { + /// PGO instrumentation is added during the compile phase for ThinLTO, do + /// not run it a second time + addPGOInstrPasses(MPM); + } + + // Indirect call promotion that promotes intra-module targets only. + MPM.add(createPGOIndirectCallPromotionLegacyPass()); + + if (EnableNonLTOGlobalsModRef) + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + + // Start of CallGraph SCC passes. + if (!DisableUnitAtATime) + MPM.add(createPruneEHPass()); // Remove dead EH info + if (Inliner) { + MPM.add(Inliner); + Inliner = nullptr; + } + if (!DisableUnitAtATime) + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + addFunctionSimplificationPasses(MPM); // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); - if (!DisableUnitAtATime) - MPM.add(createReversePostOrderFunctionAttrsPass()); - - if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO && + !PrepareForThinLTO) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve // these so they are eligible for inlining at link-time. Note if they @@ -360,6 +449,34 @@ void PassManagerBuilder::populateModulePassManager( // globals referenced by available external functions dead // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); + + if (!DisableUnitAtATime) + MPM.add(createReversePostOrderFunctionAttrsPass()); + + // If we are planning to perform ThinLTO later, let's not bloat the code with + // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes + // during ThinLTO and perform the rest of the optimizations afterward. + if (PrepareForThinLTO) { + // Reduce the size of the IR as much as possible. + MPM.add(createGlobalOptimizerPass()); + // Rename anon function to be able to export them in the summary. + MPM.add(createNameAnonFunctionPass()); + return; + } + + if (PerformThinLTO) + // Optimize globals now when performing ThinLTO, this enables more + // optimizations later. + MPM.add(createGlobalOptimizerPass()); + + // Scheduling LoopVersioningLICM when inlining is over, because after that + // we may see more accurate aliasing. Reason to run this late is that too + // early versioning may prevent further inlining due to increase of code + // size. By placing it just after inlining other optimizations which runs + // later might get benefit of no-alias assumption in clone loop. 
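The PrepareForThinLTO early return and the PerformThinLTO guard above split this function into a pre-link compile pipeline and a post-link backend pipeline; populateThinLTOPassManager, added further down in this file, runs the second phase by setting PerformThinLTO around a nested populateModulePassManager call. A sketch of the two phases as a client would build them (the summary index is assumed to be provided by the linker):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/ModuleSummaryIndex.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    // Compile (pre-link) phase: stop before unrolling/vectorization and
    // rename anonymous functions so they can be exported in the summary.
    void buildThinLTOPreLink(legacy::PassManager &PM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.PrepareForThinLTO = true;
      PMB.populateModulePassManager(PM);
    }

    // Backend (post-link) phase: import functions per the combined summary,
    // then run the rest of the pipeline with PerformThinLTO set internally.
    void buildThinLTOBackend(legacy::PassManager &PM,
                             const ModuleSummaryIndex *Summary) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.ModuleSummary = Summary;
      PMB.populateThinLTOPassManager(PM);
    }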
+ if (UseLoopVersioningLICM) { + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass()); // Hoist loop invariants } if (EnableNonLTOGlobalsModRef) @@ -391,9 +508,10 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); // Distribute loops to allow partial vectorization. I.e. isolate dependences - // into separate loop that would otherwise inhibit vectorization. - if (EnableLoopDistribute) - MPM.add(createLoopDistributePass()); + // into separate loop that would otherwise inhibit vectorization. This is + // currently only performed for loops marked with the metadata + // llvm.loop.distribute=true or when -enable-loop-distribute is specified. + MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false)); MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); @@ -407,7 +525,7 @@ void PassManagerBuilder::populateModulePassManager( // on -O1 and no #pragma is found). Would be good to have these two passes // as function calls, so that we can only pass them when the vectorizer // changed the code. - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); if (OptLevel > 1 && ExtraVectorizerPasses) { // At higher optimization levels, try to clean up any runtime overlap and // alignment checks inserted by the vectorizer. We want to track correllated @@ -417,11 +535,11 @@ void PassManagerBuilder::populateModulePassManager( // dead (or speculatable) control flows or more combining opportunities. MPM.add(createEarlyCSEPass()); MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); MPM.add(createLICMPass()); MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); } if (RunSLPAfterLoopVectorization) { @@ -434,7 +552,7 @@ void PassManagerBuilder::populateModulePassManager( if (BBVectorize) { MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies @@ -449,19 +567,22 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { MPM.add(createLoopUnrollPass()); // Unroll small loops // LoopUnroll may generate some redundency to cleanup. - MPM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(MPM); // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. MPM.add(createLICMPass()); + + // Get rid of LCSSA nodes. + MPM.add(createInstructionSimplifierPass()); } // After vectorization and unrolling, assume intrinsics may tell us more @@ -487,11 +608,15 @@ void PassManagerBuilder::populateModulePassManager( } void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { + // Remove unused virtual tables to improve the quality of code generated by + // whole-program devirtualization and bitset lowering. 
+ PM.add(createGlobalDCEPass()); + // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); - if (FunctionIndex) - PM.add(createFunctionImportPass(FunctionIndex)); + if (ModuleSummary) + PM.add(createFunctionImportPass(ModuleSummary)); // Allow forcing function attributes as a debugging and tuning aid. PM.add(createForceFunctionAttrsLegacyPass()); @@ -499,14 +624,32 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Infer attributes about declarations if possible. PM.add(createInferFunctionAttrsLegacyPass()); - // Propagate constants at call sites into the functions they call. This - // opens opportunities for globalopt (and inlining) by substituting function - // pointers passed as arguments to direct uses of functions. - PM.add(createIPSCCPPass()); + if (OptLevel > 1) { + // Indirect call promotion. This should promote all the targets that are + // left by the earlier promotion pass that promotes intra-module targets. + // This two-step promotion is to save the compile time. For LTO, it should + // produce the same result as if we only do promotion here. + PM.add(createPGOIndirectCallPromotionLegacyPass(true)); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + PM.add(createIPSCCPPass()); + } - // Now that we internalized some globals, see if we can hack on them! - PM.add(createPostOrderFunctionAttrsPass()); + // Infer attributes about definitions. The readnone attribute in particular is + // required for virtual constant propagation. + PM.add(createPostOrderFunctionAttrsLegacyPass()); PM.add(createReversePostOrderFunctionAttrsPass()); + + // Apply whole-program devirtualization and virtual constant propagation. + PM.add(createWholeProgramDevirtPass()); + + // That's all we need at opt level 1. + if (OptLevel == 1) + return; + + // Now that we internalized some globals, see if we can hack on them! PM.add(createGlobalOptimizerPass()); // Promote any localized global vars. PM.add(createPromoteMemoryToRegisterPass()); @@ -522,7 +665,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // simplification opportunities, and both can propagate functions through // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. - PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); // Inline small functions @@ -544,18 +687,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createArgumentPromotionPass()); // The IPO passes may leave cruft around. Clean up after them. - PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); // Break up allocas - if (UseNewSROA) - PM.add(createSROAPass()); - else - PM.add(createScalarReplAggregatesPass()); + PM.add(createSROAPass()); // Run a few AA driven optimizations here and now, to cleanup the code. - PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture. + PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. 
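For context, addLTOOptimizationPasses is not called directly by clients; populateLTOPassManager (further down in this diff) invokes it whenever OptLevel is nonzero and brackets the run with optional verifier passes. A minimal driver sketch:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    bool runLTOPipeline(Module &M) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;       // nonzero, so addLTOOptimizationPasses runs
      PMB.VerifyInput = true; // verify the merged module before optimizing
      PMB.VerifyOutput = true;
      legacy::PassManager PM;
      PMB.populateLTOPassManager(PM);
      return PM.run(M); // true if any pass modified M
    }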
@@ -573,15 +713,20 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { if (EnableLoopInterchange) PM.add(createLoopInterchangePass()); + if (!DisableUnrollLoops) + PM.add(createSimpleLoopUnrollPass()); // Unroll small loops PM.add(createLoopVectorizePass(true, LoopVectorize)); + // The vectorizer may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + PM.add(createLoopUnrollPass()); // Now that we've optimized loops (in particular loop induction variables), // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. - PM.add(createInstructionCombiningPass()); // Initial cleanup + addInstructionCombiningPass(PM); // Initial cleanup PM.add(createCFGSimplificationPass()); // if-convert PM.add(createSCCPPass()); // Propagate exposed constants - PM.add(createInstructionCombiningPass()); // Clean up again + addInstructionCombiningPass(PM); // Clean up again PM.add(createBitTrackingDCEPass()); // More scalar chains could be vectorized due to more alias information @@ -597,7 +742,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createLoadCombinePass()); // Cleanup and simplify the code after the scalar optimizations. - PM.add(createInstructionCombiningPass()); + addInstructionCombiningPass(PM); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); @@ -620,6 +765,23 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( PM.add(createMergeFunctionsPass()); } +void PassManagerBuilder::populateThinLTOPassManager( + legacy::PassManagerBase &PM) { + PerformThinLTO = true; + + if (VerifyInput) + PM.add(createVerifierPass()); + + if (ModuleSummary) + PM.add(createFunctionImportPass(ModuleSummary)); + + populateModulePassManager(PM); + + if (VerifyOutput) + PM.add(createVerifierPass()); + PerformThinLTO = false; +} + void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (LibraryInfo) PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); @@ -627,17 +789,17 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (VerifyInput) PM.add(createVerifierPass()); - if (OptLevel > 1) + if (OptLevel != 0) addLTOOptimizationPasses(PM); // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. PM.add(createCrossDSOCFIPass()); - // Lower bit sets to globals. This pass supports Clang's control flow - // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI - // is enabled. The pass does nothing if CFI is disabled. - PM.add(createLowerBitSetsPass()); + // Lower type metadata and the type.test intrinsic. This pass supports Clang's + // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at + // link time if CFI is enabled. The pass does nothing if CFI is disabled. + PM.add(createLowerTypeTestsPass()); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 22a95fa03f7c5..2aa3fa55cefdc 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -16,7 +16,6 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/CallGraph.h" @@ -48,10 +47,10 @@ namespace { // runOnSCC - Analyze the SCC, performing the transformation if possible. 
bool runOnSCC(CallGraphSCC &SCC) override; - bool SimplifyFunction(Function *F); - void DeleteBasicBlock(BasicBlock *BB); }; } +static bool SimplifyFunction(Function *F, CallGraph &CG); +static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG); char PruneEH::ID = 0; INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", @@ -62,22 +61,20 @@ INITIALIZE_PASS_END(PruneEH, "prune-eh", Pass *llvm::createPruneEHPass() { return new PruneEH(); } - -bool PruneEH::runOnSCC(CallGraphSCC &SCC) { +static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; - CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - SCCNodes.insert(*I); + for (CallGraphNode *I : SCC) + SCCNodes.insert(I); // First pass, scan all of the functions in the SCC, simplifying them // according to what we know. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - if (Function *F = (*I)->getFunction()) - MadeChange |= SimplifyFunction(F); + for (CallGraphNode *I : SCC) + if (Function *F = I->getFunction()) + MadeChange |= SimplifyFunction(F, CG); // Next, check to see if any callees might throw or if there are any external // functions in this SCC: if so, we cannot prune any functions in this SCC. @@ -93,7 +90,10 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { if (!F) { SCCMightUnwind = true; SCCMightReturn = true; - } else if (F->isDeclaration() || F->mayBeOverridden()) { + } else if (F->isDeclaration() || F->isInterposable()) { + // Note: isInterposable (as opposed to hasExactDefinition) is fine above, + // since we're not inferring new attributes here, but only using existing, + // assumed to be correct, function attributes. SCCMightUnwind |= !F->doesNotThrow(); SCCMightReturn |= !F->doesNotReturn(); } else { @@ -153,8 +153,8 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { // If the SCC doesn't unwind or doesn't throw, note this fact. if (!SCCMightUnwind || !SCCMightReturn) - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); + for (CallGraphNode *I : SCC) { + Function *F = I->getFunction(); if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) { F->addFnAttr(Attribute::NoUnwind); @@ -167,22 +167,30 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { } } - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + for (CallGraphNode *I : SCC) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. This makes the exception blocks // dead. - if (Function *F = (*I)->getFunction()) - MadeChange |= SimplifyFunction(F); + if (Function *F = I->getFunction()) + MadeChange |= SimplifyFunction(F, CG); } return MadeChange; } +bool PruneEH::runOnSCC(CallGraphSCC &SCC) { + if (skipSCC(SCC)) + return false; + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + return runImpl(SCC, CG); +} + + // SimplifyFunction - Given information about callees, simplify the specified // function if we have invokes to non-unwinding functions or code after calls to // no-return functions. 
-bool PruneEH::SimplifyFunction(Function *F) { +static bool SimplifyFunction(Function *F, CallGraph &CG) { bool MadeChange = false; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) @@ -192,7 +200,7 @@ bool PruneEH::SimplifyFunction(Function *F) { // If the unwind block is now dead, nuke it. if (pred_empty(UnwindBlock)) - DeleteBasicBlock(UnwindBlock); // Delete the new BB. + DeleteBasicBlock(UnwindBlock, CG); // Delete the new BB. ++NumRemoved; MadeChange = true; @@ -211,7 +219,7 @@ bool PruneEH::SimplifyFunction(Function *F) { BB->getInstList().pop_back(); new UnreachableInst(BB->getContext(), &*BB); - DeleteBasicBlock(New); // Delete the new BB. + DeleteBasicBlock(New, CG); // Delete the new BB. MadeChange = true; ++NumUnreach; break; @@ -224,9 +232,8 @@ bool PruneEH::SimplifyFunction(Function *F) { /// DeleteBasicBlock - remove the specified basic block from the program, /// updating the callgraph to reflect any now-obsolete edges due to calls that /// exist in the BB. -void PruneEH::DeleteBasicBlock(BasicBlock *BB) { +static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) { assert(pred_empty(BB) && "BB is not dead!"); - CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); Instruction *TokenInst = nullptr; diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 928d92ef9d121..39de108edc067 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -22,10 +22,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/SampleProfile.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Constants.h" @@ -35,6 +37,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" @@ -76,16 +79,6 @@ static cl::opt<double> SampleProfileHotThreshold( "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"), cl::desc("Inlined functions that account for more than N% of all samples " "collected in the parent function, will be inlined again.")); -static cl::opt<double> SampleProfileGlobalHotThreshold( - "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"), - cl::desc("Top-level functions that account for more than N% of all samples " - "collected in the profile, will be marked as hot for the inliner " - "to consider.")); -static cl::opt<double> SampleProfileGlobalColdThreshold( - "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"), - cl::desc("Top-level functions that account for less than N% of all samples " - "collected in the profile, will be marked as cold for the inliner " - "to consider.")); namespace { typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap; @@ -100,30 +93,19 @@ typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>> /// This pass reads profile data from the file specified by /// -sample-profile-file and annotates every affected function with the /// profile information found in that file. 
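SampleProfileLoader is refactored below from a ModulePass into a plain class, the same split already applied to PartialInliner earlier in this commit: the logic becomes pass-manager-agnostic and a thin legacy wrapper forwards to it. A sketch of that wrapper pattern; MyNewPMPass is a hypothetical stand-in for any pass exposing run(Module &, ModuleAnalysisManager &):

    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    struct MyLegacyWrapper : public ModulePass {
      static char ID;
      MyLegacyWrapper() : ModulePass(ID) {}
      bool runOnModule(Module &M) override {
        if (skipModule(M)) // honor opt-bisect and similar opt-out machinery
          return false;
        ModuleAnalysisManager DummyMAM; // no analyses are actually queried
        auto PA = Impl.run(M, DummyMAM);
        return !PA.areAllPreserved();
      }
    private:
      MyNewPMPass Impl; // hypothetical new-PM pass
    };
    char MyLegacyWrapper::ID = 0;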
-class SampleProfileLoader : public ModulePass { +class SampleProfileLoader { public: - // Class identification, replacement for typeinfo - static char ID; - SampleProfileLoader(StringRef Name = SampleProfileFile) - : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(), + : DT(nullptr), PDT(nullptr), LI(nullptr), ACT(nullptr), Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false), - TotalCollectedSamples(0) { - initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); - } + TotalCollectedSamples(0) {} - bool doInitialization(Module &M) override; + bool doInitialization(Module &M); + bool runOnModule(Module &M); + void setACT(AssumptionCacheTracker *A) { ACT = A; } void dump() { Reader->dump(); } - const char *getPassName() const override { return "Sample profile pass"; } - - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - } - protected: bool runOnFunction(Function &F); unsigned getFunctionLoc(Function &F); @@ -133,14 +115,12 @@ protected: const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; bool inlineHotFunctions(Function &F); - bool emitInlineHints(Function &F); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); bool computeBlockWeights(Function &F); void findEquivalenceClasses(Function &F); - void findEquivalencesFor(BasicBlock *BB1, - SmallVector<BasicBlock *, 8> Descendants, + void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants, DominatorTreeBase<BasicBlock> *DomTree); void propagateWeights(Function &F); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); @@ -163,10 +143,10 @@ protected: EdgeWeightMap EdgeWeights; /// \brief Set of visited blocks during propagation. - SmallPtrSet<const BasicBlock *, 128> VisitedBlocks; + SmallPtrSet<const BasicBlock *, 32> VisitedBlocks; /// \brief Set of visited edges during propagation. - SmallSet<Edge, 128> VisitedEdges; + SmallSet<Edge, 32> VisitedEdges; /// \brief Equivalence classes for block weights. /// @@ -181,6 +161,8 @@ protected: std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT; std::unique_ptr<LoopInfo> LI; + AssumptionCacheTracker *ACT; + /// \brief Predecessors for each basic block in the CFG. 
BlockEdgeMap Predecessors; @@ -206,6 +188,32 @@ protected: uint64_t TotalCollectedSamples; }; +class SampleProfileLoaderLegacyPass : public ModulePass { +public: + // Class identification, replacement for typeinfo + static char ID; + + SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile) + : ModulePass(ID), SampleLoader(Name) { + initializeSampleProfileLoaderLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void dump() { SampleLoader.dump(); } + + bool doInitialization(Module &M) override { + return SampleLoader.doInitialization(M); + } + const char *getPassName() const override { return "Sample profile pass"; } + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + } +private: + SampleProfileLoader SampleLoader; +}; + class SampleCoverageTracker { public: SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {} @@ -285,7 +293,6 @@ bool callsiteIsHot(const FunctionSamples *CallerFS, (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0; return PercentSamples >= SampleProfileHotThreshold; } - } /// Mark as used the sample record for the given function samples at @@ -445,7 +452,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, /// \returns the weight of \p Inst. ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) const { - DebugLoc DLoc = Inst.getDebugLoc(); + const DebugLoc &DLoc = Inst.getDebugLoc(); if (!DLoc) return std::error_code(); @@ -453,6 +460,11 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const { if (!FS) return std::error_code(); + // Ignore all dbg_value intrinsics. + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst); + if (II && II->getIntrinsicID() == Intrinsic::dbg_value) + return std::error_code(); + const DILocation *DIL = DLoc; unsigned Lineno = DLoc.getLine(); unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine(); @@ -476,6 +488,13 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const { << Inst << " (line offset: " << Lineno - HeaderLineno << "." << DIL->getDiscriminator() << " - weight: " << R.get() << ")\n"); + } else { + // If a call instruction is inlined in profile, but not inlined here, + // it means that the inlined callsite has no sample, thus the call + // instruction should have 0 count. + const CallInst *CI = dyn_cast<CallInst>(&Inst); + if (CI && findCalleeFunctionSamples(*CI)) + R = 0; } return R; } @@ -490,19 +509,22 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const { /// \returns the weight for \p BB. ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const { - bool Found = false; - uint64_t Weight = 0; + DenseMap<uint64_t, uint64_t> CM; for (auto &I : BB->getInstList()) { const ErrorOr<uint64_t> &R = getInstWeight(I); - if (R && R.get() >= Weight) { - Weight = R.get(); - Found = true; + if (R) CM[R.get()]++; + } + if (CM.size() == 0) return std::error_code(); + uint64_t W = 0, C = 0; + for (const auto &C_W : CM) { + if (C_W.second == W) { + C = std::max(C, C_W.first); + } else if (C_W.second > W) { + C = C_W.first; + W = C_W.second; } } - if (Found) - return Weight; - else - return std::error_code(); + return C; } /// \brief Compute and store the weights of every basic block. 
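The getBlockWeight rewrite above replaces "maximum instruction weight" with "most frequent instruction weight", which is more robust to a single outlier count. A self-contained restatement of the selection loop (error handling for the no-samples case omitted), with a worked example in the comment:

    #include <algorithm>
    #include <cstdint>
    #include <map>

    // Pick the count shared by the most instructions; on a frequency tie,
    // prefer the larger count. E.g. instruction counts {10, 10, 10, 500}
    // now yield a block weight of 10, where the old max-based rule gave 500.
    uint64_t blockWeight(const std::map<uint64_t, uint64_t> &CountToFreq) {
      uint64_t BestCount = 0, BestFreq = 0;
      for (const auto &CF : CountToFreq) {
        if (CF.second == BestFreq)
          BestCount = std::max(BestCount, CF.first); // tie: larger count
        else if (CF.second > BestFreq) {
          BestCount = CF.first; // new most-frequent count
          BestFreq = CF.second;
        }
      }
      return BestCount;
    }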
@@ -549,19 +571,12 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const { if (!SP) return nullptr; - Function *CalleeFunc = Inst.getCalledFunction(); - if (!CalleeFunc) { - return nullptr; - } - - StringRef CalleeName = CalleeFunc->getName(); const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) return nullptr; - return FS->findFunctionSamplesAt( - CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()), - DIL->getDiscriminator(), CalleeName)); + return FS->findFunctionSamplesAt(LineLocation( + getOffset(DIL->getLine(), SP->getLine()), DIL->getDiscriminator())); } /// \brief Get the FunctionSamples for an instruction. @@ -575,22 +590,17 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const { /// \returns the FunctionSamples pointer to the inlined instance. const FunctionSamples * SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { - SmallVector<CallsiteLocation, 10> S; + SmallVector<LineLocation, 10> S; const DILocation *DIL = Inst.getDebugLoc(); if (!DIL) { return Samples; } - StringRef CalleeName; - for (const DILocation *DIL = Inst.getDebugLoc(); DIL; - DIL = DIL->getInlinedAt()) { + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { DISubprogram *SP = DIL->getScope()->getSubprogram(); if (!SP) return nullptr; - if (!CalleeName.empty()) { - S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()), - DIL->getDiscriminator(), CalleeName)); - } - CalleeName = SP->getLinkageName(); + S.push_back(LineLocation(getOffset(DIL->getLine(), SP->getLine()), + DIL->getDiscriminator())); } if (S.size() == 0) return Samples; @@ -601,63 +611,6 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return FS; } -/// \brief Emit an inline hint if \p F is globally hot or cold. -/// -/// If \p F consumes a significant fraction of samples (indicated by -/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the -/// inliner to consider the function hot. -/// -/// If \p F consumes a small fraction of samples (indicated by -/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner -/// to consider the function cold. -/// -/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a -/// function globally hot or cold, we should be annotating individual callsites. -/// This is not currently possible, but work on the inliner will eventually -/// provide this ability. See http://reviews.llvm.org/D15003 for details and -/// discussion. -/// -/// \returns True if either attribute was applied to \p F. -bool SampleProfileLoader::emitInlineHints(Function &F) { - if (TotalCollectedSamples == 0) - return false; - - uint64_t FunctionSamples = Samples->getTotalSamples(); - double SamplesPercent = - (double)FunctionSamples / (double)TotalCollectedSamples * 100.0; - - // If the function collected more samples than the hot threshold, mark - // it globally hot. 
- if (SamplesPercent >= SampleProfileGlobalHotThreshold) { - F.addFnAttr(llvm::Attribute::InlineHint); - std::string Msg; - raw_string_ostream S(Msg); - S << "Applied inline hint to globally hot function '" << F.getName() - << "' with " << format("%.2f", SamplesPercent) - << "% of samples (threshold: " - << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)"; - S.flush(); - emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg); - return true; - } - - // If the function collected fewer samples than the cold threshold, mark - // it globally cold. - if (SamplesPercent <= SampleProfileGlobalColdThreshold) { - F.addFnAttr(llvm::Attribute::Cold); - std::string Msg; - raw_string_ostream S(Msg); - S << "Applied cold hint to globally cold function '" << F.getName() - << "' with " << format("%.2f", SamplesPercent) - << "% of samples (threshold: " - << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)"; - S.flush(); - emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg); - return true; - } - - return false; -} /// \brief Iteratively inline hot callsites of a function. /// @@ -685,7 +638,7 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) { } } for (auto CI : CIS) { - InlineFunctionInfo IFI; + InlineFunctionInfo IFI(nullptr, ACT); Function *CalledFunction = CI->getCalledFunction(); DebugLoc DLoc = CI->getDebugLoc(); uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples(); @@ -731,7 +684,7 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) { /// with blocks from \p BB1's dominator tree, then /// this is the post-dominator tree, and vice versa. void SampleProfileLoader::findEquivalencesFor( - BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants, + BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants, DominatorTreeBase<BasicBlock> *DomTree) { const BasicBlock *EC = EquivalenceClass[BB1]; uint64_t Weight = BlockWeights[EC]; @@ -859,23 +812,31 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) { // edge is unknown (see setEdgeOrBlockWeight). for (unsigned i = 0; i < 2; i++) { uint64_t TotalWeight = 0; - unsigned NumUnknownEdges = 0; - Edge UnknownEdge, SelfReferentialEdge; + unsigned NumUnknownEdges = 0, NumTotalEdges = 0; + Edge UnknownEdge, SelfReferentialEdge, SingleEdge; if (i == 0) { // First, visit all predecessor edges. + NumTotalEdges = Predecessors[BB].size(); for (auto *Pred : Predecessors[BB]) { Edge E = std::make_pair(Pred, BB); TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge); if (E.first == E.second) SelfReferentialEdge = E; } + if (NumTotalEdges == 1) { + SingleEdge = std::make_pair(Predecessors[BB][0], BB); + } } else { // On the second round, visit all successor edges. + NumTotalEdges = Successors[BB].size(); for (auto *Succ : Successors[BB]) { Edge E = std::make_pair(BB, Succ); TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge); } + if (NumTotalEdges == 1) { + SingleEdge = std::make_pair(BB, Successors[BB][0]); + } } // After visiting all the edges, there are three cases that we @@ -904,18 +865,24 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) { if (NumUnknownEdges <= 1) { uint64_t &BBWeight = BlockWeights[EC]; if (NumUnknownEdges == 0) { - // If we already know the weight of all edges, the weight of the - // basic block can be computed. It should be no larger than the sum - // of all edge weights. 
- if (TotalWeight > BBWeight) { - BBWeight = TotalWeight; + if (!VisitedBlocks.count(EC)) { + // If we already know the weight of all edges, the weight of the + // basic block can be computed. It should be no larger than the sum + // of all edge weights. + if (TotalWeight > BBWeight) { + BBWeight = TotalWeight; + Changed = true; + DEBUG(dbgs() << "All edge weights for " << BB->getName() + << " known. Set weight for block: "; + printBlockWeight(dbgs(), BB);); + } + } else if (NumTotalEdges == 1 && + EdgeWeights[SingleEdge] < BlockWeights[EC]) { + // If there is only one edge for the visited basic block, use the + // block weight to adjust edge weight if edge weight is smaller. + EdgeWeights[SingleEdge] = BlockWeights[EC]; Changed = true; - DEBUG(dbgs() << "All edge weights for " << BB->getName() - << " known. Set weight for block: "; - printBlockWeight(dbgs(), BB);); } - if (VisitedBlocks.insert(EC).second) - Changed = true; } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) { // If there is a single unknown edge and the block has been // visited, then we can compute E's weight. @@ -1020,6 +987,19 @@ void SampleProfileLoader::propagateWeights(Function &F) { MDBuilder MDB(Ctx); for (auto &BI : F) { BasicBlock *BB = &BI; + + if (BlockWeights[BB]) { + for (auto &I : BB->getInstList()) { + if (CallInst *CI = dyn_cast<CallInst>(&I)) { + if (!dyn_cast<IntrinsicInst>(&I)) { + SmallVector<uint32_t, 1> Weights; + Weights.push_back(BlockWeights[BB]); + CI->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(Weights)); + } + } + } + } TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 1) continue; @@ -1084,7 +1064,7 @@ void SampleProfileLoader::propagateWeights(Function &F) { /// \returns the line number where \p F is defined. If it returns 0, /// it means that there is no debug information available for \p F. unsigned SampleProfileLoader::getFunctionLoc(Function &F) { - if (DISubprogram *S = getDISubprogram(&F)) + if (DISubprogram *S = F.getSubprogram()) return S->getLine(); // If the start of \p F is missing, emit a diagnostic to inform the user @@ -1165,8 +1145,6 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F) << "\n"); - Changed |= emitInlineHints(F); - Changed |= inlineHotFunctions(F); // Compute basic block weights. 
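Concretely, the new VisitedBlocks branch above repairs one inconsistency: once a block's weight has been finalized, its sole incident edge can never legitimately carry less weight than the block itself. A toy restatement with numbers:

    #include <cstdint>

    // If a visited block with weight 100 has exactly one incoming (or
    // outgoing) edge currently weighted 80, the edge is raised to 100.
    void adjustSingleEdge(uint64_t &EdgeWeight, uint64_t BlockWeight,
                          unsigned NumTotalEdges, bool BlockVisited) {
      if (BlockVisited && NumTotalEdges == 1 && EdgeWeight < BlockWeight)
        EdgeWeight = BlockWeight; // e.g. 80 -> 100
    }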
@@ -1190,7 +1168,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { unsigned Coverage = CoverageTracker.computeCoverage(Used, Total); if (Coverage < SampleProfileRecordCoverage) { F.getContext().diagnose(DiagnosticInfoSampleProfile( - getDISubprogram(&F)->getFilename(), getFunctionLoc(F), + F.getSubprogram()->getFilename(), getFunctionLoc(F), Twine(Used) + " of " + Twine(Total) + " available profile records (" + Twine(Coverage) + "%) were applied", DS_Warning)); @@ -1203,7 +1181,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { unsigned Coverage = CoverageTracker.computeCoverage(Used, Total); if (Coverage < SampleProfileSampleCoverage) { F.getContext().diagnose(DiagnosticInfoSampleProfile( - getDISubprogram(&F)->getFilename(), getFunctionLoc(F), + F.getSubprogram()->getFilename(), getFunctionLoc(F), Twine(Used) + " of " + Twine(Total) + " available profile samples (" + Twine(Coverage) + "%) were applied", DS_Warning)); @@ -1212,12 +1190,12 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { return Changed; } -char SampleProfileLoader::ID = 0; -INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile", - "Sample Profile loader", false, false) -INITIALIZE_PASS_DEPENDENCY(AddDiscriminators) -INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile", - "Sample Profile loader", false, false) +char SampleProfileLoaderLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", + "Sample Profile loader", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", + "Sample Profile loader", false, false) bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); @@ -1233,11 +1211,11 @@ bool SampleProfileLoader::doInitialization(Module &M) { } ModulePass *llvm::createSampleProfileLoaderPass() { - return new SampleProfileLoader(SampleProfileFile); + return new SampleProfileLoaderLegacyPass(SampleProfileFile); } ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { - return new SampleProfileLoader(Name); + return new SampleProfileLoaderLegacyPass(Name); } bool SampleProfileLoader::runOnModule(Module &M) { @@ -1254,12 +1232,33 @@ bool SampleProfileLoader::runOnModule(Module &M) { clearFunctionData(); retval |= runOnFunction(F); } + M.setProfileSummary(Reader->getSummary().getMD(M.getContext())); return retval; } +bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { + // FIXME: pass in AssumptionCache correctly for the new pass manager. 
+ SampleLoader.setACT(&getAnalysis<AssumptionCacheTracker>()); + return SampleLoader.runOnModule(M); +} + bool SampleProfileLoader::runOnFunction(Function &F) { + F.setEntryCount(0); Samples = Reader->getSamplesFor(F); if (!Samples->empty()) return emitAnnotations(F); return false; } + +PreservedAnalyses SampleProfileLoaderPass::run(Module &M, + AnalysisManager<Module> &AM) { + + SampleProfileLoader SampleLoader(SampleProfileFile); + + SampleLoader.doInitialization(M); + + if (!SampleLoader.runOnModule(M)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp index c94cc7c74a894..3c3c5dd19d1f0 100644 --- a/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -53,7 +53,8 @@ static bool stripDeadPrototypes(Module &M) { return MadeChange; } -PreservedAnalyses StripDeadPrototypesPass::run(Module &M) { +PreservedAnalyses StripDeadPrototypesPass::run(Module &M, + ModuleAnalysisManager &) { if (stripDeadPrototypes(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); @@ -69,6 +70,9 @@ public: *PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + return stripDeadPrototypes(M); } }; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 46f352f7f9f13..fd250366cef24 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -21,7 +21,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -216,11 +215,11 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { I->setName(""); // Internal symbols can't participate in linkage } - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0) - if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) - I->setName(""); // Internal symbols can't participate in linkage - StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); + for (Function &I : M) { + if (I.hasLocalLinkage() && llvmUsedValues.count(&I) == 0) + if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg")) + I.setName(""); // Internal symbols can't participate in linkage + StripSymtab(I.getValueSymbolTable(), PreserveDbgInfo); } // Remove all names from types. @@ -230,6 +229,9 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { } bool StripSymbols::runOnModule(Module &M) { + if (skipModule(M)) + return false; + bool Changed = false; Changed |= StripDebugInfo(M); if (!OnlyDebugInfo) @@ -238,10 +240,15 @@ bool StripSymbols::runOnModule(Module &M) { } bool StripNonDebugSymbols::runOnModule(Module &M) { + if (skipModule(M)) + return false; + return StripSymbolNames(M, true); } bool StripDebugDeclare::runOnModule(Module &M) { + if (skipModule(M)) + return false; Function *Declare = M.getFunction("llvm.dbg.declare"); std::vector<Constant*> DeadConstants; @@ -287,6 +294,9 @@ bool StripDebugDeclare::runOnModule(Module &M) { /// optimized away by the optimizer. This special pass removes debug info for /// such symbols. 
bool StripDeadDebugInfo::runOnModule(Module &M) { + if (skipModule(M)) + return false; + bool Changed = false; LLVMContext &C = M.getContext(); @@ -312,20 +322,6 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { } for (DICompileUnit *DIC : F.compile_units()) { - // Create our live subprogram list. - bool SubprogramChange = false; - for (DISubprogram *DISP : DIC->getSubprograms()) { - // Make sure we visit each subprogram only once. - if (!VisitedSet.insert(DISP).second) - continue; - - // If the function referenced by DISP is not null, the function is live. - if (LiveSPs.count(DISP)) - LiveSubprograms.push_back(DISP); - else - SubprogramChange = true; - } - // Create our live global variable list. bool GlobalVariableChange = false; for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) { @@ -341,14 +337,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { GlobalVariableChange = true; } - // If we found dead subprograms or global variables, replace the current - // subprogram list/global variable list with our new live subprogram/global - // variable list. - if (SubprogramChange) { - DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms)); - Changed = true; - } - + // If we found dead global variables, replace the current global + // variable list with our new live global variable list. if (GlobalVariableChange) { DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables)); Changed = true; diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp new file mode 100644 index 0000000000000..53eb4e2c90761 --- /dev/null +++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -0,0 +1,843 @@ +//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements whole program optimization of virtual calls in cases +// where we know (via !type metadata) that the list of callees is fixed. This +// includes the following: +// - Single implementation devirtualization: if a virtual call has a single +// possible callee, replace all calls with a direct call to that callee. +// - Virtual constant propagation: if the virtual function's return type is an +// integer <=64 bits and all possible callees are readnone, for each class and +// each list of constant arguments: evaluate the function, store the return +// value alongside the virtual table, and rewrite each virtual call as a load +// from the virtual table. +// - Uniform return value optimization: if the conditions for virtual constant +// propagation hold and each function returns the same constant value, replace +// each virtual call with that constant. +// - Unique return value optimization for i1 return values: if the conditions +// for virtual constant propagation hold and a single vtable's function +// returns 0, or a single vtable's function returns 1, replace each virtual +// call with a comparison of the vptr against that vtable's address. 
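A source-level picture of the header comment above may help; the pass itself operates on IR carrying !type metadata, so this is only the C++ shape of the uniform return value case, with made-up types and values:

    // With whole-program visibility, A::f and B::f are the only possible
    // callees at p->f(); both are effectively readnone and both return the
    // same constant, so every such virtual call folds to 42.
    struct Base { virtual int f() const = 0; };
    struct A final : Base { int f() const override { return 42; } };
    struct B final : Base { int f() const override { return 42; } };

    int call(const Base *p) { return p->f(); } // becomes: return 42;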
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Evaluator.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#include <set>
+
+using namespace llvm;
+using namespace wholeprogramdevirt;
+
+#define DEBUG_TYPE "wholeprogramdevirt"
+
+// Find the minimum offset that we may store a value of size Size bits at. If
+// IsAfter is set, look for an offset after the object, otherwise look for an
+// offset before the object.
+uint64_t
+wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
+                                     bool IsAfter, uint64_t Size) {
+  // Find a minimum offset taking into account only vtable sizes.
+  uint64_t MinByte = 0;
+  for (const VirtualCallTarget &Target : Targets) {
+    if (IsAfter)
+      MinByte = std::max(MinByte, Target.minAfterBytes());
+    else
+      MinByte = std::max(MinByte, Target.minBeforeBytes());
+  }
+
+  // Build a vector of arrays of bytes covering, for each target, a slice of
+  // the used region (see AccumBitVector::BytesUsed in
+  // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte.
+  // Effectively, this aligns the used regions to start at MinByte.
+  //
+  // In this example, A, B and C are vtables, # is a byte already allocated
+  // for a virtual function pointer, AAAA... (etc.) are the used regions for
+  // the vtables and Offset(X) is the value computed for the Offset variable
+  // below for X.
+  //
+  //                    Offset(A)
+  //                    |       |
+  //                            |MinByte
+  // A: ################AAAAAAAA|AAAAAAAA
+  // B: ########BBBBBBBBBBBBBBBB|BBBB
+  // C: ########################|CCCCCCCCCCCCCCCC
+  //            |   Offset(B)   |
+  //
+  // This code produces the slices of A, B and C that appear after the divider
+  // at MinByte.
+  std::vector<ArrayRef<uint8_t>> Used;
+  for (const VirtualCallTarget &Target : Targets) {
+    ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
+                                       : Target.TM->Bits->Before.BytesUsed;
+    uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
+                              : MinByte - Target.minBeforeBytes();
+
+    // Disregard used regions that are smaller than Offset. These are
+    // effectively all-free regions that do not need to be checked.
+    if (VTUsed.size() > Offset)
+      Used.push_back(VTUsed.slice(Offset));
+  }
+
+  if (Size == 1) {
+    // Find a free bit in each member of Used.
+    for (unsigned I = 0;; ++I) {
+      uint8_t BitsUsed = 0;
+      for (auto &&B : Used)
+        if (I < B.size())
+          BitsUsed |= B[I];
+      if (BitsUsed != 0xff)
+        return (MinByte + I) * 8 +
+               countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined);
+    }
+  } else {
+    // Find a free (Size/8) byte region in each member of Used.
+    // FIXME: see if alignment helps.
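+    // For example (illustrative): with Size == 16 and a single used region
+    // {0x00, 0xff, 0x00, 0x00}, I == 2 is the first index where two
+    // consecutive bytes are free, so the result is (MinByte + 2) * 8.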
+ for (unsigned I = 0;; ++I) { + for (auto &&B : Used) { + unsigned Byte = 0; + while ((I + Byte) < B.size() && Byte < (Size / 8)) { + if (B[I + Byte]) + goto NextI; + ++Byte; + } + } + return (MinByte + I) * 8; + NextI:; + } + } +} + +void wholeprogramdevirt::setBeforeReturnValues( + MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore, + unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) { + if (BitWidth == 1) + OffsetByte = -(AllocBefore / 8 + 1); + else + OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8); + OffsetBit = AllocBefore % 8; + + for (VirtualCallTarget &Target : Targets) { + if (BitWidth == 1) + Target.setBeforeBit(AllocBefore); + else + Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8); + } +} + +void wholeprogramdevirt::setAfterReturnValues( + MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocAfter, + unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) { + if (BitWidth == 1) + OffsetByte = AllocAfter / 8; + else + OffsetByte = (AllocAfter + 7) / 8; + OffsetBit = AllocAfter % 8; + + for (VirtualCallTarget &Target : Targets) { + if (BitWidth == 1) + Target.setAfterBit(AllocAfter); + else + Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8); + } +} + +VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM) + : Fn(Fn), TM(TM), + IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()) {} + +namespace { + +// A slot in a set of virtual tables. The TypeID identifies the set of virtual +// tables, and the ByteOffset is the offset in bytes from the address point to +// the virtual function pointer. +struct VTableSlot { + Metadata *TypeID; + uint64_t ByteOffset; +}; + +} + +namespace llvm { + +template <> struct DenseMapInfo<VTableSlot> { + static VTableSlot getEmptyKey() { + return {DenseMapInfo<Metadata *>::getEmptyKey(), + DenseMapInfo<uint64_t>::getEmptyKey()}; + } + static VTableSlot getTombstoneKey() { + return {DenseMapInfo<Metadata *>::getTombstoneKey(), + DenseMapInfo<uint64_t>::getTombstoneKey()}; + } + static unsigned getHashValue(const VTableSlot &I) { + return DenseMapInfo<Metadata *>::getHashValue(I.TypeID) ^ + DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset); + } + static bool isEqual(const VTableSlot &LHS, + const VTableSlot &RHS) { + return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset; + } +}; + +} + +namespace { + +// A virtual call site. VTable is the loaded virtual table pointer, and CS is +// the indirect virtual call. +struct VirtualCallSite { + Value *VTable; + CallSite CS; + + // If non-null, this field points to the associated unsafe use count stored in + // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description + // of that field for details. + unsigned *NumUnsafeUses; + + void emitRemark() { + Function *F = CS.getCaller(); + emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, + CS.getInstruction()->getDebugLoc(), + "devirtualized call"); + } + + void replaceAndErase(Value *New) { + emitRemark(); + CS->replaceAllUsesWith(New); + if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) { + BranchInst::Create(II->getNormalDest(), CS.getInstruction()); + II->getUnwindDest()->removePredecessor(II->getParent()); + } + CS->eraseFromParent(); + // This use is no longer unsafe. 
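+    // (See DevirtModule::NumUnsafeUsesForTypeTest below: once the count for
+    // the associated llvm.type.test call reaches zero, run() replaces the
+    // type test itself with true.)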
+ if (NumUnsafeUses) + --*NumUnsafeUses; + } +}; + +struct DevirtModule { + Module &M; + IntegerType *Int8Ty; + PointerType *Int8PtrTy; + IntegerType *Int32Ty; + + MapVector<VTableSlot, std::vector<VirtualCallSite>> CallSlots; + + // This map keeps track of the number of "unsafe" uses of a loaded function + // pointer. The key is the associated llvm.type.test intrinsic call generated + // by this pass. An unsafe use is one that calls the loaded function pointer + // directly. Every time we eliminate an unsafe use (for example, by + // devirtualizing it or by applying virtual constant propagation), we + // decrement the value stored in this map. If a value reaches zero, we can + // eliminate the type check by RAUWing the associated llvm.type.test call with + // true. + std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest; + + DevirtModule(Module &M) + : M(M), Int8Ty(Type::getInt8Ty(M.getContext())), + Int8PtrTy(Type::getInt8PtrTy(M.getContext())), + Int32Ty(Type::getInt32Ty(M.getContext())) {} + + void scanTypeTestUsers(Function *TypeTestFunc, Function *AssumeFunc); + void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc); + + void buildTypeIdentifierMap( + std::vector<VTableBits> &Bits, + DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap); + bool + tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot, + const std::set<TypeMemberInfo> &TypeMemberInfos, + uint64_t ByteOffset); + bool trySingleImplDevirt(ArrayRef<VirtualCallTarget> TargetsForSlot, + MutableArrayRef<VirtualCallSite> CallSites); + bool tryEvaluateFunctionsWithArgs( + MutableArrayRef<VirtualCallTarget> TargetsForSlot, + ArrayRef<ConstantInt *> Args); + bool tryUniformRetValOpt(IntegerType *RetType, + ArrayRef<VirtualCallTarget> TargetsForSlot, + MutableArrayRef<VirtualCallSite> CallSites); + bool tryUniqueRetValOpt(unsigned BitWidth, + ArrayRef<VirtualCallTarget> TargetsForSlot, + MutableArrayRef<VirtualCallSite> CallSites); + bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot, + ArrayRef<VirtualCallSite> CallSites); + + void rebuildGlobal(VTableBits &B); + + bool run(); +}; + +struct WholeProgramDevirt : public ModulePass { + static char ID; + WholeProgramDevirt() : ModulePass(ID) { + initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) { + if (skipModule(M)) + return false; + + return DevirtModule(M).run(); + } +}; + +} // anonymous namespace + +INITIALIZE_PASS(WholeProgramDevirt, "wholeprogramdevirt", + "Whole program devirtualization", false, false) +char WholeProgramDevirt::ID = 0; + +ModulePass *llvm::createWholeProgramDevirtPass() { + return new WholeProgramDevirt; +} + +PreservedAnalyses WholeProgramDevirtPass::run(Module &M, + ModuleAnalysisManager &) { + if (!DevirtModule(M).run()) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} + +void DevirtModule::buildTypeIdentifierMap( + std::vector<VTableBits> &Bits, + DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) { + DenseMap<GlobalVariable *, VTableBits *> GVToBits; + Bits.reserve(M.getGlobalList().size()); + SmallVector<MDNode *, 2> Types; + for (GlobalVariable &GV : M.globals()) { + Types.clear(); + GV.getMetadata(LLVMContext::MD_type, Types); + if (Types.empty()) + continue; + + VTableBits *&BitsPtr = GVToBits[&GV]; + if (!BitsPtr) { + Bits.emplace_back(); + Bits.back().GV = &GV; + Bits.back().ObjectSize = + M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType()); + BitsPtr = &Bits.back(); + } + + for (MDNode *Type : 
Types) { + auto TypeID = Type->getOperand(1).get(); + + uint64_t Offset = + cast<ConstantInt>( + cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) + ->getZExtValue(); + + TypeIdMap[TypeID].insert({BitsPtr, Offset}); + } + } +} + +bool DevirtModule::tryFindVirtualCallTargets( + std::vector<VirtualCallTarget> &TargetsForSlot, + const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) { + for (const TypeMemberInfo &TM : TypeMemberInfos) { + if (!TM.Bits->GV->isConstant()) + return false; + + auto Init = dyn_cast<ConstantArray>(TM.Bits->GV->getInitializer()); + if (!Init) + return false; + ArrayType *VTableTy = Init->getType(); + + uint64_t ElemSize = + M.getDataLayout().getTypeAllocSize(VTableTy->getElementType()); + uint64_t GlobalSlotOffset = TM.Offset + ByteOffset; + if (GlobalSlotOffset % ElemSize != 0) + return false; + + unsigned Op = GlobalSlotOffset / ElemSize; + if (Op >= Init->getNumOperands()) + return false; + + auto Fn = dyn_cast<Function>(Init->getOperand(Op)->stripPointerCasts()); + if (!Fn) + return false; + + // We can disregard __cxa_pure_virtual as a possible call target, as + // calls to pure virtuals are UB. + if (Fn->getName() == "__cxa_pure_virtual") + continue; + + TargetsForSlot.push_back({Fn, &TM}); + } + + // Give up if we couldn't find any targets. + return !TargetsForSlot.empty(); +} + +bool DevirtModule::trySingleImplDevirt( + ArrayRef<VirtualCallTarget> TargetsForSlot, + MutableArrayRef<VirtualCallSite> CallSites) { + // See if the program contains a single implementation of this virtual + // function. + Function *TheFn = TargetsForSlot[0].Fn; + for (auto &&Target : TargetsForSlot) + if (TheFn != Target.Fn) + return false; + + // If so, update each call site to call that implementation directly. + for (auto &&VCallSite : CallSites) { + VCallSite.emitRemark(); + VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast( + TheFn, VCallSite.CS.getCalledValue()->getType())); + // This use is no longer unsafe. + if (VCallSite.NumUnsafeUses) + --*VCallSite.NumUnsafeUses; + } + return true; +} + +bool DevirtModule::tryEvaluateFunctionsWithArgs( + MutableArrayRef<VirtualCallTarget> TargetsForSlot, + ArrayRef<ConstantInt *> Args) { + // Evaluate each function and store the result in each target's RetVal + // field. + for (VirtualCallTarget &Target : TargetsForSlot) { + if (Target.Fn->arg_size() != Args.size() + 1) + return false; + for (unsigned I = 0; I != Args.size(); ++I) + if (Target.Fn->getFunctionType()->getParamType(I + 1) != + Args[I]->getType()) + return false; + + Evaluator Eval(M.getDataLayout(), nullptr); + SmallVector<Constant *, 2> EvalArgs; + EvalArgs.push_back( + Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0))); + EvalArgs.insert(EvalArgs.end(), Args.begin(), Args.end()); + Constant *RetVal; + if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) || + !isa<ConstantInt>(RetVal)) + return false; + Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue(); + } + return true; +} + +bool DevirtModule::tryUniformRetValOpt( + IntegerType *RetType, ArrayRef<VirtualCallTarget> TargetsForSlot, + MutableArrayRef<VirtualCallSite> CallSites) { + // Uniform return value optimization. If all functions return the same + // constant, replace all calls with that constant. 
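+  // For example (illustrative): if every target evaluated to 1 for this
+  // argument list, each matching call site is RAUW'd with the constant 1.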
+ uint64_t TheRetVal = TargetsForSlot[0].RetVal; + for (const VirtualCallTarget &Target : TargetsForSlot) + if (Target.RetVal != TheRetVal) + return false; + + auto TheRetValConst = ConstantInt::get(RetType, TheRetVal); + for (auto Call : CallSites) + Call.replaceAndErase(TheRetValConst); + return true; +} + +bool DevirtModule::tryUniqueRetValOpt( + unsigned BitWidth, ArrayRef<VirtualCallTarget> TargetsForSlot, + MutableArrayRef<VirtualCallSite> CallSites) { + // IsOne controls whether we look for a 0 or a 1. + auto tryUniqueRetValOptFor = [&](bool IsOne) { + const TypeMemberInfo *UniqueMember = 0; + for (const VirtualCallTarget &Target : TargetsForSlot) { + if (Target.RetVal == (IsOne ? 1 : 0)) { + if (UniqueMember) + return false; + UniqueMember = Target.TM; + } + } + + // We should have found a unique member or bailed out by now. We already + // checked for a uniform return value in tryUniformRetValOpt. + assert(UniqueMember); + + // Replace each call with the comparison. + for (auto &&Call : CallSites) { + IRBuilder<> B(Call.CS.getInstruction()); + Value *OneAddr = B.CreateBitCast(UniqueMember->Bits->GV, Int8PtrTy); + OneAddr = B.CreateConstGEP1_64(OneAddr, UniqueMember->Offset); + Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, + Call.VTable, OneAddr); + Call.replaceAndErase(Cmp); + } + return true; + }; + + if (BitWidth == 1) { + if (tryUniqueRetValOptFor(true)) + return true; + if (tryUniqueRetValOptFor(false)) + return true; + } + return false; +} + +bool DevirtModule::tryVirtualConstProp( + MutableArrayRef<VirtualCallTarget> TargetsForSlot, + ArrayRef<VirtualCallSite> CallSites) { + // This only works if the function returns an integer. + auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType()); + if (!RetType) + return false; + unsigned BitWidth = RetType->getBitWidth(); + if (BitWidth > 64) + return false; + + // Make sure that each function does not access memory, takes at least one + // argument, does not use its first argument (which we assume is 'this'), + // and has the same return type. + for (VirtualCallTarget &Target : TargetsForSlot) { + if (!Target.Fn->doesNotAccessMemory() || Target.Fn->arg_empty() || + !Target.Fn->arg_begin()->use_empty() || + Target.Fn->getReturnType() != RetType) + return false; + } + + // Group call sites by the list of constant arguments they pass. + // The comparator ensures deterministic ordering. 
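+  // For example (illustrative): p->f(5) and q->f(5) share the key {i32 5}
+  // (ConstantInts are uniqued per context) and are optimized together, while
+  // p->f(7) is keyed by {i32 7} and evaluated separately.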
+ struct ByAPIntValue { + bool operator()(const std::vector<ConstantInt *> &A, + const std::vector<ConstantInt *> &B) const { + return std::lexicographical_compare( + A.begin(), A.end(), B.begin(), B.end(), + [](ConstantInt *AI, ConstantInt *BI) { + return AI->getValue().ult(BI->getValue()); + }); + } + }; + std::map<std::vector<ConstantInt *>, std::vector<VirtualCallSite>, + ByAPIntValue> + VCallSitesByConstantArg; + for (auto &&VCallSite : CallSites) { + std::vector<ConstantInt *> Args; + if (VCallSite.CS.getType() != RetType) + continue; + for (auto &&Arg : + make_range(VCallSite.CS.arg_begin() + 1, VCallSite.CS.arg_end())) { + if (!isa<ConstantInt>(Arg)) + break; + Args.push_back(cast<ConstantInt>(&Arg)); + } + if (Args.size() + 1 != VCallSite.CS.arg_size()) + continue; + + VCallSitesByConstantArg[Args].push_back(VCallSite); + } + + for (auto &&CSByConstantArg : VCallSitesByConstantArg) { + if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first)) + continue; + + if (tryUniformRetValOpt(RetType, TargetsForSlot, CSByConstantArg.second)) + continue; + + if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second)) + continue; + + // Find an allocation offset in bits in all vtables associated with the + // type. + uint64_t AllocBefore = + findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth); + uint64_t AllocAfter = + findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth); + + // Calculate the total amount of padding needed to store a value at both + // ends of the object. + uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0; + for (auto &&Target : TargetsForSlot) { + TotalPaddingBefore += std::max<int64_t>( + (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0); + TotalPaddingAfter += std::max<int64_t>( + (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0); + } + + // If the amount of padding is too large, give up. + // FIXME: do something smarter here. + if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128) + continue; + + // Calculate the offset to the value as a (possibly negative) byte offset + // and (if applicable) a bit offset, and store the values in the targets. + int64_t OffsetByte; + uint64_t OffsetBit; + if (TotalPaddingBefore <= TotalPaddingAfter) + setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte, + OffsetBit); + else + setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte, + OffsetBit); + + // Rewrite each call to a load from OffsetByte/OffsetBit. + for (auto Call : CSByConstantArg.second) { + IRBuilder<> B(Call.CS.getInstruction()); + Value *Addr = B.CreateConstGEP1_64(Call.VTable, OffsetByte); + if (BitWidth == 1) { + Value *Bits = B.CreateLoad(Addr); + Value *Bit = ConstantInt::get(Int8Ty, 1ULL << OffsetBit); + Value *BitsAndBit = B.CreateAnd(Bits, Bit); + auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0)); + Call.replaceAndErase(IsBitSet); + } else { + Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo()); + Value *Val = B.CreateLoad(RetType, ValAddr); + Call.replaceAndErase(Val); + } + } + } + return true; +} + +void DevirtModule::rebuildGlobal(VTableBits &B) { + if (B.Before.Bytes.empty() && B.After.Bytes.empty()) + return; + + // Align each byte array to pointer width. + unsigned PointerSize = M.getDataLayout().getPointerSize(); + B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize)); + B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize)); + + // Before was stored in reverse order; flip it now. 
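+  // (Illustrative: bytes for offsets -1, -2, -3 were appended in that order,
+  // so reversing yields memory order -3, -2, -1, the order in which they
+  // must precede the original initializer.)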
+ for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I) + std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]); + + // Build an anonymous global containing the before bytes, followed by the + // original initializer, followed by the after bytes. + auto NewInit = ConstantStruct::getAnon( + {ConstantDataArray::get(M.getContext(), B.Before.Bytes), + B.GV->getInitializer(), + ConstantDataArray::get(M.getContext(), B.After.Bytes)}); + auto NewGV = + new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(), + GlobalVariable::PrivateLinkage, NewInit, "", B.GV); + NewGV->setSection(B.GV->getSection()); + NewGV->setComdat(B.GV->getComdat()); + + // Copy the original vtable's metadata to the anonymous global, adjusting + // offsets as required. + NewGV->copyMetadata(B.GV, B.Before.Bytes.size()); + + // Build an alias named after the original global, pointing at the second + // element (the original initializer). + auto Alias = GlobalAlias::create( + B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "", + ConstantExpr::getGetElementPtr( + NewInit->getType(), NewGV, + ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, 1)}), + &M); + Alias->setVisibility(B.GV->getVisibility()); + Alias->takeName(B.GV); + + B.GV->replaceAllUsesWith(Alias); + B.GV->eraseFromParent(); +} + +void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc, + Function *AssumeFunc) { + // Find all virtual calls via a virtual table pointer %p under an assumption + // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p + // points to a member of the type identifier %md. Group calls by (type ID, + // offset) pair (effectively the identity of the virtual function) and store + // to CallSlots. + DenseSet<Value *> SeenPtrs; + for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end(); + I != E;) { + auto CI = dyn_cast<CallInst>(I->getUser()); + ++I; + if (!CI) + continue; + + // Search for virtual calls based on %p and add them to DevirtCalls. + SmallVector<DevirtCallSite, 1> DevirtCalls; + SmallVector<CallInst *, 1> Assumes; + findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI); + + // If we found any, add them to CallSlots. Only do this if we haven't seen + // the vtable pointer before, as it may have been CSE'd with pointers from + // other call sites, and we don't want to process call sites multiple times. + if (!Assumes.empty()) { + Metadata *TypeId = + cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata(); + Value *Ptr = CI->getArgOperand(0)->stripPointerCasts(); + if (SeenPtrs.insert(Ptr).second) { + for (DevirtCallSite Call : DevirtCalls) { + CallSlots[{TypeId, Call.Offset}].push_back( + {CI->getArgOperand(0), Call.CS, nullptr}); + } + } + } + + // We no longer need the assumes or the type test. + for (auto Assume : Assumes) + Assume->eraseFromParent(); + // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we + // may use the vtable argument later. 
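+    // Erasing only the type test itself leaves the vtable pointer chain
+    // recorded in CallSlots above intact for the rewrites performed later.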
+ if (CI->use_empty()) + CI->eraseFromParent(); + } +} + +void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) { + Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test); + + for (auto I = TypeCheckedLoadFunc->use_begin(), + E = TypeCheckedLoadFunc->use_end(); + I != E;) { + auto CI = dyn_cast<CallInst>(I->getUser()); + ++I; + if (!CI) + continue; + + Value *Ptr = CI->getArgOperand(0); + Value *Offset = CI->getArgOperand(1); + Value *TypeIdValue = CI->getArgOperand(2); + Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata(); + + SmallVector<DevirtCallSite, 1> DevirtCalls; + SmallVector<Instruction *, 1> LoadedPtrs; + SmallVector<Instruction *, 1> Preds; + bool HasNonCallUses = false; + findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds, + HasNonCallUses, CI); + + // Start by generating "pessimistic" code that explicitly loads the function + // pointer from the vtable and performs the type check. If possible, we will + // eliminate the load and the type check later. + + // If possible, only generate the load at the point where it is used. + // This helps avoid unnecessary spills. + IRBuilder<> LoadB( + (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI); + Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset); + Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy)); + Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr); + + for (Instruction *LoadedPtr : LoadedPtrs) { + LoadedPtr->replaceAllUsesWith(LoadedValue); + LoadedPtr->eraseFromParent(); + } + + // Likewise for the type test. + IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI); + CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue}); + + for (Instruction *Pred : Preds) { + Pred->replaceAllUsesWith(TypeTestCall); + Pred->eraseFromParent(); + } + + // We have already erased any extractvalue instructions that refer to the + // intrinsic call, but the intrinsic may have other non-extractvalue uses + // (although this is unlikely). In that case, explicitly build a pair and + // RAUW it. + if (!CI->use_empty()) { + Value *Pair = UndefValue::get(CI->getType()); + IRBuilder<> B(CI); + Pair = B.CreateInsertValue(Pair, LoadedValue, {0}); + Pair = B.CreateInsertValue(Pair, TypeTestCall, {1}); + CI->replaceAllUsesWith(Pair); + } + + // The number of unsafe uses is initially the number of uses. + auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall]; + NumUnsafeUses = DevirtCalls.size(); + + // If the function pointer has a non-call user, we cannot eliminate the type + // check, as one of those users may eventually call the pointer. Increment + // the unsafe use count to make sure it cannot reach zero. 
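+    // (A count that can never reach zero means the llvm.type.test call is
+    // left in place for a later pass, such as LowerTypeTests, to lower.)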
+    if (HasNonCallUses)
+      ++NumUnsafeUses;
+    for (DevirtCallSite Call : DevirtCalls) {
+      CallSlots[{TypeId, Call.Offset}].push_back(
+          {Ptr, Call.CS, &NumUnsafeUses});
+    }
+
+    CI->eraseFromParent();
+  }
+}
+
+bool DevirtModule::run() {
+  Function *TypeTestFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+  Function *TypeCheckedLoadFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
+
+  if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
+       AssumeFunc->use_empty()) &&
+      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
+    return false;
+
+  if (TypeTestFunc && AssumeFunc)
+    scanTypeTestUsers(TypeTestFunc, AssumeFunc);
+
+  if (TypeCheckedLoadFunc)
+    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
+
+  // Rebuild type metadata into a map for easy lookup.
+  std::vector<VTableBits> Bits;
+  DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
+  buildTypeIdentifierMap(Bits, TypeIdMap);
+  if (TypeIdMap.empty())
+    return true;
+
+  // For each (type, offset) pair:
+  bool DidVirtualConstProp = false;
+  for (auto &S : CallSlots) {
+    // Search each of the members of the type identifier for the virtual
+    // function implementation at offset S.first.ByteOffset, and add to
+    // TargetsForSlot.
+    std::vector<VirtualCallTarget> TargetsForSlot;
+    if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
+                                   S.first.ByteOffset))
+      continue;
+
+    if (trySingleImplDevirt(TargetsForSlot, S.second))
+      continue;
+
+    DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second);
+  }
+
+  // If we were able to eliminate all unsafe uses for a type checked load,
+  // eliminate the type test by replacing it with true.
+  if (TypeCheckedLoadFunc) {
+    auto True = ConstantInt::getTrue(M.getContext());
+    for (auto &&U : NumUnsafeUsesForTypeTest) {
+      if (U.second == 0) {
+        U.first->replaceAllUsesWith(True);
+        U.first->eraseFromParent();
+      }
+    }
+  }
+
+  // Rebuild each global we touched as part of virtual constant propagation to
+  // include the before and after bytes.
+  if (DidVirtualConstProp)
+    for (VTableBits &B : Bits)
+      rebuildGlobal(B);
+
+  return true;
+}