Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp        170
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt                 3
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp             76
-rw-r--r--  lib/Transforms/IPO/CrossDSOCFI.cpp              118
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp  207
-rw-r--r--  lib/Transforms/IPO/ElimAvailExtern.cpp           62
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp                 17
-rw-r--r--  lib/Transforms/IPO/ForceFunctionAttrs.cpp         3
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp            351
-rw-r--r--  lib/Transforms/IPO/FunctionImport.cpp           871
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                108
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                969
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp     75
-rw-r--r--  lib/Transforms/IPO/IPO.cpp                       32
-rw-r--r--  lib/Transforms/IPO/InferFunctionAttrs.cpp       928
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp              11
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp              29
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                  253
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp              225
-rw-r--r--  lib/Transforms/IPO/LLVMBuild.txt                  2
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp             29
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp (renamed from lib/Transforms/IPO/LowerBitSets.cpp)  419
-rw-r--r--  lib/Transforms/IPO/Makefile                      15
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp           184
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp           74
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp       430
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp                   51
-rw-r--r--  lib/Transforms/IPO/SampleProfile.cpp            289
-rw-r--r--  lib/Transforms/IPO/StripDeadPrototypes.cpp        6
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp              46
-rw-r--r--  lib/Transforms/IPO/WholeProgramDevirt.cpp       843
31 files changed, 3384 insertions(+), 3512 deletions(-)
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 0e05129b52617..0716a3a9cbe90 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -38,6 +38,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -68,6 +69,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getAAResultsAnalysisUsage(AU);
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -78,19 +80,8 @@ namespace {
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
}
- /// A vector used to hold the indices of a single GEP instruction
- typedef std::vector<uint64_t> IndicesVector;
-
private:
- bool isDenselyPacked(Type *type, const DataLayout &DL);
- bool canPaddingBeAccessed(Argument *Arg);
- CallGraphNode *PromoteArguments(CallGraphNode *CGN);
- bool isSafeToPromoteArgument(Argument *Arg, bool isByVal,
- AAResults &AAR) const;
- CallGraphNode *DoPromotion(Function *F,
- SmallPtrSetImpl<Argument*> &ArgsToPromote,
- SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
-
+
using llvm::Pass::doInitialization;
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
@@ -98,6 +89,21 @@ namespace {
};
}
+/// A vector used to hold the indices of a single GEP instruction
+typedef std::vector<uint64_t> IndicesVector;
+
+static CallGraphNode *
+PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
+ function_ref<AAResults &(Function &F)> AARGetter,
+ unsigned MaxElements);
+static bool isDenselyPacked(Type *type, const DataLayout &DL);
+static bool canPaddingBeAccessed(Argument *Arg);
+static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
+ unsigned MaxElements);
+static CallGraphNode *
+DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
+ SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG);
+
char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -111,16 +117,19 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
}
-bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+static bool runImpl(CallGraphSCC &SCC, CallGraph &CG,
+ function_ref<AAResults &(Function &F)> AARGetter,
+ unsigned MaxElements) {
bool Changed = false, LocalChange;
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- if (CallGraphNode *CGN = PromoteArguments(*I)) {
+ for (CallGraphNode *OldNode : SCC) {
+ if (CallGraphNode *NewNode =
+ PromoteArguments(OldNode, CG, AARGetter, MaxElements)) {
LocalChange = true;
- SCC.ReplaceNode(*I, CGN);
+ SCC.ReplaceNode(OldNode, NewNode);
}
}
Changed |= LocalChange; // Remember that we changed something.
@@ -129,8 +138,30 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ if (skipSCC(SCC))
+ return false;
+
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
+ // We compute dedicated AA results for each function in the SCC as needed. We
+ // use a lambda referencing external objects so that they live long enough to
+ // be queried, but we re-use them each time.
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+ AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+ return *AAR;
+ };
+
+ return runImpl(SCC, CG, AARGetter, maxElements);
+}
+
/// \brief Checks if a type could have padding bytes.
-bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
+static bool isDenselyPacked(Type *type, const DataLayout &DL) {
// There is no size information, so be conservative.
if (!type->isSized())
@@ -166,7 +197,7 @@ bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
}
/// \brief Checks if the padding bytes of an argument could be accessed.
-bool ArgPromotion::canPaddingBeAccessed(Argument *arg) {
+static bool canPaddingBeAccessed(Argument *arg) {
assert(arg->hasByValAttr());
@@ -207,7 +238,10 @@ bool ArgPromotion::canPaddingBeAccessed(Argument *arg) {
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
///
-CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
+static CallGraphNode *
+PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
+ function_ref<AAResults &(Function &F)> AARGetter,
+ unsigned MaxElements) {
Function *F = CGN->getFunction();
// Make sure that it is local to this module.
@@ -242,20 +276,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
const DataLayout &DL = F->getParent()->getDataLayout();
- // We need to manually construct BasicAA directly in order to disable its use
- // of other function analyses.
- BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));
-
- // Construct our own AA results for this function. We do this manually to
- // work around the limitations of the legacy pass manager.
- AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));
+ AAResults &AAR = AARGetter(*F);
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
- for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
- Argument *PtrArg = PointerArgs[i];
+ for (Argument *PtrArg : PointerArgs) {
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
// Replace sret attribute with noalias. This reduces register pressure by
@@ -285,10 +312,10 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- if (maxElements > 0 && STy->getNumElements() > maxElements) {
+ if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
DEBUG(dbgs() << "argpromotion disable promoting argument '"
<< PtrArg->getName() << "' because it would require adding more"
- << " than " << maxElements << " arguments to the function.\n");
+ << " than " << MaxElements << " arguments to the function.\n");
continue;
}
@@ -302,7 +329,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
}
// Safe to transform, don't even bother trying to "promote" it.
- // Passing the elements as a scalar will allow scalarrepl to hack on
+ // Passing the elements as a scalar will allow sroa to hack on
// the new alloca we introduce.
if (AllSimple) {
ByValArgsToTransform.insert(PtrArg);
@@ -328,7 +355,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
}
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
+ MaxElements))
ArgsToPromote.insert(PtrArg);
}
@@ -336,7 +364,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
return nullptr;
- return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
+ return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG);
}
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
@@ -364,8 +392,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
/// elements in Prefix is the same as the corresponding elements in Longer.
///
/// This means it also returns true when Prefix and Longer are equal!
-static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
- const ArgPromotion::IndicesVector &Longer) {
+static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
if (Prefix.size() > Longer.size())
return false;
return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
@@ -373,9 +400,9 @@ static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix,
/// Checks if Indices, or a prefix of Indices, is in Set.
-static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
- std::set<ArgPromotion::IndicesVector> &Set) {
- std::set<ArgPromotion::IndicesVector>::iterator Low;
+static bool PrefixIn(const IndicesVector &Indices,
+ std::set<IndicesVector> &Set) {
+ std::set<IndicesVector>::iterator Low;
Low = Set.upper_bound(Indices);
if (Low != Set.begin())
Low--;
@@ -392,9 +419,9 @@ static bool PrefixIn(const ArgPromotion::IndicesVector &Indices,
/// is already a prefix of Indices in Safe, Indices are implicitly marked safe
/// already. Furthermore, any indices that Indices is itself a prefix of, are
/// removed from Safe (since they are implicitly safe because of Indices now).
-static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
- std::set<ArgPromotion::IndicesVector> &Safe) {
- std::set<ArgPromotion::IndicesVector>::iterator Low;
+static void MarkIndicesSafe(const IndicesVector &ToMark,
+ std::set<IndicesVector> &Safe) {
+ std::set<IndicesVector>::iterator Low;
Low = Safe.upper_bound(ToMark);
// Guard against the case where Safe is empty
if (Low != Safe.begin())
@@ -415,9 +442,9 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
Low = Safe.insert(Low, ToMark);
++Low;
// If we're a prefix of longer index list(s), remove those
- std::set<ArgPromotion::IndicesVector>::iterator End = Safe.end();
+ std::set<IndicesVector>::iterator End = Safe.end();
while (Low != End && IsPrefix(ToMark, *Low)) {
- std::set<ArgPromotion::IndicesVector>::iterator Remove = Low;
+ std::set<IndicesVector>::iterator Remove = Low;
++Low;
Safe.erase(Remove);
}
@@ -428,9 +455,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
-bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
- bool isByValOrInAlloca,
- AAResults &AAR) const {
+static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
+ AAResults &AAR, unsigned MaxElements) {
typedef std::set<IndicesVector> GEPIndicesSet;
// Quick exit for unused arguments
@@ -518,7 +544,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just do increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR);
+ return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
+ MaxElements);
}
// Ensure that all of the indices are constants.
@@ -552,10 +579,10 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// to make sure that we aren't promoting too many elements. If so, nothing
// to do.
if (ToPromote.find(Operands) == ToPromote.end()) {
- if (maxElements > 0 && ToPromote.size() == maxElements) {
+ if (MaxElements > 0 && ToPromote.size() == MaxElements) {
DEBUG(dbgs() << "argpromotion not promoting argument '"
<< Arg->getName() << "' because it would require adding more "
- << "than " << maxElements << " arguments to the function.\n");
+ << "than " << MaxElements << " arguments to the function.\n");
// We limit aggregate promotion to only promoting up to a fixed number
// of elements of the aggregate.
return false;
@@ -575,10 +602,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// blocks we know to be transparent to the load.
SmallPtrSet<BasicBlock*, 16> TranspBlocks;
- for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
+ for (LoadInst *Load : Loads) {
// Check to see if the load is invalidated from the start of the block to
// the load itself.
- LoadInst *Load = Loads[i];
BasicBlock *BB = Load->getParent();
MemoryLocation Loc = MemoryLocation::get(Load);
@@ -604,9 +630,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
-CallGraphNode *ArgPromotion::DoPromotion(Function *F,
- SmallPtrSetImpl<Argument*> &ArgsToPromote,
- SmallPtrSetImpl<Argument*> &ByValArgsToTransform) {
+static CallGraphNode *
+DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
+ SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
@@ -700,12 +726,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
// Add a parameter to the function for each element passed in.
- for (ScalarizeTable::iterator SI = ArgIndices.begin(),
- E = ArgIndices.end(); SI != E; ++SI) {
+ for (const auto &ArgIndex : ArgIndices) {
// not allowed to dereference ->begin() if size() is 0
Params.push_back(GetElementPtrInst::getIndexedType(
cast<PointerType>(I->getType()->getScalarType())->getElementType(),
- SI->second));
+ ArgIndex.second));
assert(Params.back());
}
@@ -745,10 +770,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
- // Get the callgraph information that we need to update to reflect our
- // changes.
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
-
// Get a new callgraph node for NF.
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
@@ -800,27 +821,25 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Store the Value* version of the indices in here, but declare it now
// for reuse.
std::vector<Value*> Ops;
- for (ScalarizeTable::iterator SI = ArgIndices.begin(),
- E = ArgIndices.end(); SI != E; ++SI) {
+ for (const auto &ArgIndex : ArgIndices) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)];
- if (!SI->second.empty()) {
- Ops.reserve(SI->second.size());
+ LoadInst *OrigLoad =
+ OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
+ if (!ArgIndex.second.empty()) {
+ Ops.reserve(ArgIndex.second.size());
Type *ElTy = V->getType();
- for (IndicesVector::const_iterator II = SI->second.begin(),
- IE = SI->second.end();
- II != IE; ++II) {
+ for (unsigned long II : ArgIndex.second) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
Type *IdxTy = (ElTy->isStructTy() ?
Type::getInt32Ty(F->getContext()) :
Type::getInt64Ty(F->getContext()));
- Ops.push_back(ConstantInt::get(IdxTy, *II));
+ Ops.push_back(ConstantInt::get(IdxTy, II));
// Keep track of the type we're currently indexing.
- ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(SI->first, V, Ops,
+ V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
V->getName() + ".idx", Call);
Ops.clear();
}
@@ -852,15 +871,18 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
AttributesVec.push_back(AttributeSet::get(Call->getContext(),
CallPAL.getFnAttributes()));
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
+
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, "", Call);
+ Args, OpBundles, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
AttributesVec));
} else {
- New = CallInst::Create(NF, Args, "", Call);
+ New = CallInst::Create(NF, Args, OpBundles, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
AttributesVec));
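
Note: the theme of the ArgumentPromotion changes above is turning pass methods into static helpers, with alias analysis threaded through a function_ref so the helpers no longer need a pass object. A minimal standalone sketch of that getter pattern in plain C++ (illustrative names only, not the LLVM API):

    #include <functional>
    #include <optional>
    #include <string>

    struct Results { std::string Fn; };      // stands in for AAResults

    // The helper sees only a callable; it neither knows nor cares that the
    // caller is a legacy pass holding the analysis state.
    static bool runImplSketch(
        const std::function<Results &(const std::string &)> &Getter) {
      Results &R = Getter("callee");         // fresh per-function results
      return !R.Fn.empty();
    }

    int main() {
      std::optional<Results> Cached;         // owned by the caller, like BAR/AAR
      auto Getter = [&](const std::string &F) -> Results & {
        Cached.emplace(Results{F});          // rebuilt on each query
        return *Cached;
      };
      return runImplSketch(Getter) ? 0 : 1;
    }
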
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 351b88fe2aa0b..d6782c738cbe1 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -19,7 +19,7 @@ add_llvm_library(LLVMipo
Inliner.cpp
Internalize.cpp
LoopExtractor.cpp
- LowerBitSets.cpp
+ LowerTypeTests.cpp
MergeFunctions.cpp
PartialInlining.cpp
PassManagerBuilder.cpp
@@ -27,6 +27,7 @@ add_llvm_library(LLVMipo
SampleProfile.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
+ WholeProgramDevirt.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 0aa49d6fde014..d75ed206ad23c 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -17,7 +17,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ConstantMerge.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -28,41 +28,13 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
using namespace llvm;
#define DEBUG_TYPE "constmerge"
STATISTIC(NumMerged, "Number of global constants merged");
-namespace {
- struct ConstantMerge : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- ConstantMerge() : ModulePass(ID) {
- initializeConstantMergePass(*PassRegistry::getPassRegistry());
- }
-
- // For this pass, process all of the globals in the module, eliminating
- // duplicate constants.
- bool runOnModule(Module &M) override;
-
- // Return true iff we can determine the alignment of this global variable.
- bool hasKnownAlignment(GlobalVariable *GV) const;
-
- // Return the alignment of the global, including converting the default
- // alignment to a concrete value.
- unsigned getAlignment(GlobalVariable *GV) const;
-
- };
-}
-
-char ConstantMerge::ID = 0;
-INITIALIZE_PASS(ConstantMerge, "constmerge",
- "Merge Duplicate Global Constants", false, false)
-
-ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
-
-
-
/// Find values that are marked as llvm.used.
static void FindUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
@@ -85,18 +57,17 @@ static bool IsBetterCanonical(const GlobalVariable &A,
if (A.hasLocalLinkage() && !B.hasLocalLinkage())
return false;
- return A.hasUnnamedAddr();
+ return A.hasGlobalUnnamedAddr();
}
-unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+static unsigned getAlignment(GlobalVariable *GV) {
unsigned Align = GV->getAlignment();
if (Align)
return Align;
return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
-bool ConstantMerge::runOnModule(Module &M) {
-
+static bool mergeConstants(Module &M) {
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
@@ -181,11 +152,11 @@ bool ConstantMerge::runOnModule(Module &M) {
if (!Slot || Slot == GV)
continue;
- if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+ if (!Slot->hasGlobalUnnamedAddr() && !GV->hasGlobalUnnamedAddr())
continue;
- if (!GV->hasUnnamedAddr())
- Slot->setUnnamedAddr(false);
+ if (!GV->hasGlobalUnnamedAddr())
+ Slot->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
// Make all uses of the duplicate constant use the canonical version.
Replacements.push_back(std::make_pair(GV, Slot));
@@ -220,3 +191,34 @@ bool ConstantMerge::runOnModule(Module &M) {
Replacements.clear();
}
}
+
+PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) {
+ if (!mergeConstants(M))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct ConstantMergeLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ConstantMergeLegacyPass() : ModulePass(ID) {
+ initializeConstantMergeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ // For this pass, process all of the globals in the module, eliminating
+ // duplicate constants.
+ bool runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+ return mergeConstants(M);
+ }
+};
+}
+
+char ConstantMergeLegacyPass::ID = 0;
+INITIALIZE_PASS(ConstantMergeLegacyPass, "constmerge",
+ "Merge Duplicate Global Constants", false, false)
+
+ModulePass *llvm::createConstantMergePass() {
+ return new ConstantMergeLegacyPass();
+}
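
Note: ConstantMerge shows the porting recipe that ElimAvailExtern and DeadArgumentElimination below follow as well: the transform becomes a free static function, a new-pass-manager pass returns PreservedAnalyses, and a thin legacy wrapper keeps the old registration plus the skipModule() check. A compilable sketch of that shape with stand-in types (not the real LLVM classes):

    struct Module {};
    struct PreservedAnalyses {
      bool AllPreserved;
      static PreservedAnalyses all() { return {true}; }
      static PreservedAnalyses none() { return {false}; }
    };

    static bool mergeConstantsSketch(Module &) { return false; } // the real work

    struct NewPMPassSketch {            // new PM: report which analyses survive
      PreservedAnalyses run(Module &M) {
        return mergeConstantsSketch(M) ? PreservedAnalyses::none()
                                       : PreservedAnalyses::all();
      }
    };

    struct LegacyPassSketch {           // legacy PM: plain "changed" boolean
      bool runOnModule(Module &M) { return mergeConstantsSketch(M); }
    };
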
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index 5bbb7513005c6..58731eaf6e30f 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/CrossDSOCFI.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Statistic.h"
@@ -30,13 +30,14 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
#define DEBUG_TYPE "cross-dso-cfi"
-STATISTIC(TypeIds, "Number of unique type identifiers");
+STATISTIC(NumTypeIds, "Number of unique type identifiers");
namespace {
@@ -46,13 +47,10 @@ struct CrossDSOCFI : public ModulePass {
initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry());
}
- Module *M;
MDNode *VeryLikelyWeights;
- ConstantInt *extractBitSetTypeId(MDNode *MD);
- void buildCFICheck();
-
- bool doInitialization(Module &M) override;
+ ConstantInt *extractNumericTypeId(MDNode *MD);
+ void buildCFICheck(Module &M);
bool runOnModule(Module &M) override;
};
@@ -65,18 +63,10 @@ char CrossDSOCFI::ID = 0;
ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; }
-bool CrossDSOCFI::doInitialization(Module &Mod) {
- M = &Mod;
- VeryLikelyWeights =
- MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1);
-
- return false;
-}
-
-/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode.
-ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) {
+/// Extracts a numeric type identifier from an MDNode containing type metadata.
+ConstantInt *CrossDSOCFI::extractNumericTypeId(MDNode *MD) {
// This check excludes vtables for classes inside anonymous namespaces.
- auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0));
+ auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(1));
if (!TM)
return nullptr;
auto C = dyn_cast_or_null<ConstantInt>(TM->getValue());
@@ -84,68 +74,63 @@ ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) {
// We are looking for i64 constants.
if (C->getBitWidth() != 64) return nullptr;
- // Sanity check.
- auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1));
- // Can be null if a function was removed by an optimization.
- if (FM) {
- auto F = dyn_cast<Function>(FM->getValue());
- // But can never be a function declaration.
- assert(!F || !F->isDeclaration());
- (void)F; // Suppress unused variable warning in the no-asserts build.
- }
return C;
}
/// buildCFICheck - emits __cfi_check for the current module.
-void CrossDSOCFI::buildCFICheck() {
+void CrossDSOCFI::buildCFICheck(Module &M) {
// FIXME: verify that __cfi_check ends up near the end of the code section,
- // but before the jump slots created in LowerBitSets.
- llvm::DenseSet<uint64_t> BitSetIds;
- NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets");
-
- if (BitSetNM)
- for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I)
- if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I)))
- BitSetIds.insert(TypeId->getZExtValue());
-
- LLVMContext &Ctx = M->getContext();
- Constant *C = M->getOrInsertFunction(
- "__cfi_check",
- FunctionType::get(
- Type::getVoidTy(Ctx),
- {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))},
- false));
+ // but before the jump slots created in LowerTypeTests.
+ llvm::DenseSet<uint64_t> TypeIds;
+ SmallVector<MDNode *, 2> Types;
+ for (GlobalObject &GO : M.global_objects()) {
+ Types.clear();
+ GO.getMetadata(LLVMContext::MD_type, Types);
+ for (MDNode *Type : Types) {
+ // Sanity check. GO must not be a function declaration.
+ assert(!isa<Function>(&GO) || !cast<Function>(&GO)->isDeclaration());
+
+ if (ConstantInt *TypeId = extractNumericTypeId(Type))
+ TypeIds.insert(TypeId->getZExtValue());
+ }
+ }
+
+ LLVMContext &Ctx = M.getContext();
+ Constant *C = M.getOrInsertFunction(
+ "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
+ Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), nullptr);
Function *F = dyn_cast<Function>(C);
F->setAlignment(4096);
auto args = F->arg_begin();
- Argument &CallSiteTypeId = *(args++);
+ Value &CallSiteTypeId = *(args++);
CallSiteTypeId.setName("CallSiteTypeId");
- Argument &Addr = *(args++);
+ Value &Addr = *(args++);
Addr.setName("Addr");
+ Value &CFICheckFailData = *(args++);
+ CFICheckFailData.setName("CFICheckFailData");
assert(args == F->arg_end());
BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
+ BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F);
- BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F);
- IRBuilder<> IRBTrap(TrapBB);
- Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
- llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn);
- TrapCall->setDoesNotReturn();
- TrapCall->setDoesNotThrow();
- IRBTrap.CreateUnreachable();
+ BasicBlock *TrapBB = BasicBlock::Create(Ctx, "fail", F);
+ IRBuilder<> IRBFail(TrapBB);
+ Constant *CFICheckFailFn = M.getOrInsertFunction(
+ "__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx),
+ Type::getInt8PtrTy(Ctx), nullptr);
+ IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
+ IRBFail.CreateBr(ExitBB);
- BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F);
IRBuilder<> IRBExit(ExitBB);
IRBExit.CreateRetVoid();
IRBuilder<> IRB(BB);
- SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size());
- for (uint64_t TypeId : BitSetIds) {
+ SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, TypeIds.size());
+ for (uint64_t TypeId : TypeIds) {
ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId);
BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F);
IRBuilder<> IRBTest(TestBB);
- Function *BitsetTestFn =
- Intrinsic::getDeclaration(M, Intrinsic::bitset_test);
+ Function *BitsetTestFn = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
Value *Test = IRBTest.CreateCall(
BitsetTestFn, {&Addr, MetadataAsValue::get(
@@ -154,13 +139,26 @@ void CrossDSOCFI::buildCFICheck() {
BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights);
SI->addCase(CaseTypeId, TestBB);
- ++TypeIds;
+ ++NumTypeIds;
}
}
bool CrossDSOCFI::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ VeryLikelyWeights =
+ MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1);
if (M.getModuleFlag("Cross-DSO CFI") == nullptr)
return false;
- buildCFICheck();
+ buildCFICheck(M);
return true;
}
+
+PreservedAnalyses CrossDSOCFIPass::run(Module &M, AnalysisManager<Module> &AM) {
+ CrossDSOCFI Impl;
+ bool Changed = Impl.runOnModule(M);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
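
Note: after this change __cfi_check takes a third argument and routes failures to __cfi_check_fail instead of trapping. The emitted function has roughly this shape (a C++ sketch, not the generated IR; __type_test stands in for the llvm.type.test intrinsic and 0x1234 for one collected type id):

    extern bool __type_test(void *Addr, unsigned long TypeId);   // stand-in
    extern void __cfi_check_fail(void *Data, void *Addr);

    void __cfi_check(unsigned long CallSiteTypeId, void *Addr, void *Data) {
      switch (CallSiteTypeId) {    // one case per unique type id in the module
      case 0x1234:
        if (__type_test(Addr, 0x1234))   // "test" block, branch weighted
          return;                        // "exit" block
        break;
      default:
        break;
      }
      __cfi_check_fail(Data, Addr);      // "fail" block, then falls to exit
    }
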
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4de3d95ab11dc..c8c895b187962 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -17,8 +17,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -35,8 +34,8 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <map>
#include <set>
#include <tuple>
using namespace llvm;
@@ -51,77 +50,6 @@ namespace {
/// DAE - The dead argument elimination pass.
///
class DAE : public ModulePass {
- public:
-
- /// Struct that represents (part of) either a return value or a function
- /// argument. Used so that arguments and return values can be used
- /// interchangeably.
- struct RetOrArg {
- RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
- IsArg(IsArg) {}
- const Function *F;
- unsigned Idx;
- bool IsArg;
-
- /// Make RetOrArg comparable, so we can put it into a map.
- bool operator<(const RetOrArg &O) const {
- return std::tie(F, Idx, IsArg) < std::tie(O.F, O.Idx, O.IsArg);
- }
-
- /// Make RetOrArg comparable, so we can easily iterate the multimap.
- bool operator==(const RetOrArg &O) const {
- return F == O.F && Idx == O.Idx && IsArg == O.IsArg;
- }
-
- std::string getDescription() const {
- return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) +
- " of function " + F->getName()).str();
- }
- };
-
- /// Liveness enum - During our initial pass over the program, we determine
- /// that things are either alive or maybe alive. We don't mark anything
- /// explicitly dead (even if we know they are), since anything not alive
- /// with no registered uses (in Uses) will never be marked alive and will
- /// thus become dead in the end.
- enum Liveness { Live, MaybeLive };
-
- /// Convenience wrapper
- RetOrArg CreateRet(const Function *F, unsigned Idx) {
- return RetOrArg(F, Idx, false);
- }
- /// Convenience wrapper
- RetOrArg CreateArg(const Function *F, unsigned Idx) {
- return RetOrArg(F, Idx, true);
- }
-
- typedef std::multimap<RetOrArg, RetOrArg> UseMap;
- /// This maps a return value or argument to any MaybeLive return values or
- /// arguments it uses. This allows the MaybeLive values to be marked live
- /// when any of its users is marked live.
- /// For example (indices are left out for clarity):
- /// - Uses[ret F] = ret G
- /// This means that F calls G, and F returns the value returned by G.
- /// - Uses[arg F] = ret G
- /// This means that some function calls G and passes its result as an
- /// argument to F.
- /// - Uses[ret F] = arg F
- /// This means that F returns one of its own arguments.
- /// - Uses[arg F] = arg G
- /// This means that G calls F and passes one of its own (G's) arguments
- /// directly to F.
- UseMap Uses;
-
- typedef std::set<RetOrArg> LiveSet;
- typedef std::set<const Function*> LiveFuncSet;
-
- /// This set contains all values that have been determined to be live.
- LiveSet LiveValues;
- /// This set contains all values that are cannot be changed in any way.
- LiveFuncSet LiveFunctions;
-
- typedef SmallVector<RetOrArg, 5> UseVector;
-
protected:
// DAH uses this to specify a different ID.
explicit DAE(char &ID) : ModulePass(ID) {}
@@ -132,25 +60,16 @@ namespace {
initializeDAEPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M) override;
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ DeadArgumentEliminationPass DAEP(ShouldHackArguments());
+ ModuleAnalysisManager DummyMAM;
+ PreservedAnalyses PA = DAEP.run(M, DummyMAM);
+ return !PA.areAllPreserved();
+ }
virtual bool ShouldHackArguments() const { return false; }
-
- private:
- Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
- Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
- unsigned RetValNum = -1U);
- Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
-
- void SurveyFunction(const Function &F);
- void MarkValue(const RetOrArg &RA, Liveness L,
- const UseVector &MaybeLiveUses);
- void MarkLive(const RetOrArg &RA);
- void MarkLive(const Function &F);
- void PropagateLiveness(const RetOrArg &RA);
- bool RemoveDeadStuffFromFunction(Function *F);
- bool DeleteDeadVarargs(Function &Fn);
- bool RemoveDeadArgumentsFromCallers(Function &Fn);
};
}
@@ -183,7 +102,7 @@ ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
/// DeleteDeadVarargs - If this is an function that takes a ... list, and if
/// llvm.vastart is never called, the varargs list is dead for the function.
-bool DAE::DeleteDeadVarargs(Function &Fn) {
+bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
@@ -200,9 +119,9 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Okay, we know we can transform this function if safe. Scan its body
// looking for calls marked musttail or calls to llvm.vastart.
- for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- CallInst *CI = dyn_cast<CallInst>(I);
+ for (BasicBlock &BB : Fn) {
+ for (Instruction &I : BB) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
if (CI->isMustTailCall())
@@ -229,6 +148,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Create the new function body and insert it into the module...
Function *NF = Function::Create(NFTy, Fn.getLinkage());
NF->copyAttributesFrom(&Fn);
+ NF->setComdat(Fn.getComdat());
Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
NF->takeName(&Fn);
@@ -257,14 +177,17 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
}
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
+
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, "", Call);
+ Args, OpBundles, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(PAL);
} else {
- New = CallInst::Create(NF, Args, "", Call);
+ New = CallInst::Create(NF, Args, OpBundles, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(PAL);
if (cast<CallInst>(Call)->isTailCall())
@@ -316,8 +239,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
/// arguments that are unused, and changes the caller parameters to be undefined
/// instead.
-bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
-{
+bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
// We cannot change the arguments if this TU does not define the function or
// if the linker may choose a function body from another TU, even if the
// nominal linkage indicates that other copies of the function have the same
@@ -329,7 +251,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
// %v = load i32 %p
// ret void
// }
- if (!Fn.isStrongDefinitionForLinker())
+ if (!Fn.hasExactDefinition())
return false;
// Functions with local linkage should already have been handled, except the
@@ -409,7 +331,9 @@ static Type *getRetComponentType(const Function *F, unsigned Idx) {
/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
/// liveness of Use.
-DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
+DeadArgumentEliminationPass::Liveness
+DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use,
+ UseVector &MaybeLiveUses) {
// We're live if our use or its Function is already marked as live.
if (LiveFunctions.count(Use.F) || LiveValues.count(Use))
return Live;
@@ -428,8 +352,9 @@ DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
/// RetValNum is the return value number to use when this use is used in a
/// return instruction. This is used in the recursion, you should always leave
/// it at 0.
-DAE::Liveness DAE::SurveyUse(const Use *U,
- UseVector &MaybeLiveUses, unsigned RetValNum) {
+DeadArgumentEliminationPass::Liveness
+DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses,
+ unsigned RetValNum) {
const User *V = U->getUser();
if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
// The value is returned from a function. It's only live when the
@@ -442,13 +367,14 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
// We might be live, depending on the liveness of Use.
return MarkIfNotLive(Use, MaybeLiveUses);
} else {
- DAE::Liveness Result = MaybeLive;
+ DeadArgumentEliminationPass::Liveness Result = MaybeLive;
for (unsigned i = 0; i < NumRetVals(F); ++i) {
RetOrArg Use = CreateRet(F, i);
// We might be live, depending on the liveness of Use. If any
// sub-value is live, then the entire value is considered live. This
// is a conservative choice, and better tracking is possible.
- DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+ DeadArgumentEliminationPass::Liveness SubResult =
+ MarkIfNotLive(Use, MaybeLiveUses);
if (Result != Live)
Result = SubResult;
}
@@ -514,7 +440,9 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If
/// the result is Live, MaybeLiveUses might be modified but its content should
/// be ignored (since it might not be complete).
-DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
+DeadArgumentEliminationPass::Liveness
+DeadArgumentEliminationPass::SurveyUses(const Value *V,
+ UseVector &MaybeLiveUses) {
// Assume it's dead (which will only hold if there are no uses at all..).
Liveness Result = MaybeLive;
// Check each use.
@@ -534,7 +462,7 @@ DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
// We consider arguments of non-internal functions to be intrinsically alive as
// well as arguments to functions which have their "address taken".
//
-void DAE::SurveyFunction(const Function &F) {
+void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Functions with inalloca parameters are expecting args in a particular
// register and memory layout.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
@@ -570,12 +498,13 @@ void DAE::SurveyFunction(const Function &F) {
return;
}
- if (!F.hasLocalLinkage() && (!ShouldHackArguments() || F.isIntrinsic())) {
+ if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) {
MarkLive(F);
return;
}
- DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting callers for fn: "
+ << F.getName() << "\n");
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
@@ -637,7 +566,8 @@ void DAE::SurveyFunction(const Function &F) {
for (unsigned i = 0; i != RetCount; ++i)
MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
- DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting args for fn: "
+ << F.getName() << "\n");
// Now, check all of our arguments.
unsigned i = 0;
@@ -669,17 +599,16 @@ void DAE::SurveyFunction(const Function &F) {
/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
/// such that RA will be marked live if any use in MaybeLiveUses gets marked
/// live later on.
-void DAE::MarkValue(const RetOrArg &RA, Liveness L,
- const UseVector &MaybeLiveUses) {
+void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L,
+ const UseVector &MaybeLiveUses) {
switch (L) {
case Live: MarkLive(RA); break;
case MaybeLive:
{
// Note any uses of this value, so this return value can be
// marked live whenever one of the uses becomes live.
- for (UseVector::const_iterator UI = MaybeLiveUses.begin(),
- UE = MaybeLiveUses.end(); UI != UE; ++UI)
- Uses.insert(std::make_pair(*UI, RA));
+ for (const auto &MaybeLiveUse : MaybeLiveUses)
+ Uses.insert(std::make_pair(MaybeLiveUse, RA));
break;
}
}
@@ -689,8 +618,9 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L,
/// changed in any way. Additionally,
/// mark any values that are used as this function's parameters or by its return
/// values (according to Uses) live as well.
-void DAE::MarkLive(const Function &F) {
- DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
+void DeadArgumentEliminationPass::MarkLive(const Function &F) {
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - Intrinsically live fn: "
+ << F.getName() << "\n");
// Mark the function as live.
LiveFunctions.insert(&F);
// Mark all arguments as live.
@@ -704,20 +634,21 @@ void DAE::MarkLive(const Function &F) {
/// MarkLive - Mark the given return value or argument as live. Additionally,
/// mark any values that are used by this value (according to Uses) live as
/// well.
-void DAE::MarkLive(const RetOrArg &RA) {
+void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) {
if (LiveFunctions.count(RA.F))
return; // Function was already marked Live.
if (!LiveValues.insert(RA).second)
return; // We were already marked Live.
- DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n");
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking "
+ << RA.getDescription() << " live\n");
PropagateLiveness(RA);
}
/// PropagateLiveness - Given that RA is a live value, propagate its liveness
/// to any other values it uses (according to Uses).
-void DAE::PropagateLiveness(const RetOrArg &RA) {
+void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) {
// We don't use upper_bound (or equal_range) here, because our recursive call
// to ourselves is likely to cause the upper_bound (which is the first value
// not belonging to RA) to become erased and the iterator invalidated.
@@ -736,7 +667,7 @@ void DAE::PropagateLiveness(const RetOrArg &RA) {
// that are not in LiveValues. Transform the function and all of the callees of
// the function to not have these arguments and return values.
//
-bool DAE::RemoveDeadStuffFromFunction(Function *F) {
+bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Don't modify fully live functions
if (LiveFunctions.count(F))
return false;
@@ -777,8 +708,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
} else {
++NumArgumentsEliminated;
- DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
- << ") from " << F->getName() << "\n");
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument " << i
+ << " (" << I->getName() << ") from " << F->getName()
+ << "\n");
}
}
@@ -821,8 +753,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
NewRetIdxs[i] = RetTypes.size() - 1;
} else {
++NumRetValsEliminated;
- DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
- << F->getName() << "\n");
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing return value "
+ << i << " from " << F->getName() << "\n");
}
}
if (RetTypes.size() > 1) {
@@ -882,6 +814,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Create the new function body and insert it into the module...
Function *NF = Function::Create(NFTy, F->getLinkage());
NF->copyAttributesFrom(F);
+ NF->setComdat(F->getComdat());
NF->setAttributes(NewPAL);
// Insert the new function before the old function, so we won't be processing
// it again.
@@ -950,14 +883,17 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Reconstruct the AttributesList based on the vector we constructed.
AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
+
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, "", Call->getParent());
+ Args, OpBundles, "", Call->getParent());
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(NewCallPAL);
} else {
- New = CallInst::Create(NF, Args, "", Call);
+ New = CallInst::Create(NF, Args, OpBundles, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(NewCallPAL);
if (cast<CallInst>(Call)->isTailCall())
@@ -1045,8 +981,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// If we change the return value of the function we must rewrite any return
// instructions. Check this now.
if (F->getReturnType() != NF->getReturnType())
- for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ for (BasicBlock &BB : *NF)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
Value *RetVal;
if (NFTy->getReturnType()->isVoidTy()) {
@@ -1081,7 +1017,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Replace the return instruction with one returning the new return
// value (possibly 0 if we became void).
ReturnInst::Create(F->getContext(), RetVal, RI);
- BB->getInstList().erase(RI);
+ BB.getInstList().erase(RI);
}
// Patch the pointer to LLVM function in debug info descriptor.
@@ -1093,14 +1029,15 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
return true;
}
-bool DAE::runOnModule(Module &M) {
+PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
+ ModuleAnalysisManager &) {
bool Changed = false;
// First pass: Do a simple check to see if any functions can have their "..."
// removed. We can do this if they never call va_start. This loop cannot be
// fused with the next loop, because deleting a function invalidates
// information computed while surveying other functions.
- DEBUG(dbgs() << "DAE - Deleting dead varargs\n");
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n");
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
Function &F = *I++;
if (F.getFunctionType()->isVarArg())
@@ -1111,7 +1048,7 @@ bool DAE::runOnModule(Module &M) {
// We assume all arguments are dead unless proven otherwise (allowing us to
// determine that dead arguments passed into recursive functions are dead).
//
- DEBUG(dbgs() << "DAE - Determining liveness\n");
+ DEBUG(dbgs() << "DeadArgumentEliminationPass - Determining liveness\n");
for (auto &F : M)
SurveyFunction(F);
@@ -1129,5 +1066,7 @@ bool DAE::runOnModule(Module &M) {
for (auto &F : M)
Changed |= RemoveDeadArgumentsFromCallers(F);
- return Changed;
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
}
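
Note: the renamed DeadArgumentEliminationPass keeps the liveness scheme the deleted class comments described: every return value and argument starts MaybeLive, uses are recorded in a multimap, and marking one value Live floods to everything it transitively feeds. A minimal sketch of that propagation in plain C++ (illustrative types; the real pass also erases processed map entries):

    #include <map>
    #include <set>
    #include <string>

    using Val = std::string;              // stands in for RetOrArg
    static std::multimap<Val, Val> Uses;  // key live => mapped value live too
    static std::set<Val> LiveValues;

    static void MarkLiveSketch(const Val &V) {
      if (!LiveValues.insert(V).second)
        return;                           // already live; stop the recursion
      auto Range = Uses.equal_range(V);   // propagate to every recorded use
      for (auto I = Range.first; I != Range.second; ++I)
        MarkLiveSketch(I->second);
    }
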
diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp
index af313a6b001d7..98c4b1740306d 100644
--- a/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -13,10 +13,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -26,30 +27,7 @@ using namespace llvm;
STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
-namespace {
-struct EliminateAvailableExternally : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- EliminateAvailableExternally() : ModulePass(ID) {
- initializeEliminateAvailableExternallyPass(
- *PassRegistry::getPassRegistry());
- }
-
- // run - Do the EliminateAvailableExternally pass on the specified module,
- // optionally updating the specified callgraph to reflect the changes.
- //
- bool runOnModule(Module &M) override;
-};
-}
-
-char EliminateAvailableExternally::ID = 0;
-INITIALIZE_PASS(EliminateAvailableExternally, "elim-avail-extern",
- "Eliminate Available Externally Globals", false, false)
-
-ModulePass *llvm::createEliminateAvailableExternallyPass() {
- return new EliminateAvailableExternally();
-}
-
-bool EliminateAvailableExternally::runOnModule(Module &M) {
+static bool eliminateAvailableExternally(Module &M) {
bool Changed = false;
// Drop initializers of available externally global variables.
@@ -82,3 +60,37 @@ bool EliminateAvailableExternally::runOnModule(Module &M) {
return Changed;
}
+
+PreservedAnalyses
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
+ if (!eliminateAvailableExternally(M))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct EliminateAvailableExternallyLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ EliminateAvailableExternallyLegacyPass() : ModulePass(ID) {
+ initializeEliminateAvailableExternallyLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ // run - Do the EliminateAvailableExternally pass on the specified module,
+ // optionally updating the specified callgraph to reflect the changes.
+ //
+ bool runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+ return eliminateAvailableExternally(M);
+ }
+};
+}
+
+char EliminateAvailableExternallyLegacyPass::ID = 0;
+INITIALIZE_PASS(EliminateAvailableExternallyLegacyPass, "elim-avail-extern",
+ "Eliminate Available Externally Globals", false, false)
+
+ModulePass *llvm::createEliminateAvailableExternallyPass() {
+ return new EliminateAvailableExternallyLegacyPass();
+}
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 1a3b9253d72fc..479fd182598a7 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -68,6 +68,9 @@ namespace {
: ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+
// Visit the global inline asm.
if (!deleteStuff)
M.setModuleInlineAsm("");
@@ -101,20 +104,20 @@ namespace {
}
// Visit the Functions.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ for (Function &F : M) {
bool Delete =
- deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
+ deleteStuff == (bool)Named.count(&F) && !F.isDeclaration();
if (!Delete) {
- if (I->hasAvailableExternallyLinkage())
+ if (F.hasAvailableExternallyLinkage())
continue;
}
- makeVisible(*I, Delete);
+ makeVisible(F, Delete);
if (Delete) {
// Make this a declaration and drop its comdat.
- I->deleteBody();
- I->setComdat(nullptr);
+ F.deleteBody();
+ F.setComdat(nullptr);
}
}
@@ -128,7 +131,7 @@ namespace {
makeVisible(*CurI, Delete);
if (Delete) {
- Type *Ty = CurI->getType()->getElementType();
+ Type *Ty = CurI->getValueType();
CurI->removeFromParent();
llvm::Value *Declaration;
diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 6df044762cf45..968712138208f 100644
--- a/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -80,7 +80,8 @@ static void addForcedAttributes(Function &F) {
}
}
-PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) {
+PreservedAnalyses ForceFunctionAttrsPass::run(Module &M,
+ ModuleAnalysisManager &) {
if (ForceAttributes.empty())
return PreservedAnalyses::all();
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 527fdd1885a4f..fff5440854148 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -13,6 +13,7 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -52,38 +53,6 @@ typedef SmallSetVector<Function *, 8> SCCNodeSet;
}
namespace {
-struct PostOrderFunctionAttrs : public CallGraphSCCPass {
- static char ID; // Pass identification, replacement for typeid
- PostOrderFunctionAttrs() : CallGraphSCCPass(ID) {
- initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
-
-private:
- TargetLibraryInfo *TLI;
-};
-}
-
-char PostOrderFunctionAttrs::ID = 0;
-INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false)
-
-Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); }
-
-namespace {
/// The three kinds of memory access relevant to 'readonly' and
/// 'readnone' attributes.
enum MemoryAccessKind {
@@ -100,9 +69,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
// Already perfect!
return MAK_ReadNone;
- // Definitions with weak linkage may be overridden at linktime with
- // something that writes memory, so treat them like declarations.
- if (F.isDeclaration() || F.mayBeOverridden()) {
+ // Non-exact function definitions may not be selected at link time, and an
+ // alternative version that writes to memory may be selected. See the comment
+ // on GlobalValue::isDefinitionExact for more details.
+ if (!F.hasExactDefinition()) {
if (AliasAnalysis::onlyReadsMemory(MRB))
return MAK_ReadOnly;
@@ -119,8 +89,12 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
// Detect these now, skipping to the next instruction if one is found.
CallSite CS(cast<Value>(I));
if (CS) {
- // Ignore calls to functions in the same SCC.
- if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ // Ignore calls to functions in the same SCC, as long as the call sites
+ // don't have operand bundles. Calls with operand bundles are allowed to
+ // have memory effects not described by the memory effects of the call
+ // target.
+ if (!CS.hasOperandBundles() && CS.getCalledFunction() &&
+ SCCNodes.count(CS.getCalledFunction()))
continue;
FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);
@@ -311,8 +285,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
}
Function *F = CS.getCalledFunction();
- if (!F || F->isDeclaration() || F->mayBeOverridden() ||
- !SCCNodes.count(F)) {
+ if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
Captured = true;
return true;
}
@@ -490,6 +463,11 @@ determinePointerReadAttrs(Argument *A,
}
case Instruction::Load:
+      // A volatile load has side effects beyond what the readonly attribute
+      // can be relied upon to describe.
+ if (cast<LoadInst>(I)->isVolatile())
+ return Attribute::None;
+
IsRead = true;
break;
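A hedged illustration of why the volatile bail-out is needed: an argument that is only ever loaded from still must not be marked readonly when the load is volatile, because volatile accesses have ordering and device-visible effects the attribute would let the optimizer break. A C-style sketch (hypothetical example, not from the patch):

    // Reads a memory-mapped status register; the access itself is the effect,
    // so it must not be hoisted, duplicated, or deleted.
    int pollStatus(volatile int *Reg) { return *Reg; }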
@@ -517,9 +495,10 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
// Check each function in turn, determining which pointer arguments are not
// captured.
for (Function *F : SCCNodes) {
- // Definitions with weak linkage may be overridden at linktime with
- // something that captures pointers, so treat them like declarations.
- if (F->isDeclaration() || F->mayBeOverridden())
+ // We can infer and propagate function attributes only when we know that the
+ // definition we'll get at link time is *exactly* the definition we see now.
+ // For more details, see GlobalValue::mayBeDerefined.
+ if (!F->hasExactDefinition())
continue;
// Functions that are readonly (or readnone) and nounwind and don't return
@@ -557,12 +536,9 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
// then it must be calling into another function in our SCC. Save
// its particulars for Argument-SCC analysis later.
ArgumentGraphNode *Node = AG[&*A];
- for (SmallVectorImpl<Argument *>::iterator
- UI = Tracker.Uses.begin(),
- UE = Tracker.Uses.end();
- UI != UE; ++UI) {
- Node->Uses.push_back(AG[*UI]);
- if (*UI != A)
+ for (Argument *Use : Tracker.Uses) {
+ Node->Uses.push_back(AG[Use]);
+ if (Use != &*A)
HasNonLocalUses = true;
}
}
@@ -627,17 +603,15 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
// Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
// quickly looking up whether a given Argument is in this ArgumentSCC.
- for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) {
- ArgumentSCCNodes.insert((*I)->Definition);
+ for (ArgumentGraphNode *I : ArgumentSCC) {
+ ArgumentSCCNodes.insert(I->Definition);
}
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *N = *I;
- for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(),
- UE = N->Uses.end();
- UI != UE; ++UI) {
- Argument *A = (*UI)->Definition;
+ for (ArgumentGraphNode *Use : N->Uses) {
+ Argument *A = Use->Definition;
if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
continue;
SCCCaptured = true;
@@ -703,8 +677,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
SmallSetVector<Value *, 8> FlowsToReturn;
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator()))
+ for (BasicBlock &BB : *F)
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
FlowsToReturn.insert(Ret->getReturnValue());
for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
@@ -772,9 +746,10 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
if (F->doesNotAlias(0))
continue;
- // Definitions with weak linkage may be overridden at linktime, so
- // treat them like declarations.
- if (F->isDeclaration() || F->mayBeOverridden())
+ // We can infer and propagate function attributes only when we know that the
+ // definition we'll get at link time is *exactly* the definition we see now.
+ // For more details, see GlobalValue::mayBeDerefined.
+ if (!F->hasExactDefinition())
return false;
// We annotate noalias return values, which are only applicable to
@@ -807,7 +782,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
- const TargetLibraryInfo &TLI, bool &Speculative) {
+ bool &Speculative) {
assert(F->getReturnType()->isPointerTy() &&
"nonnull only meaningful on pointer types");
Speculative = false;
@@ -821,7 +796,7 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
Value *RetVal = FlowsToReturn[i];
// If this value is locally known to be non-null, we're good
- if (isKnownNonNull(RetVal, &TLI))
+ if (isKnownNonNull(RetVal))
continue;
// Otherwise, we need to look upwards since we can't make any local
@@ -870,8 +845,7 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
}
/// Deduce nonnull attributes for the SCC.
-static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
- const TargetLibraryInfo &TLI) {
+static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
  // Speculate that all functions in the SCC return only nonnull
// pointers. We may refute this as we analyze functions.
bool SCCReturnsNonNull = true;
@@ -886,9 +860,10 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
Attribute::NonNull))
continue;
- // Definitions with weak linkage may be overridden at linktime, so
- // treat them like declarations.
- if (F->isDeclaration() || F->mayBeOverridden())
+ // We can infer and propagate function attributes only when we know that the
+ // definition we'll get at link time is *exactly* the definition we see now.
+ // For more details, see GlobalValue::mayBeDerefined.
+ if (!F->hasExactDefinition())
return false;
// We annotate nonnull return values, which are only applicable to
@@ -897,7 +872,7 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
continue;
bool Speculative = false;
- if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) {
+ if (isReturnNonNull(F, SCCNodes, Speculative)) {
if (!Speculative) {
// Mark the function eagerly since we may discover a function
// which prevents us from speculating about the entire SCC
@@ -930,6 +905,49 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
return MadeChange;
}
+/// Remove the convergent attribute from all functions in the SCC if no call
+/// site within the SCC is convergent, ignoring calls to functions within the
+/// SCC itself. Returns true if changes were made.
+static bool removeConvergentAttrs(const SCCNodeSet &SCCNodes) {
+ // For every function in SCC, ensure that either
+ // * it is not convergent, or
+ // * we can remove its convergent attribute.
+ bool HasConvergentFn = false;
+ for (Function *F : SCCNodes) {
+ if (!F->isConvergent()) continue;
+ HasConvergentFn = true;
+
+ // Can't remove convergent from function declarations.
+ if (F->isDeclaration()) return false;
+
+ // Can't remove convergent if any of our functions has a convergent call to a
+ // function not in the SCC.
+ for (Instruction &I : instructions(*F)) {
+ CallSite CS(&I);
+ // Bail if CS is a convergent call to a function not in the SCC.
+ if (CS && CS.isConvergent() &&
+ SCCNodes.count(CS.getCalledFunction()) == 0)
+ return false;
+ }
+ }
+
+ // If the SCC doesn't have any convergent functions, we have nothing to do.
+ if (!HasConvergentFn) return false;
+
+ // If we got here, all of the calls the SCC makes to functions not in the SCC
+ // are non-convergent. Therefore all of the SCC's functions can also be made
+ // non-convergent. We'll remove the attr from the callsites in
+ // InstCombineCalls.
+ for (Function *F : SCCNodes) {
+ if (!F->isConvergent()) continue;
+
+ DEBUG(dbgs() << "Removing convergent attr from fn " << F->getName()
+ << "\n");
+ F->setNotConvergent();
+ }
+ return true;
+}
+
static bool setDoesNotRecurse(Function &F) {
if (F.doesNotRecurse())
return false;
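To make the new deduction concrete, here is a hypothetical SCC on which removeConvergentAttrs fires (illustrative C++ using Clang's convergent attribute; the names are invented):

    // f and g form one SCC. Their only convergent-marked operations are the
    // mutually recursive calls inside the SCC, and neither is a bare
    // declaration, so the pass may drop convergent from both; InstCombine
    // later cleans up the call sites.
    __attribute__((convergent)) void g(int n);
    __attribute__((convergent)) void f(int n) { if (n > 0) g(n - 1); }
    __attribute__((convergent)) void g(int n) { if (n > 0) f(n - 1); }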
@@ -938,56 +956,129 @@ static bool setDoesNotRecurse(Function &F) {
return true;
}
-static bool addNoRecurseAttrs(const CallGraphSCC &SCC) {
+static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// Try and identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
- if (!SCC.isSingular())
+ if (SCCNodes.size() != 1)
return false;
- const CallGraphNode *CGN = *SCC.begin();
- Function *F = CGN->getFunction();
+ Function *F = *SCCNodes.begin();
if (!F || F->isDeclaration() || F->doesNotRecurse())
return false;
// If all of the calls in F are identifiable and are to norecurse functions, F
// is norecurse. This check also detects self-recursion as F is not currently
// marked norecurse, so any call from F to F will not be marked norecurse.
- if (std::all_of(CGN->begin(), CGN->end(),
- [](const CallGraphNode::CallRecord &CR) {
- Function *F = CR.second->getFunction();
- return F && F->doesNotRecurse();
- }))
- // Function calls a potentially recursive function.
- return setDoesNotRecurse(*F);
-
- // Nothing else we can deduce usefully during the postorder traversal.
- return false;
+ for (Instruction &I : instructions(*F))
+ if (auto CS = CallSite(&I)) {
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee || Callee == F || !Callee->doesNotRecurse())
+ // Function calls a potentially recursive function.
+ return false;
+ }
+
+ // Every call was to a non-recursive function other than this function, and
+ // we have no indirect recursion as the SCC size is one. This function cannot
+ // recurse.
+ return setDoesNotRecurse(*F);
}
-bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- bool Changed = false;
+PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager();
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
+  // We pass a lambda into the helper functions to wire them up to the
+  // analysis manager for retrieving function analyses.
auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
+ return FAM.getResult<AAManager>(F);
};
+ // Fill SCCNodes with the elements of the SCC. Also track whether there are
+ // any external or opt-none nodes that will prevent us from optimizing any
+ // part of the SCC.
+ SCCNodeSet SCCNodes;
+ bool HasUnknownCall = false;
+ for (LazyCallGraph::Node &N : C) {
+ Function &F = N.getFunction();
+ if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ // Treat any function we're trying not to optimize as if it were an
+ // indirect call and omit it from the node set used below.
+ HasUnknownCall = true;
+ continue;
+ }
+ // Track whether any functions in this SCC have an unknown call edge.
+ // Note: if this is ever a performance hit, we can common it with
+ // subsequent routines which also do scans over the instructions of the
+ // function.
+ if (!HasUnknownCall)
+ for (Instruction &I : instructions(F))
+ if (auto CS = CallSite(&I))
+ if (!CS.getCalledFunction()) {
+ HasUnknownCall = true;
+ break;
+ }
+
+ SCCNodes.insert(&F);
+ }
+
+ bool Changed = false;
+ Changed |= addReadAttrs(SCCNodes, AARGetter);
+ Changed |= addArgumentAttrs(SCCNodes);
+
+ // If we have no external nodes participating in the SCC, we can deduce some
+ // more precise attributes as well.
+ if (!HasUnknownCall) {
+ Changed |= addNoAliasAttrs(SCCNodes);
+ Changed |= addNonNullAttrs(SCCNodes);
+ Changed |= removeConvergentAttrs(SCCNodes);
+ Changed |= addNoRecurseAttrs(SCCNodes);
+ }
+
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+namespace {
+struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
+ initializePostOrderFunctionAttrsLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ getAAResultsAnalysisUsage(AU);
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char PostOrderFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "functionattrs",
+ "Deduce function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "functionattrs",
+ "Deduce function attributes", false, false)
+
+Pass *llvm::createPostOrderFunctionAttrsLegacyPass() { return new PostOrderFunctionAttrsLegacyPass(); }
+
+template <typename AARGetterT>
+static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
+ bool Changed = false;
+
// Fill SCCNodes with the elements of the SCC. Used for quickly looking up
// whether a given CallGraphNode is in this SCC. Also track whether there are
// any external or opt-none nodes that will prevent us from optimizing any
// part of the SCC.
SCCNodeSet SCCNodes;
bool ExternalNode = false;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ for (CallGraphNode *I : SCC) {
+ Function *F = I->getFunction();
if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
// External node or function we're trying not to optimize - we both avoid
// transform them and avoid leveraging information they provide.
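With the port above, the pass can now be scheduled in the new pass manager. A minimal sketch, assuming the standard CGSCC adaptor of this period (not part of the patch):

    ModulePassManager MPM;
    // Run the CGSCC pass over every SCC of the module in post-order.
    MPM.addPass(
        createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
    // The module-level RPO counterpart defined later in this file.
    MPM.addPass(ReversePostOrderFunctionAttrsPass());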
@@ -1005,28 +1096,37 @@ bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
// more precise attributes as well.
if (!ExternalNode) {
Changed |= addNoAliasAttrs(SCCNodes);
- Changed |= addNonNullAttrs(SCCNodes, *TLI);
+ Changed |= addNonNullAttrs(SCCNodes);
+ Changed |= removeConvergentAttrs(SCCNodes);
+ Changed |= addNoRecurseAttrs(SCCNodes);
}
- Changed |= addNoRecurseAttrs(SCC);
return Changed;
}
+bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
+ if (skipSCC(SCC))
+ return false;
+
+ // We compute dedicated AA results for each function in the SCC as needed. We
+ // use a lambda referencing external objects so that they live long enough to
+ // be queried, but we re-use them each time.
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+ AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+ return *AAR;
+ };
+
+ return runImpl(SCC, AARGetter);
+}
+
namespace {
-/// A pass to do RPO deduction and propagation of function attributes.
-///
-/// This pass provides a general RPO or "top down" propagation of
-/// function attributes. For a few (rare) cases, we can deduce significantly
-/// more about function attributes by working in RPO, so this pass
-/// provides the compliment to the post-order pass above where the majority of
-/// deduction is performed.
-// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so
-// this is a boring module pass, but eventually it should be an RPO CGSCC pass
-// when such infrastructure is available.
-struct ReversePostOrderFunctionAttrs : public ModulePass {
+struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- ReversePostOrderFunctionAttrs() : ModulePass(ID) {
- initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
+ initializeReversePostOrderFunctionAttrsLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override;
@@ -1034,19 +1134,20 @@ struct ReversePostOrderFunctionAttrs : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<CallGraphWrapperPass>();
+ AU.addPreserved<CallGraphWrapperPass>();
}
};
}
-char ReversePostOrderFunctionAttrs::ID = 0;
-INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs",
+char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs",
"Deduce function attributes in RPO", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs",
+INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs",
"Deduce function attributes in RPO", false, false)
Pass *llvm::createReversePostOrderFunctionAttrsPass() {
- return new ReversePostOrderFunctionAttrs();
+ return new ReversePostOrderFunctionAttrsLegacyPass();
}
static bool addNoRecurseAttrsTopDown(Function &F) {
@@ -1078,7 +1179,7 @@ static bool addNoRecurseAttrsTopDown(Function &F) {
return setDoesNotRecurse(F);
}
-bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) {
+static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
// We only have a post-order SCC traversal (because SCCs are inherently
// discovered in post-order), so we accumulate them in a vector and then walk
// it in reverse. This is simpler than using the RPO iterator infrastructure
@@ -1086,7 +1187,6 @@ bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) {
// graph. We can also cheat egregiously because we're primarily interested in
// synthesizing norecurse and so we can only save the singular SCCs as SCCs
// with multiple functions in them will clearly be recursive.
- auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
SmallVector<Function *, 16> Worklist;
for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
if (I->size() != 1)
@@ -1104,3 +1204,24 @@ bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) {
return Changed;
}
+
+bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
+ return deduceFunctionAttributeInRPO(M, CG);
+}
+
+PreservedAnalyses
+ReversePostOrderFunctionAttrsPass::run(Module &M, AnalysisManager<Module> &AM) {
+ auto &CG = AM.getResult<CallGraphAnalysis>(M);
+
+ bool Changed = deduceFunctionAttributeInRPO(M, CG);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<CallGraphAnalysis>();
+ return PA;
+}
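For readers following the norecurse story: the RPO walk handles a case the post-order pass cannot. A hedged illustration of the top-down rule that addNoRecurseAttrsTopDown applies (paraphrased; the real check also constrains linkage and call sites):

    // Hypothetical call graph where only the top-down walk succeeds:
    //
    //   main() --> helper() --> unknownExternal()
    //
    // Bottom-up, helper() calls opaque external code and cannot be proven
    // norecurse. Top-down, once main() is known norecurse and is helper()'s
    // only caller, helper() can never appear twice on the stack, so it is
    // norecurse as well.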
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 5e0df95051192..c9d075e763250 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -13,329 +13,670 @@
#include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
-#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
-#include <map>
+#define DEBUG_TYPE "function-import"
using namespace llvm;
-#define DEBUG_TYPE "function-import"
+STATISTIC(NumImported, "Number of functions imported");
/// Limit on instruction count of imported functions.
static cl::opt<unsigned> ImportInstrLimit(
"import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
cl::desc("Only import functions with less than N instructions"));
+static cl::opt<float>
+ ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
+ cl::Hidden, cl::value_desc("x"),
+ cl::desc("As we import functions, multiply the "
+ "`import-instr-limit` threshold by this factor "
+ "before processing newly imported functions"));
+
+static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
+ cl::desc("Print imported functions"));
+
+// Temporarily allows the function import pass to disable always linking
+// referenced discardable symbols.
+static cl::opt<bool>
+ DontForceImportReferencedDiscardableSymbols("disable-force-link-odr",
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EnableImportMetadata(
+ "enable-import-metadata", cl::init(
+#if !defined(NDEBUG)
+ true /*Enabled with asserts.*/
+#else
+ false
+#endif
+ ),
+ cl::Hidden, cl::desc("Enable import metadata like 'thinlto_src_module'"));
+
// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
LLVMContext &Context) {
SMDiagnostic Err;
DEBUG(dbgs() << "Loading '" << FileName << "'\n");
- // Metadata isn't loaded or linked until after all functions are
- // imported, after which it will be materialized and linked.
+ // Metadata isn't loaded until functions are imported, to minimize
+ // the memory overhead.
std::unique_ptr<Module> Result =
getLazyIRFileModule(FileName, Err, Context,
/* ShouldLazyLoadMetadata = */ true);
if (!Result) {
Err.print("function-import", errs());
- return nullptr;
+ report_fatal_error("Abort");
}
return Result;
}
namespace {
-/// Helper to load on demand a Module from file and cache it for subsequent
-/// queries. It can be used with the FunctionImporter.
-class ModuleLazyLoaderCache {
- /// Cache of lazily loaded module for import.
- StringMap<std::unique_ptr<Module>> ModuleMap;
- /// Retrieve a Module from the cache or lazily load it on demand.
- std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
+// Return true if the Summary describes a GlobalValue that can be externally
+// referenced, i.e. it does not need renaming (its linkage is not local) or
+// renaming is possible (it does not have a section, for instance).
+static bool canBeExternallyReferenced(const GlobalValueSummary &Summary) {
+ if (!Summary.needsRenaming())
+ return true;
-public:
- /// Create the loader, Module will be initialized in \p Context.
- ModuleLazyLoaderCache(std::function<
- std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
- : createLazyModule(createLazyModule) {}
-
- /// Retrieve a Module from the cache or lazily load it on demand.
- Module &operator()(StringRef FileName);
-
- std::unique_ptr<Module> takeModule(StringRef FileName) {
- auto I = ModuleMap.find(FileName);
- assert(I != ModuleMap.end());
- std::unique_ptr<Module> Ret = std::move(I->second);
- ModuleMap.erase(I);
- return Ret;
- }
-};
+ if (Summary.hasSection())
+    // Can't rename a global that needs renaming if it has a section.
+ return false;
-// Get a Module for \p FileName from the cache, or load it lazily.
-Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
- auto &Module = ModuleMap[Identifier];
- if (!Module)
- Module = createLazyModule(Identifier);
- return *Module;
+ return true;
}
-} // anonymous namespace
-/// Walk through the instructions in \p F looking for external
-/// calls not already in the \p CalledFunctions set. If any are
-/// found they are added to the \p Worklist for importing.
-static void findExternalCalls(const Module &DestModule, Function &F,
- const FunctionInfoIndex &Index,
- StringSet<> &CalledFunctions,
- SmallVector<StringRef, 64> &Worklist) {
- // We need to suffix internal function calls imported from other modules,
- // prepare the suffix ahead of time.
- std::string Suffix;
- if (F.getParent() != &DestModule)
- Suffix =
- (Twine(".llvm.") +
- Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
-
- for (auto &BB : F) {
- for (auto &I : BB) {
- if (isa<CallInst>(I)) {
- auto CalledFunction = cast<CallInst>(I).getCalledFunction();
- // Insert any new external calls that have not already been
- // added to set/worklist.
- if (!CalledFunction || !CalledFunction->hasName())
- continue;
- // Ignore intrinsics early
- if (CalledFunction->isIntrinsic()) {
- assert(CalledFunction->getIntrinsicID() != 0);
- continue;
- }
- auto ImportedName = CalledFunction->getName();
- auto Renamed = (ImportedName + Suffix).str();
- // Rename internal functions
- if (CalledFunction->hasInternalLinkage()) {
- ImportedName = Renamed;
- }
- auto It = CalledFunctions.insert(ImportedName);
- if (!It.second) {
- // This is a call to a function we already considered, skip.
- continue;
- }
- // Ignore functions already present in the destination module
- auto *SrcGV = DestModule.getNamedValue(ImportedName);
- if (SrcGV) {
- if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV))
- SrcGV = SGA->getBaseObject();
- assert(isa<Function>(SrcGV) && "Name collision during import");
- if (!cast<Function>(SrcGV)->isDeclaration()) {
- DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
- << ImportedName << " already in DestinationModule\n");
- continue;
- }
+// Return true if \p GUID describes a GlobalValue that can be externally
+// referenced, i.e. it does not need renaming (its linkage is not local) or
+// renaming is possible (it does not have a section, for instance).
+static bool canBeExternallyReferenced(const ModuleSummaryIndex &Index,
+ GlobalValue::GUID GUID) {
+ auto Summaries = Index.findGlobalValueSummaryList(GUID);
+ if (Summaries == Index.end())
+ return true;
+ if (Summaries->second.size() != 1)
+ // If there are multiple globals with this GUID, then we know it is
+ // not a local symbol, and it is necessarily externally referenced.
+ return true;
+
+ // We don't need to check for the module path, because if it can't be
+  // externally referenced and we call it, it is necessarily in the same
+  // module.
+ return canBeExternallyReferenced(**Summaries->second.begin());
+}
+
+// Return true if the global described by \p Summary can be imported in another
+// module.
+static bool eligibleForImport(const ModuleSummaryIndex &Index,
+ const GlobalValueSummary &Summary) {
+ if (!canBeExternallyReferenced(Summary))
+    // Can't import a global that needs renaming if it has a section, for
+    // instance.
+ // FIXME: we may be able to import it by copying it without promotion.
+ return false;
+
+ // Check references (and potential calls) in the same module. If the current
+ // value references a global that can't be externally referenced it is not
+ // eligible for import.
+ bool AllRefsCanBeExternallyReferenced =
+ llvm::all_of(Summary.refs(), [&](const ValueInfo &VI) {
+ return canBeExternallyReferenced(Index, VI.getGUID());
+ });
+ if (!AllRefsCanBeExternallyReferenced)
+ return false;
+
+ if (auto *FuncSummary = dyn_cast<FunctionSummary>(&Summary)) {
+ bool AllCallsCanBeExternallyReferenced = llvm::all_of(
+ FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
+ return canBeExternallyReferenced(Index, Edge.first.getGUID());
+ });
+ if (!AllCallsCanBeExternallyReferenced)
+ return false;
+ }
+ return true;
+}
+
+/// Given a list of possible callee implementation for a call site, select one
+/// that fits the \p Threshold.
+///
+/// FIXME: select "best" instead of first that fits. But what is "best"?
+/// - The smallest: more likely to be inlined.
+/// - The one with the least outgoing edges (already well optimized).
+/// - One from a module already being imported from in order to reduce the
+/// number of source modules parsed/linked.
+/// - One that has PGO data attached.
+/// - [insert you fancy metric here]
+static const GlobalValueSummary *
+selectCallee(const ModuleSummaryIndex &Index,
+ const GlobalValueSummaryList &CalleeSummaryList,
+ unsigned Threshold) {
+ auto It = llvm::find_if(
+ CalleeSummaryList,
+ [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
+ auto *GVSummary = SummaryPtr.get();
+ if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
+          // There is no point in importing these; we can't inline them.
+ return false;
+ if (auto *AS = dyn_cast<AliasSummary>(GVSummary)) {
+ GVSummary = &AS->getAliasee();
+          // An alias can't point to "available_externally". However, when we
+          // import a linkonce_odr the linkage does not change, so we import
+          // the alias and aliasee only in this case.
+          // FIXME: we should import the alias as an available_externally
+          // *function*; the destination module does not need to know it is an
+          // alias.
+ if (!GlobalValue::isLinkOnceODRLinkage(GVSummary->linkage()))
+ return false;
}
- Worklist.push_back(It.first->getKey());
- DEBUG(dbgs() << DestModule.getModuleIdentifier()
- << ": Adding callee for : " << ImportedName << " : "
- << F.getName() << "\n");
- }
- }
+ auto *Summary = cast<FunctionSummary>(GVSummary);
+
+ if (Summary->instCount() > Threshold)
+ return false;
+
+ if (!eligibleForImport(Index, *Summary))
+ return false;
+
+ return true;
+ });
+ if (It == CalleeSummaryList.end())
+ return nullptr;
+
+ return cast<GlobalValueSummary>(It->get());
+}
+
+/// Return the summary for the function \p GUID that fits the \p Threshold, or
+/// null if there's no match.
+static const GlobalValueSummary *selectCallee(GlobalValue::GUID GUID,
+ unsigned Threshold,
+ const ModuleSummaryIndex &Index) {
+ auto CalleeSummaryList = Index.findGlobalValueSummaryList(GUID);
+ if (CalleeSummaryList == Index.end())
+ return nullptr; // This function does not have a summary
+ return selectCallee(Index, CalleeSummaryList->second, Threshold);
+}
+
+/// Mark the global \p GUID as exported by module \p ExportModulePath if found in
+/// this module. If it is a GlobalVariable, we also mark any referenced global
+/// in the current module as exported.
+static void exportGlobalInModule(const ModuleSummaryIndex &Index,
+ StringRef ExportModulePath,
+ GlobalValue::GUID GUID,
+ FunctionImporter::ExportSetTy &ExportList) {
+ auto FindGlobalSummaryInModule =
+ [&](GlobalValue::GUID GUID) -> GlobalValueSummary *{
+ auto SummaryList = Index.findGlobalValueSummaryList(GUID);
+ if (SummaryList == Index.end())
+      // This global does not have a summary; it is not part of the ThinLTO
+      // process.
+ return nullptr;
+ auto SummaryIter = llvm::find_if(
+ SummaryList->second,
+ [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+ return Summary->modulePath() == ExportModulePath;
+ });
+ if (SummaryIter == SummaryList->second.end())
+ return nullptr;
+ return SummaryIter->get();
+ };
+
+ auto *Summary = FindGlobalSummaryInModule(GUID);
+ if (!Summary)
+ return;
+ // We found it in the current module, mark as exported
+ ExportList.insert(GUID);
+
+ auto GVS = dyn_cast<GlobalVarSummary>(Summary);
+ if (!GVS)
+ return;
+ // FunctionImportGlobalProcessing::doPromoteLocalToGlobal() will always
+ // trigger importing the initializer for `constant unnamed addr` globals that
+ // are referenced. We conservatively export all the referenced symbols for
+  // every global to work around this, so that the ExportList is accurate.
+  // FIXME: with an "isConstant" flag in the summary we could be more targeted.
+ for (auto &Ref : GVS->refs()) {
+ auto GUID = Ref.getGUID();
+ auto *RefSummary = FindGlobalSummaryInModule(GUID);
+ if (RefSummary)
+ // Found a ref in the current module, mark it as exported
+ ExportList.insert(GUID);
}
}
-// Helper function: given a worklist and an index, will process all the worklist
-// and decide what to import based on the summary information.
-//
-// Nothing is actually imported, functions are materialized in their source
-// module and analyzed there.
-//
-// \p ModuleToFunctionsToImportMap is filled with the set of Function to import
-// per Module.
-static void GetImportList(Module &DestModule,
- SmallVector<StringRef, 64> &Worklist,
- StringSet<> &CalledFunctions,
- std::map<StringRef, DenseSet<const GlobalValue *>>
- &ModuleToFunctionsToImportMap,
- const FunctionInfoIndex &Index,
- ModuleLazyLoaderCache &ModuleLoaderCache) {
- while (!Worklist.empty()) {
- auto CalledFunctionName = Worklist.pop_back_val();
- DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
- << CalledFunctionName << "\n");
-
- // Try to get a summary for this function call.
- auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
- if (InfoList == Index.end()) {
- DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
- << CalledFunctionName << " Ignoring.\n");
+using EdgeInfo = std::pair<const FunctionSummary *, unsigned /* Threshold */>;
+
+/// Compute the list of functions to import for a given caller. Mark these
+/// imported functions and the symbols they reference in their source module as
+/// exported from their source module.
+static void computeImportForFunction(
+ const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
+ unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
+ SmallVectorImpl<EdgeInfo> &Worklist,
+ FunctionImporter::ImportMapTy &ImportsForModule,
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
+ for (auto &Edge : Summary.calls()) {
+ auto GUID = Edge.first.getGUID();
+ DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n");
+
+ if (DefinedGVSummaries.count(GUID)) {
+ DEBUG(dbgs() << "ignored! Target already in destination module.\n");
continue;
}
- assert(!InfoList->second.empty() && "No summary, error at import?");
-
- // Comdat can have multiple entries, FIXME: what do we do with them?
- auto &Info = InfoList->second[0];
- assert(Info && "Nullptr in list, error importing summaries?\n");
-
- auto *Summary = Info->functionSummary();
- if (!Summary) {
- // FIXME: in case we are lazyloading summaries, we can do it now.
- DEBUG(dbgs() << DestModule.getModuleIdentifier()
- << ": Missing summary for " << CalledFunctionName
- << ", error at import?\n");
- llvm_unreachable("Missing summary");
- }
- if (Summary->instCount() > ImportInstrLimit) {
- DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
- << CalledFunctionName << " with " << Summary->instCount()
- << " instructions (limit " << ImportInstrLimit << ")\n");
+ auto *CalleeSummary = selectCallee(GUID, Threshold, Index);
+ if (!CalleeSummary) {
+ DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n");
continue;
}
-
- // Get the module path from the summary.
- auto ModuleIdentifier = Summary->modulePath();
- DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
- << CalledFunctionName << " from " << ModuleIdentifier << "\n");
-
- auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
-
- // The function that we will import!
- GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
-
- if (!SGV) {
- // The destination module is referencing function using their renamed name
- // when importing a function that was originally local in the source
- // module. The source module we have might not have been renamed so we try
- // to remove the suffix added during the renaming to recover the original
- // name in the source module.
- std::pair<StringRef, StringRef> Split =
- CalledFunctionName.split(".llvm.");
- SGV = SrcModule.getNamedValue(Split.first);
- assert(SGV && "Can't find function to import in source module");
+ // "Resolve" the summary, traversing alias,
+ const FunctionSummary *ResolvedCalleeSummary;
+ if (isa<AliasSummary>(CalleeSummary)) {
+ ResolvedCalleeSummary = cast<FunctionSummary>(
+ &cast<AliasSummary>(CalleeSummary)->getAliasee());
+ assert(
+ GlobalValue::isLinkOnceODRLinkage(ResolvedCalleeSummary->linkage()) &&
+ "Unexpected alias to a non-linkonceODR in import list");
+ } else
+ ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
+
+ assert(ResolvedCalleeSummary->instCount() <= Threshold &&
+ "selectCallee() didn't honor the threshold");
+
+ auto ExportModulePath = ResolvedCalleeSummary->modulePath();
+ auto &ProcessedThreshold = ImportsForModule[ExportModulePath][GUID];
+    // Since the traversal of the call graph is DFS, we can revisit a function
+    // a second time with a higher threshold. In this case, it is added back to
+    // the worklist with the new threshold.
+ if (ProcessedThreshold && ProcessedThreshold >= Threshold) {
+ DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
+ << ProcessedThreshold << "\n");
+ continue;
}
- if (!SGV) {
- report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
- "' in Module '" + SrcModule.getModuleIdentifier() +
- "', error in the summary?\n");
+ // Mark this function as imported in this module, with the current Threshold
+ ProcessedThreshold = Threshold;
+
+ // Make exports in the source module.
+ if (ExportLists) {
+ auto &ExportList = (*ExportLists)[ExportModulePath];
+ ExportList.insert(GUID);
+ // Mark all functions and globals referenced by this function as exported
+ // to the outside if they are defined in the same source module.
+ for (auto &Edge : ResolvedCalleeSummary->calls()) {
+ auto CalleeGUID = Edge.first.getGUID();
+ exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList);
+ }
+ for (auto &Ref : ResolvedCalleeSummary->refs()) {
+ auto GUID = Ref.getGUID();
+ exportGlobalInModule(Index, ExportModulePath, GUID, ExportList);
+ }
}
- Function *F = dyn_cast<Function>(SGV);
- if (!F && isa<GlobalAlias>(SGV)) {
- auto *SGA = dyn_cast<GlobalAlias>(SGV);
- F = dyn_cast<Function>(SGA->getBaseObject());
- CalledFunctionName = F->getName();
- }
- assert(F && "Imported Function is ... not a Function");
-
- // We cannot import weak_any functions/aliases without possibly affecting
- // the order they are seen and selected by the linker, changing program
- // semantics.
- if (SGV->hasWeakAnyLinkage()) {
- DEBUG(dbgs() << DestModule.getModuleIdentifier()
- << ": Ignoring import request for weak-any "
- << (isa<Function>(SGV) ? "function " : "alias ")
- << CalledFunctionName << " from "
- << SrcModule.getModuleIdentifier() << "\n");
+    // Insert the newly imported function into the worklist.
+ Worklist.push_back(std::make_pair(ResolvedCalleeSummary, Threshold));
+ }
+}
+
+/// Given the list of globals defined in a module, compute the list of imports
+/// as well as the list of "exports", i.e. the list of symbols referenced from
+/// another module (that may require promotion).
+static void ComputeImportForModule(
+ const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
+ FunctionImporter::ImportMapTy &ImportsForModule,
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
+ // Worklist contains the list of function imported in this module, for which
+ // we will analyse the callees and may import further down the callgraph.
+ SmallVector<EdgeInfo, 128> Worklist;
+
+  // Populate the worklist with the imports for the functions defined in the
+  // current module.
+ for (auto &GVSummary : DefinedGVSummaries) {
+ auto *Summary = GVSummary.second;
+ if (auto *AS = dyn_cast<AliasSummary>(Summary))
+ Summary = &AS->getAliasee();
+ auto *FuncSummary = dyn_cast<FunctionSummary>(Summary);
+ if (!FuncSummary)
+ // Skip import for global variables
continue;
- }
+ DEBUG(dbgs() << "Initalize import for " << GVSummary.first << "\n");
+ computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
+ DefinedGVSummaries, Worklist, ImportsForModule,
+ ExportLists);
+ }
- // Add the function to the import list
- auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
- Entry.insert(F);
+ while (!Worklist.empty()) {
+ auto FuncInfo = Worklist.pop_back_val();
+ auto *Summary = FuncInfo.first;
+ auto Threshold = FuncInfo.second;
// Process the newly imported functions and add callees to the worklist.
- F->materialize();
- findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist);
+ // Adjust the threshold
+ Threshold = Threshold * ImportInstrFactor;
+
+ computeImportForFunction(*Summary, Index, Threshold, DefinedGVSummaries,
+ Worklist, ImportsForModule, ExportLists);
}
}
+} // anonymous namespace
+
+/// Compute all the imports and exports for every module using the Index.
+void llvm::ComputeCrossModuleImport(
+ const ModuleSummaryIndex &Index,
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ StringMap<FunctionImporter::ImportMapTy> &ImportLists,
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
+  // For each module that has functions defined, compute the import/export lists.
+ for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
+ auto &ImportsForModule = ImportLists[DefinedGVSummaries.first()];
+ DEBUG(dbgs() << "Computing import for Module '"
+ << DefinedGVSummaries.first() << "'\n");
+ ComputeImportForModule(DefinedGVSummaries.second, Index, ImportsForModule,
+ &ExportLists);
+ }
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
+ << " modules:\n");
+ for (auto &ModuleImports : ImportLists) {
+ auto ModName = ModuleImports.first();
+ auto &Exports = ExportLists[ModName];
+ DEBUG(dbgs() << "* Module " << ModName << " exports " << Exports.size()
+ << " functions. Imports from " << ModuleImports.second.size()
+ << " modules.\n");
+ for (auto &Src : ModuleImports.second) {
+ auto SrcModName = Src.first();
+ DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from "
+ << SrcModName << "\n");
+ }
+ }
+#endif
+}
+
+/// Compute all the imports for the given module in the Index.
+void llvm::ComputeCrossModuleImportForModule(
+ StringRef ModulePath, const ModuleSummaryIndex &Index,
+ FunctionImporter::ImportMapTy &ImportList) {
+
+ // Collect the list of functions this module defines.
+ // GUID -> Summary
+ GVSummaryMapTy FunctionSummaryMap;
+ Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
+
+ // Compute the import list for this module.
+ DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
+ ComputeImportForModule(FunctionSummaryMap, Index, ImportList);
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
+ << ImportList.size() << " modules.\n");
+ for (auto &Src : ImportList) {
+ auto SrcModName = Src.first();
+ DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from "
+ << SrcModName << "\n");
+ }
+#endif
+}
+
+/// Compute the set of summaries needed for a ThinLTO backend compilation of
+/// \p ModulePath.
+void llvm::gatherImportedSummariesForModule(
+ StringRef ModulePath,
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ const StringMap<FunctionImporter::ImportMapTy> &ImportLists,
+ std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
+ // Include all summaries from the importing module.
+ ModuleToSummariesForIndex[ModulePath] =
+ ModuleToDefinedGVSummaries.lookup(ModulePath);
+ auto ModuleImports = ImportLists.find(ModulePath);
+ if (ModuleImports != ImportLists.end()) {
+ // Include summaries for imports.
+ for (auto &ILI : ModuleImports->second) {
+ auto &SummariesForIndex = ModuleToSummariesForIndex[ILI.first()];
+ const auto &DefinedGVSummaries =
+ ModuleToDefinedGVSummaries.lookup(ILI.first());
+ for (auto &GI : ILI.second) {
+ const auto &DS = DefinedGVSummaries.find(GI.first);
+ assert(DS != DefinedGVSummaries.end() &&
+ "Expected a defined summary for imported global value");
+ SummariesForIndex[GI.first] = DS->second;
+ }
+ }
+ }
+}
+
+/// Emit the files \p ModulePath will import from into \p OutputFilename.
+std::error_code llvm::EmitImportsFiles(
+ StringRef ModulePath, StringRef OutputFilename,
+ const StringMap<FunctionImporter::ImportMapTy> &ImportLists) {
+ auto ModuleImports = ImportLists.find(ModulePath);
+ std::error_code EC;
+ raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
+ if (EC)
+ return EC;
+ if (ModuleImports != ImportLists.end())
+ for (auto &ILI : ModuleImports->second)
+ ImportsOS << ILI.first() << "\n";
+ return std::error_code();
+}
+
+/// Fix up WeakForLinker linkages in \p TheModule based on summary analysis.
+void llvm::thinLTOResolveWeakForLinkerModule(
+ Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
+ auto updateLinkage = [&](GlobalValue &GV) {
+ if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
+ return;
+ // See if the global summary analysis computed a new resolved linkage.
+ const auto &GS = DefinedGlobals.find(GV.getGUID());
+ if (GS == DefinedGlobals.end())
+ return;
+ auto NewLinkage = GS->second->linkage();
+ if (NewLinkage == GV.getLinkage())
+ return;
+ DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
+ << GV.getLinkage() << " to " << NewLinkage << "\n");
+ GV.setLinkage(NewLinkage);
+ };
+
+  // Process functions, globals, and aliases now.
+ for (auto &GV : TheModule)
+ updateLinkage(GV);
+ for (auto &GV : TheModule.globals())
+ updateLinkage(GV);
+ for (auto &GV : TheModule.aliases())
+ updateLinkage(GV);
+}
+
+/// Run internalization on \p TheModule based on summary analysis.
+void llvm::thinLTOInternalizeModule(Module &TheModule,
+ const GVSummaryMapTy &DefinedGlobals) {
+ // Parse inline ASM and collect the list of symbols that are not defined in
+ // the current module.
+ StringSet<> AsmUndefinedRefs;
+ object::IRObjectFile::CollectAsmUndefinedRefs(
+ Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
+ [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ if (Flags & object::BasicSymbolRef::SF_Undefined)
+ AsmUndefinedRefs.insert(Name);
+ });
+
+  // Declare a callback for the internalize pass; it is queried for every
+  // candidate GlobalValue to decide whether it can be internalized or not.
+ auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
+ // Can't be internalized if referenced in inline asm.
+ if (AsmUndefinedRefs.count(GV.getName()))
+ return true;
+
+ // Lookup the linkage recorded in the summaries during global analysis.
+ const auto &GS = DefinedGlobals.find(GV.getGUID());
+ GlobalValue::LinkageTypes Linkage;
+ if (GS == DefinedGlobals.end()) {
+ // Must have been promoted (possibly conservatively). Find original
+ // name so that we can access the correct summary and see if it can
+ // be internalized again.
+ // FIXME: Eventually we should control promotion instead of promoting
+ // and internalizing again.
+ StringRef OrigName =
+ ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
+ std::string OrigId = GlobalValue::getGlobalIdentifier(
+ OrigName, GlobalValue::InternalLinkage,
+ TheModule.getSourceFileName());
+ const auto &GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
+ if (GS == DefinedGlobals.end()) {
+ // Also check the original non-promoted non-globalized name. In some
+ // cases a preempted weak value is linked in as a local copy because
+ // it is referenced by an alias (IRLinker::linkGlobalValueProto).
+ // In that case, since it was originally not a local value, it was
+ // recorded in the index using the original name.
+ // FIXME: This may not be needed once PR27866 is fixed.
+ const auto &GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
+ assert(GS != DefinedGlobals.end());
+ Linkage = GS->second->linkage();
+ } else {
+ Linkage = GS->second->linkage();
+ }
+ } else
+ Linkage = GS->second->linkage();
+ return !GlobalValue::isLocalLinkage(Linkage);
+ };
+
+ // FIXME: See if we can just internalize directly here via linkage changes
+ // based on the index, rather than invoking internalizeModule.
+ llvm::internalizeModule(TheModule, MustPreserveGV);
+}
+
// Automatically import functions in Module \p DestModule based on the summaries
// index.
//
-// The current implementation imports every called functions that exists in the
-// summaries index.
-bool FunctionImporter::importFunctions(Module &DestModule) {
+bool FunctionImporter::importFunctions(
+ Module &DestModule, const FunctionImporter::ImportMapTy &ImportList,
+ bool ForceImportReferencedDiscardableSymbols) {
DEBUG(dbgs() << "Starting import for Module "
<< DestModule.getModuleIdentifier() << "\n");
unsigned ImportedCount = 0;
- /// First step is collecting the called external functions.
- StringSet<> CalledFunctions;
- SmallVector<StringRef, 64> Worklist;
- for (auto &F : DestModule) {
- if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
- continue;
- findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist);
- }
- if (Worklist.empty())
- return false;
-
- /// Second step: for every call to an external function, try to import it.
-
  // Linker that will be used for importing functions
Linker TheLinker(DestModule);
-
- // Map of Module -> List of Function to import from the Module
- std::map<StringRef, DenseSet<const GlobalValue *>>
- ModuleToFunctionsToImportMap;
-
- // Analyze the summaries and get the list of functions to import by
- // populating ModuleToFunctionsToImportMap
- ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
- GetImportList(DestModule, Worklist, CalledFunctions,
- ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
- assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
-
- StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
- ModuleToTempMDValsMap;
-
// Do the actual import of functions now, one Module at a time
- for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
+ std::set<StringRef> ModuleNameOrderedList;
+ for (auto &FunctionsToImportPerModule : ImportList) {
+ ModuleNameOrderedList.insert(FunctionsToImportPerModule.first());
+ }
+ for (auto &Name : ModuleNameOrderedList) {
// Get the module for the import
- auto &FunctionsToImport = FunctionsToImportPerModule.second;
- std::unique_ptr<Module> SrcModule =
- ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
+ const auto &FunctionsToImportPerModule = ImportList.find(Name);
+ assert(FunctionsToImportPerModule != ImportList.end());
+ std::unique_ptr<Module> SrcModule = ModuleLoader(Name);
assert(&DestModule.getContext() == &SrcModule->getContext() &&
"Context mismatch");
- // Save the mapping of value ids to temporary metadata created when
- // importing this function. If we have already imported from this module,
- // add new temporary metadata to the existing mapping.
- auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()];
- if (!TempMDVals)
- TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
+ // If modules were created with lazy metadata loading, materialize it
+ // now, before linking it (otherwise this will be a noop).
+ SrcModule->materializeMetadata();
+ UpgradeDebugInfo(*SrcModule);
+
+ auto &ImportGUIDs = FunctionsToImportPerModule->second;
+ // Find the globals to import
+ DenseSet<const GlobalValue *> GlobalsToImport;
+ for (Function &F : *SrcModule) {
+ if (!F.hasName())
+ continue;
+ auto GUID = F.getGUID();
+ auto Import = ImportGUIDs.count(GUID);
+ DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing function " << GUID
+ << " " << F.getName() << " from "
+ << SrcModule->getSourceFileName() << "\n");
+ if (Import) {
+ F.materialize();
+ if (EnableImportMetadata) {
+ // Add 'thinlto_src_module' metadata for statistics and debugging.
+ F.setMetadata(
+ "thinlto_src_module",
+ llvm::MDNode::get(
+ DestModule.getContext(),
+ {llvm::MDString::get(DestModule.getContext(),
+ SrcModule->getSourceFileName())}));
+ }
+ GlobalsToImport.insert(&F);
+ }
+ }
+ for (GlobalVariable &GV : SrcModule->globals()) {
+ if (!GV.hasName())
+ continue;
+ auto GUID = GV.getGUID();
+ auto Import = ImportGUIDs.count(GUID);
+ DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing global " << GUID
+ << " " << GV.getName() << " from "
+ << SrcModule->getSourceFileName() << "\n");
+ if (Import) {
+ GV.materialize();
+ GlobalsToImport.insert(&GV);
+ }
+ }
+ for (GlobalAlias &GA : SrcModule->aliases()) {
+ if (!GA.hasName())
+ continue;
+ auto GUID = GA.getGUID();
+ auto Import = ImportGUIDs.count(GUID);
+ DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing alias " << GUID
+ << " " << GA.getName() << " from "
+ << SrcModule->getSourceFileName() << "\n");
+ if (Import) {
+        // An alias can't point to "available_externally". However, when we
+        // import a linkonce_odr the linkage does not change, so we import the
+        // alias and aliasee only in this case. This has been handled by
+        // computeImportForFunction().
+ GlobalObject *GO = GA.getBaseObject();
+ assert(GO->hasLinkOnceODRLinkage() &&
+ "Unexpected alias to a non-linkonceODR in import list");
+#ifndef NDEBUG
+ if (!GlobalsToImport.count(GO))
+ DEBUG(dbgs() << " alias triggers importing aliasee " << GO->getGUID()
+ << " " << GO->getName() << " from "
+ << SrcModule->getSourceFileName() << "\n");
+#endif
+ GO->materialize();
+ GlobalsToImport.insert(GO);
+ GA.materialize();
+ GlobalsToImport.insert(&GA);
+ }
+ }
// Link in the specified functions.
- if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
- &Index, &FunctionsToImport, TempMDVals.get()))
+ if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
+ return true;
+
+ if (PrintImports) {
+ for (const auto *GV : GlobalsToImport)
+ dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
+ << " from " << SrcModule->getSourceFileName() << "\n";
+ }
+
+ // Instruct the linker that the client will take care of linkonce resolution
+ unsigned Flags = Linker::Flags::None;
+ if (!ForceImportReferencedDiscardableSymbols)
+ Flags |= Linker::Flags::DontForceLinkLinkonceODR;
+
+ if (TheLinker.linkInModule(std::move(SrcModule), Flags, &GlobalsToImport))
report_fatal_error("Function Import: link error");
- ImportedCount += FunctionsToImport.size();
+ ImportedCount += GlobalsToImport.size();
}
- // Now link in metadata for all modules from which we imported functions.
- for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
- ModuleToTempMDValsMap) {
- // Load the specified source module.
- auto &SrcModule = ModuleLoaderCache(SME.getKey());
- // The modules were created with lazy metadata loading. Materialize it
- // now, before linking it.
- SrcModule.materializeMetadata();
- UpgradeDebugInfo(SrcModule);
-
- // Link in all necessary metadata from this module.
- if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
- return false;
- }
+ NumImported += ImportedCount;
DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
<< DestModule.getModuleIdentifier() << "\n");
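A worked example of the threshold evolution driven by the two flags above, using their default values (plain arithmetic, not pass output):

    // With -import-instr-limit=100 and -import-instr-evolution-factor=0.7,
    // candidates found deeper in the call graph are admitted against a
    // geometrically decaying instruction budget:
    unsigned Threshold = 100;    // depth 0: direct callees, limit 100
    Threshold = Threshold * 0.7; // depth 1: limit 70
    Threshold = Threshold * 0.7; // depth 2: limit 49
    Threshold = Threshold * 0.7; // depth 3: limit 34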
@@ -355,11 +696,11 @@ static void diagnosticHandler(const DiagnosticInfo &DI) {
OS << '\n';
}
-/// Parse the function index out of an IR file and return the function
+/// Parse the summary index out of an IR file and return the summary
/// index object if found, or nullptr if not.
-static std::unique_ptr<FunctionInfoIndex>
-getFunctionIndexForFile(StringRef Path, std::string &Error,
- DiagnosticHandlerFunction DiagnosticHandler) {
+static std::unique_ptr<ModuleSummaryIndex> getModuleSummaryIndexForFile(
+ StringRef Path, std::string &Error,
+ const DiagnosticHandlerFunction &DiagnosticHandler) {
std::unique_ptr<MemoryBuffer> Buffer;
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFile(Path);
@@ -368,9 +709,9 @@ getFunctionIndexForFile(StringRef Path, std::string &Error,
return nullptr;
}
Buffer = std::move(BufferOrErr.get());
- ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
- object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(),
- DiagnosticHandler);
+ ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
+ object::ModuleSummaryIndexObjectFile::create(Buffer->getMemBufferRef(),
+ DiagnosticHandler);
if (std::error_code EC = ObjOrErr.getError()) {
Error = EC.message();
return nullptr;
@@ -381,32 +722,34 @@ getFunctionIndexForFile(StringRef Path, std::string &Error,
namespace {
/// Pass that performs cross-module function import provided a summary file.
class FunctionImportPass : public ModulePass {
- /// Optional function summary index to use for importing, otherwise
+ /// Optional module summary index to use for importing, otherwise
/// the summary-file option must be specified.
- const FunctionInfoIndex *Index;
+ const ModuleSummaryIndex *Index;
public:
/// Pass identification, replacement for typeid
static char ID;
/// Specify pass name for debug output
- const char *getPassName() const override {
- return "Function Importing";
- }
+ const char *getPassName() const override { return "Function Importing"; }
- explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
+ explicit FunctionImportPass(const ModuleSummaryIndex *Index = nullptr)
: ModulePass(ID), Index(Index) {}
bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+
if (SummaryFile.empty() && !Index)
report_fatal_error("error: -function-import requires -summary-file or "
"file from frontend\n");
- std::unique_ptr<FunctionInfoIndex> IndexPtr;
+ std::unique_ptr<ModuleSummaryIndex> IndexPtr;
if (!SummaryFile.empty()) {
if (Index)
report_fatal_error("error: -summary-file and index from frontend\n");
std::string Error;
- IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
+ IndexPtr =
+ getModuleSummaryIndexForFile(SummaryFile, Error, diagnosticHandler);
if (!IndexPtr) {
errs() << "Error loading file '" << SummaryFile << "': " << Error
<< "\n";
@@ -415,9 +758,14 @@ public:
Index = IndexPtr.get();
}
- // First we need to promote to global scope and rename any local values that
+ // First step is collecting the import list.
+ FunctionImporter::ImportMapTy ImportList;
+ ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index,
+ ImportList);
+
+ // Next we need to promote to global scope and rename any local values that
// are potentially exported to other modules.
- if (renameModuleForThinLTO(M, Index)) {
+ if (renameModuleForThinLTO(M, *Index, nullptr)) {
errs() << "Error renaming module\n";
return false;
}
@@ -427,7 +775,8 @@ public:
return loadFile(Identifier, M.getContext());
};
FunctionImporter Importer(*Index, ModuleLoader);
- return Importer.importFunctions(M);
+ return Importer.importFunctions(
+ M, ImportList, !DontForceImportReferencedDiscardableSymbols);
}
};
} // anonymous namespace
@@ -439,7 +788,7 @@ INITIALIZE_PASS_END(FunctionImportPass, "function-import",
"Summary Based Function Import", false, false)
namespace llvm {
-Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+Pass *createFunctionImportPass(const ModuleSummaryIndex *Index = nullptr) {
return new FunctionImportPass(Index);
}
}
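For readers tracking the API change above: a minimal sketch of driving the renamed pass from a legacy pass manager, assuming a ModuleSummaryIndex (`Index`) that the frontend has already parsed and a module `M` to import into (the helper name is illustrative, not part of this patch):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO.h"

    // Sketch only: wire summary-based importing into a legacy pipeline.
    void runImport(llvm::Module &M, const llvm::ModuleSummaryIndex &Index) {
      llvm::legacy::PassManager PM;
      // Passing nullptr instead would require the -summary-file option.
      PM.add(llvm::createFunctionImportPass(&Index));
      PM.run(M);
    }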
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 9b276ed28e2e0..4c74698a1b619 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -15,15 +15,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Pass.h"
#include <unordered_map>
using namespace llvm;
@@ -31,32 +32,41 @@ using namespace llvm;
STATISTIC(NumAliases , "Number of global aliases removed");
STATISTIC(NumFunctions, "Number of functions removed");
+STATISTIC(NumIFuncs, "Number of indirect functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
namespace {
- struct GlobalDCE : public ModulePass {
+ class GlobalDCELegacyPass : public ModulePass {
+ public:
static char ID; // Pass identification, replacement for typeid
- GlobalDCE() : ModulePass(ID) {
- initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+ GlobalDCELegacyPass() : ModulePass(ID) {
+ initializeGlobalDCELegacyPassPass(*PassRegistry::getPassRegistry());
}
// run - Do the GlobalDCE pass on the specified module, optionally updating
// the specified callgraph to reflect the changes.
//
- bool runOnModule(Module &M) override;
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+
+ ModuleAnalysisManager DummyMAM;
+ auto PA = Impl.run(M, DummyMAM);
+ return !PA.areAllPreserved();
+ }
private:
- SmallPtrSet<GlobalValue*, 32> AliveGlobals;
- SmallPtrSet<Constant *, 8> SeenConstants;
- std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+ GlobalDCEPass Impl;
+ };
+}
- /// GlobalIsNeeded - mark the specific global value as needed, and
- /// recursively mark anything that it uses as also needed.
- void GlobalIsNeeded(GlobalValue *GV);
- void MarkUsedGlobalsAsNeeded(Constant *C);
+char GlobalDCELegacyPass::ID = 0;
+INITIALIZE_PASS(GlobalDCELegacyPass, "globaldce",
+ "Dead Global Elimination", false, false)
- bool RemoveUnusedGlobalValue(GlobalValue &GV);
- };
+// Public interface to the GlobalDCEPass.
+ModulePass *llvm::createGlobalDCEPass() {
+ return new GlobalDCELegacyPass();
}
/// Returns true if F contains only a single "ret" instruction.
@@ -68,13 +78,7 @@ static bool isEmptyFunction(Function *F) {
return RI.getReturnValue() == nullptr;
}
-char GlobalDCE::ID = 0;
-INITIALIZE_PASS(GlobalDCE, "globaldce",
- "Dead Global Elimination", false, false)
-
-ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
-
-bool GlobalDCE::runOnModule(Module &M) {
+PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
bool Changed = false;
// Remove empty functions from the global ctors list.
@@ -92,21 +96,14 @@ bool GlobalDCE::runOnModule(Module &M) {
ComdatMembers.insert(std::make_pair(C, &GA));
// Loop over the module, adding globals which are obviously necessary.
- for (Function &F : M) {
- Changed |= RemoveUnusedGlobalValue(F);
- // Functions with external linkage are needed if they have a body
- if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage())
- if (!F.isDiscardableIfUnused())
- GlobalIsNeeded(&F);
- }
-
- for (GlobalVariable &GV : M.globals()) {
- Changed |= RemoveUnusedGlobalValue(GV);
+ for (GlobalObject &GO : M.global_objects()) {
+ Changed |= RemoveUnusedGlobalValue(GO);
+ // Functions with external linkage are needed if they have a body.
// Externally visible & appending globals are needed, if they have an
// initializer.
- if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage())
- if (!GV.isDiscardableIfUnused())
- GlobalIsNeeded(&GV);
+ if (!GO.isDeclaration() && !GO.hasAvailableExternallyLinkage())
+ if (!GO.isDiscardableIfUnused())
+ GlobalIsNeeded(&GO);
}
for (GlobalAlias &GA : M.aliases()) {
@@ -116,6 +113,13 @@ bool GlobalDCE::runOnModule(Module &M) {
GlobalIsNeeded(&GA);
}
+ for (GlobalIFunc &GIF : M.ifuncs()) {
+ Changed |= RemoveUnusedGlobalValue(GIF);
+ // Externally visible ifuncs are needed.
+ if (!GIF.isDiscardableIfUnused())
+ GlobalIsNeeded(&GIF);
+ }
+
// Now that all globals which are needed are in the AliveGlobals set, we loop
// through the program, deleting those which are not alive.
//
@@ -150,6 +154,14 @@ bool GlobalDCE::runOnModule(Module &M) {
GA.setAliasee(nullptr);
}
+ // The third pass drops targets of ifuncs which are dead...
+ std::vector<GlobalIFunc*> DeadIFuncs;
+ for (GlobalIFunc &GIF : M.ifuncs())
+ if (!AliveGlobals.count(&GIF)) {
+ DeadIFuncs.push_back(&GIF);
+ GIF.setResolver(nullptr);
+ }
+
if (!DeadFunctions.empty()) {
// Now that all interferences have been dropped, delete the actual objects
// themselves.
@@ -180,17 +192,29 @@ bool GlobalDCE::runOnModule(Module &M) {
Changed = true;
}
+ // Now delete any dead ifuncs.
+ if (!DeadIFuncs.empty()) {
+ for (GlobalIFunc *GIF : DeadIFuncs) {
+ RemoveUnusedGlobalValue(*GIF);
+ M.getIFuncList().erase(GIF);
+ }
+ NumIFuncs += DeadIFuncs.size();
+ Changed = true;
+ }
+
// Make sure that all memory is released
AliveGlobals.clear();
SeenConstants.clear();
ComdatMembers.clear();
- return Changed;
+ if (Changed)
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
-void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
+void GlobalDCEPass::GlobalIsNeeded(GlobalValue *G) {
// If the global is already in the set, no need to reprocess it.
if (!AliveGlobals.insert(G).second)
return;
@@ -205,9 +229,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// referenced by the initializer to the alive set.
if (GV->hasInitializer())
MarkUsedGlobalsAsNeeded(GV->getInitializer());
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(G)) {
- // The target of a global alias is needed.
- MarkUsedGlobalsAsNeeded(GA->getAliasee());
+ } else if (GlobalIndirectSymbol *GIS = dyn_cast<GlobalIndirectSymbol>(G)) {
+ // The target of a global alias or ifunc is needed.
+ MarkUsedGlobalsAsNeeded(GIS->getIndirectSymbol());
} else {
// Otherwise this must be a function object. We have to scan the body of
// the function looking for constants and global values which are used as
@@ -228,7 +252,7 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
}
}
-void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
+void GlobalDCEPass::MarkUsedGlobalsAsNeeded(Constant *C) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
return GlobalIsNeeded(GV);
@@ -248,7 +272,7 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
// so, nuke it. This will reduce the reference count on the global value, which
// might make it deader.
//
-bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) {
+bool GlobalDCEPass::RemoveUnusedGlobalValue(GlobalValue &GV) {
if (GV.use_empty())
return false;
GV.removeDeadConstantUsers();
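The GlobalDCE port follows the usual two-layer pattern: the new-PM GlobalDCEPass holds the logic, and the legacy pass forwards to it through a throwaway analysis manager. Since the pass requests no analyses, it can in principle be scheduled with an empty manager; a hedged sketch (driver function name is an assumption):

    #include "llvm/Transforms/IPO/GlobalDCE.h"

    // Sketch only: GlobalDCE as a standalone new-PM module pass.
    llvm::PreservedAnalyses runGlobalDCE(llvm::Module &M) {
      llvm::ModuleAnalysisManager MAM; // no analyses needed by this pass
      llvm::ModulePassManager MPM;
      MPM.addPass(llvm::GlobalDCEPass());
      return MPM.run(M, MAM);
    }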
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index fd7736905fe84..310c29275faf2 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -40,11 +40,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
-#include <deque>
using namespace llvm;
#define DEBUG_TYPE "globalopt"
@@ -65,46 +65,6 @@ STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
-namespace {
- struct GlobalOpt : public ModulePass {
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- }
- static char ID; // Pass identification, replacement for typeid
- GlobalOpt() : ModulePass(ID) {
- initializeGlobalOptPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- private:
- bool OptimizeFunctions(Module &M);
- bool OptimizeGlobalVars(Module &M);
- bool OptimizeGlobalAliases(Module &M);
- bool deleteIfDead(GlobalValue &GV);
- bool processGlobal(GlobalValue &GV);
- bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS);
- bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
-
- bool isPointerValueDeadOnEntryToFunction(const Function *F,
- GlobalValue *GV);
-
- TargetLibraryInfo *TLI;
- SmallSet<const Comdat *, 8> NotDiscardableComdats;
- };
-}
-
-char GlobalOpt::ID = 0;
-INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
- "Global Variable Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(GlobalOpt, "globalopt",
- "Global Variable Optimizer", false, false)
-
-ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
-
/// Is this global variable possibly used by a leak checker as a root? If so,
/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
@@ -120,7 +80,7 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
return false;
SmallVector<Type *, 4> Types;
- Types.push_back(cast<PointerType>(GV->getType())->getElementType());
+ Types.push_back(GV->getValueType());
unsigned Limit = 20;
do {
@@ -329,7 +289,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// we already know what the result of any load from that GEP is.
// TODO: Handle splats.
if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
- SubInit = Constant::getNullValue(GEP->getType()->getElementType());
+ SubInit = Constant::getNullValue(GEP->getResultElementType());
}
Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);
@@ -475,7 +435,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (!GlobalUsersSafeToSRA(GV))
return nullptr;
- assert(GV->hasLocalLinkage() && !GV->isConstant());
+ assert(GV->hasLocalLinkage());
Constant *Init = GV->getInitializer();
Type *Ty = Init->getType();
@@ -499,6 +459,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ NGV->copyAttributesFrom(GV);
Globals.push_back(NGV);
NewGlobals.push_back(NGV);
@@ -533,6 +494,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
NGV->setExternallyInitialized(GV->isExternallyInitialized());
+ NGV->copyAttributesFrom(GV);
Globals.push_back(NGV);
NewGlobals.push_back(NGV);
@@ -867,9 +829,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
}
Constant *RepValue = NewGV;
- if (NewGV->getType() != GV->getType()->getElementType())
- RepValue = ConstantExpr::getBitCast(RepValue,
- GV->getType()->getElementType());
+ if (NewGV->getType() != GV->getValueType())
+ RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
@@ -1283,6 +1244,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
std::vector<Value*> FieldGlobals;
std::vector<Value*> FieldMallocs;
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+
unsigned AS = GV->getType()->getPointerAddressSpace();
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
Type *FieldTy = STy->getElementType(FieldNo);
@@ -1292,6 +1256,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
*GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
nullptr, GV->getThreadLocalMode());
+ NGV->copyAttributesFrom(GV);
FieldGlobals.push_back(NGV);
unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
@@ -1300,7 +1265,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Type *IntPtrTy = DL.getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
- NElems, nullptr,
+ NElems, OpBundles, nullptr,
CI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
@@ -1359,7 +1324,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Cmp, NullPtrBlock);
// Fill in FreeBlock.
- CallInst::CreateFree(GVVal, BI);
+ CallInst::CreateFree(GVVal, OpBundles, BI);
new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
FreeBlock);
BranchInst::Create(NextBlock, FreeBlock);
@@ -1397,8 +1362,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Insert a store of null into each global.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
- PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
- Constant *Null = Constant::getNullValue(PT->getElementType());
+ Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType();
+ Constant *Null = Constant::getNullValue(ValTy);
new StoreInst(Null, FieldGlobals[i], SI);
}
// Erase the original store.
@@ -1500,7 +1465,7 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// into multiple malloc'd arrays, one for each field. This is basically
// SRoA for malloc'd memory.
- if (Ordering != NotAtomic)
+ if (Ordering != AtomicOrdering::NotAtomic)
return false;
// If this is an allocation of a fixed size array of structs, analyze as a
@@ -1525,9 +1490,11 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
- Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
- AllocSize, NumElements,
- nullptr, CI->getName());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+ Instruction *Malloc =
+ CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
+ OpBundles, nullptr, CI->getName());
Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
@@ -1583,7 +1550,7 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
/// boolean and select between the two values whenever it is used. This exposes
/// the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
- Type *GVElType = GV->getType()->getElementType();
+ Type *GVElType = GV->getValueType();
// If GVElType is already i1, it is already shrunk. If the type of the GV is
// an FP value, pointer or vector, don't do this optimization because a select
@@ -1611,6 +1578,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GV->getName()+".b",
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
+ NewGV->copyAttributesFrom(GV);
GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
Constant *InitVal = GV->getInitializer();
@@ -1679,7 +1647,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
return true;
}
-bool GlobalOpt::deleteIfDead(GlobalValue &GV) {
+static bool deleteIfDead(GlobalValue &GV,
+ SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
GV.removeDeadConstantUsers();
if (!GV.isDiscardableIfUnused())
@@ -1703,36 +1672,9 @@ bool GlobalOpt::deleteIfDead(GlobalValue &GV) {
return true;
}
-/// Analyze the specified global variable and optimize it if possible. If we
-/// make a change, return true.
-bool GlobalOpt::processGlobal(GlobalValue &GV) {
- // Do more involved optimizations if the global is internal.
- if (!GV.hasLocalLinkage())
- return false;
-
- GlobalStatus GS;
-
- if (GlobalStatus::analyzeGlobal(&GV, GS))
- return false;
-
- bool Changed = false;
- if (!GS.IsCompared && !GV.hasUnnamedAddr()) {
- GV.setUnnamedAddr(true);
- NumUnnamed++;
- Changed = true;
- }
-
- auto *GVar = dyn_cast<GlobalVariable>(&GV);
- if (!GVar)
- return Changed;
-
- if (GVar->isConstant() || !GVar->hasInitializer())
- return Changed;
-
- return processInternalGlobal(GVar, GS) || Changed;
-}
-
-bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) {
+static bool isPointerValueDeadOnEntryToFunction(
+ const Function *F, GlobalValue *GV,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
// Find all uses of GV. We expect them all to be in F, and if we can't
// identify any of the uses we bail out.
//
@@ -1776,8 +1718,7 @@ bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalVal
// of them are known not to depend on the value of the global at the function
// entry point. We do this by ensuring that every load is dominated by at
// least one store.
- auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
- .getDomTree();
+ auto &DT = LookupDomTree(*const_cast<Function *>(F));
// The below check is quadratic. Check we're not going to do too many tests.
// FIXME: Even though this will always have worst-case quadratic time, we
@@ -1866,8 +1807,9 @@ static void makeAllConstantUsesInstructions(Constant *C) {
/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
-bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
- const GlobalStatus &GS) {
+static bool processInternalGlobal(
+ GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function and
// this function is non-recursive, we replace the global with a local alloca
@@ -1879,16 +1821,17 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
// If the global is in different address space, don't bring it to stack.
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction &&
- GV->getType()->getElementType()->isSingleValueType() &&
+ GV->getValueType()->isSingleValueType() &&
GV->getType()->getAddressSpace() == 0 &&
!GV->isExternallyInitialized() &&
allNonInstructionUsersCanBeMadeInstructions(GV) &&
GS.AccessingFunction->doesNotRecurse() &&
- isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) {
+ isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
+ LookupDomTree)) {
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
- Type *ElemTy = GV->getType()->getElementType();
+ Type *ElemTy = GV->getValueType();
// FIXME: Pass Global's alignment when globals have alignment
AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
GV->getName(), &FirstI);
@@ -1896,7 +1839,7 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
makeAllConstantUsesInstructions(GV);
-
+
GV->replaceAllUsesWith(Alloca);
GV->eraseFromParent();
++NumLocalized;
@@ -1926,7 +1869,8 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
}
return Changed;
- } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
+ }
+ if (GS.StoredType <= GlobalStatus::InitializerStored) {
DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
GV->setConstant(true);
@@ -1939,15 +1883,18 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
<< "all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
+ return true;
}
+ // Fall through to the next check; see if we can optimize further.
++NumMarked;
- return true;
- } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ }
+ if (!GV->getInitializer()->getType()->isSingleValueType()) {
const DataLayout &DL = GV->getParent()->getDataLayout();
if (SRAGlobal(GV, DL))
return true;
- } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
+ }
+ if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
@@ -1978,7 +1925,7 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean.
if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
- if (GS.Ordering == NotAtomic) {
+ if (GS.Ordering == AtomicOrdering::NotAtomic) {
if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
++NumShrunkToBool;
return true;
@@ -1990,6 +1937,44 @@ bool GlobalOpt::processInternalGlobal(GlobalVariable *GV,
return false;
}
+/// Analyze the specified global variable and optimize it if possible. If we
+/// make a change, return true.
+static bool
+processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
+ if (GV.getName().startswith("llvm."))
+ return false;
+
+ GlobalStatus GS;
+
+ if (GlobalStatus::analyzeGlobal(&GV, GS))
+ return false;
+
+ bool Changed = false;
+ if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
+ auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
+ : GlobalValue::UnnamedAddr::Local;
+ if (NewUnnamedAddr != GV.getUnnamedAddr()) {
+ GV.setUnnamedAddr(NewUnnamedAddr);
+ NumUnnamed++;
+ Changed = true;
+ }
+ }
+
+ // Do more involved optimizations if the global is internal.
+ if (!GV.hasLocalLinkage())
+ return Changed;
+
+ auto *GVar = dyn_cast<GlobalVariable>(&GV);
+ if (!GVar)
+ return Changed;
+
+ if (GVar->isConstant() || !GVar->hasInitializer())
+ return Changed;
+
+ return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed;
+}
+
/// Walk all of the direct calls of the specified function, changing them to
/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
@@ -2034,7 +2019,10 @@ static bool isProfitableToMakeFastCC(Function *F) {
return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
}
-bool GlobalOpt::OptimizeFunctions(Module &M) {
+static bool
+OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree,
+ SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
bool Changed = false;
// Optimize functions.
for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
@@ -2043,12 +2031,12 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(*F)) {
+ if (deleteIfDead(*F, NotDiscardableComdats)) {
Changed = true;
continue;
}
- Changed |= processGlobal(*F);
+ Changed |= processGlobal(*F, TLI, LookupDomTree);
if (!F->hasLocalLinkage())
continue;
@@ -2075,7 +2063,10 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
return Changed;
}
-bool GlobalOpt::OptimizeGlobalVars(Module &M) {
+static bool
+OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree,
+ SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
bool Changed = false;
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
@@ -2093,148 +2084,16 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
GV->setInitializer(New);
}
- if (deleteIfDead(*GV)) {
+ if (deleteIfDead(*GV, NotDiscardableComdats)) {
Changed = true;
continue;
}
- Changed |= processGlobal(*GV);
+ Changed |= processGlobal(*GV, TLI, LookupDomTree);
}
return Changed;
}
-static inline bool
-isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant *> &SimpleConstants,
- const DataLayout &DL);
-
-/// Return true if the specified constant can be handled by the code generator.
-/// We don't want to generate something like:
-/// void *X = &X/42;
-/// because the code generator doesn't have a relocation that can handle that.
-///
-/// This function should be called if C was not found (but just got inserted)
-/// in SimpleConstants to avoid having to rescan the same constants all the
-/// time.
-static bool
-isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSetImpl<Constant *> &SimpleConstants,
- const DataLayout &DL) {
- // Simple global addresses are supported, do not allow dllimport or
- // thread-local globals.
- if (auto *GV = dyn_cast<GlobalValue>(C))
- return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal();
-
- // Simple integer, undef, constant aggregate zero, etc are all supported.
- if (C->getNumOperands() == 0 || isa<BlockAddress>(C))
- return true;
-
- // Aggregate values are safe if all their elements are.
- if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
- isa<ConstantVector>(C)) {
- for (Value *Op : C->operands())
- if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL))
- return false;
- return true;
- }
-
- // We don't know exactly what relocations are allowed in constant expressions,
- // so we allow &global+constantoffset, which is safe and uniformly supported
- // across targets.
- ConstantExpr *CE = cast<ConstantExpr>(C);
- switch (CE->getOpcode()) {
- case Instruction::BitCast:
- // Bitcast is fine if the casted value is fine.
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
-
- case Instruction::IntToPtr:
- case Instruction::PtrToInt:
- // int <=> ptr is fine if the int type is the same size as the
- // pointer type.
- if (DL.getTypeSizeInBits(CE->getType()) !=
- DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
- return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
-
- // GEP is fine if it is simple + constant offset.
- case Instruction::GetElementPtr:
- for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
- if (!isa<ConstantInt>(CE->getOperand(i)))
- return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
-
- case Instruction::Add:
- // We allow simple+cst.
- if (!isa<ConstantInt>(CE->getOperand(1)))
- return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
- }
- return false;
-}
-
-static inline bool
-isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant *> &SimpleConstants,
- const DataLayout &DL) {
- // If we already checked this constant, we win.
- if (!SimpleConstants.insert(C).second)
- return true;
- // Check the constant.
- return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
-}
-
-
-/// Return true if this constant is simple enough for us to understand. In
-/// particular, if it is a cast to anything other than from one pointer type to
-/// another pointer type, we punt. We basically just support direct accesses to
-/// globals and GEP's of globals. This should be kept up to date with
-/// CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C) {
- // Conservatively, avoid aggregate types. This is because we don't
- // want to worry about them partially overlapping other stores.
- if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
- return false;
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- // Do not allow weak/*_odr/linkonce linkage or external globals.
- return GV->hasUniqueInitializer();
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- // Handle a constantexpr gep.
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- isa<GlobalVariable>(CE->getOperand(0)) &&
- cast<GEPOperator>(CE)->isInBounds()) {
- GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
- // external globals.
- if (!GV->hasUniqueInitializer())
- return false;
-
- // The first index must be zero.
- ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
- if (!CI || !CI->isZero()) return false;
-
- // The remaining indices must be compile-time known integers within the
- // notional bounds of the corresponding static array types.
- if (!CE->isGEPWithNoNotionalOverIndexing())
- return false;
-
- return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
-
- // A constantexpr bitcast from a pointer to another pointer is a no-op,
- // and we know how to evaluate it by moving the bitcast from the pointer
- // operand to the value operand.
- } else if (CE->getOpcode() == Instruction::BitCast &&
- isa<GlobalVariable>(CE->getOperand(0))) {
- // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
- // external globals.
- return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
- }
- }
-
- return false;
-}
-
/// Evaluate a piece of a constantexpr store into a global initializer. This
/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the
/// GEP operands of Addr [0, OpNo) have been stepped into.
@@ -2298,533 +2157,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
}
-namespace {
-
-/// This class evaluates LLVM IR, producing the Constant representing each SSA
-/// instruction. Changes to global variables are stored in a mapping that can
-/// be iterated over after the evaluation is complete. Once an evaluation call
-/// fails, the evaluation object should not be reused.
-class Evaluator {
-public:
- Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
- : DL(DL), TLI(TLI) {
- ValueStack.emplace_back();
- }
-
- ~Evaluator() {
- for (auto &Tmp : AllocaTmps)
- // If there are still users of the alloca, the program is doing something
- // silly, e.g. storing the address of the alloca somewhere and using it
- // later. Since this is undefined, we'll just make it be null.
- if (!Tmp->use_empty())
- Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
- }
-
- /// Evaluate a call to function F, returning true if successful, false if we
- /// can't evaluate it. ActualArgs contains the formal arguments for the
- /// function.
- bool EvaluateFunction(Function *F, Constant *&RetVal,
- const SmallVectorImpl<Constant*> &ActualArgs);
-
- /// Evaluate all instructions in block BB, returning true if successful, false
- /// if we can't evaluate it. NewBB returns the next BB that control flows
- /// into, or null upon return.
- bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
-
- Constant *getVal(Value *V) {
- if (Constant *CV = dyn_cast<Constant>(V)) return CV;
- Constant *R = ValueStack.back().lookup(V);
- assert(R && "Reference to an uncomputed value!");
- return R;
- }
-
- void setVal(Value *V, Constant *C) {
- ValueStack.back()[V] = C;
- }
-
- const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
- return MutatedMemory;
- }
-
- const SmallPtrSetImpl<GlobalVariable*> &getInvariants() const {
- return Invariants;
- }
-
-private:
- Constant *ComputeLoadResult(Constant *P);
-
- /// As we compute SSA register values, we store their contents here. The back
- /// of the deque contains the current function and the stack contains the
- /// values in the calling frames.
- std::deque<DenseMap<Value*, Constant*>> ValueStack;
-
- /// This is used to detect recursion. In pathological situations we could hit
- /// exponential behavior, but at least there is nothing unbounded.
- SmallVector<Function*, 4> CallStack;
-
- /// For each store we execute, we update this map. Loads check this to get
- /// the most up-to-date value. If evaluation is successful, this state is
- /// committed to the process.
- DenseMap<Constant*, Constant*> MutatedMemory;
-
- /// To 'execute' an alloca, we create a temporary global variable to represent
- /// its body. This vector is needed so we can delete the temporary globals
- /// when we are done.
- SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
-
- /// These global variables have been marked invariant by the static
- /// constructor.
- SmallPtrSet<GlobalVariable*, 8> Invariants;
-
- /// These are constants we have checked and know to be simple enough to live
- /// in a static initializer of a global.
- SmallPtrSet<Constant*, 8> SimpleConstants;
-
- const DataLayout &DL;
- const TargetLibraryInfo *TLI;
-};
-
-} // anonymous namespace
-
-/// Return the value that would be computed by a load from P after the stores
-/// reflected by 'memory' have been performed. If we can't decide, return null.
-Constant *Evaluator::ComputeLoadResult(Constant *P) {
- // If this memory location has been recently stored, use the stored value: it
- // is the most up-to-date.
- DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
- if (I != MutatedMemory.end()) return I->second;
-
- // Access it.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
- if (GV->hasDefinitiveInitializer())
- return GV->getInitializer();
- return nullptr;
- }
-
- // Handle a constantexpr getelementptr.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- isa<GlobalVariable>(CE->getOperand(0))) {
- GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- if (GV->hasDefinitiveInitializer())
- return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
- }
-
- return nullptr; // don't know how to evaluate.
-}
-
-/// Evaluate all instructions in block BB, returning true if successful, false
-/// if we can't evaluate it. NewBB returns the next BB that control flows into,
-/// or null upon return.
-bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
- BasicBlock *&NextBB) {
- // This is the main evaluation loop.
- while (1) {
- Constant *InstResult = nullptr;
-
- DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
-
- if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
- if (!SI->isSimple()) {
- DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
- return false; // no volatile/atomic accesses.
- }
- Constant *Ptr = getVal(SI->getOperand(1));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
- DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
- Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
- DEBUG(dbgs() << "; To: " << *Ptr << "\n");
- }
- if (!isSimpleEnoughPointerToCommit(Ptr)) {
- // If this is too complex for us to commit, reject it.
- DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
- return false;
- }
-
- Constant *Val = getVal(SI->getOperand(0));
-
- // If this might be too difficult for the backend to handle (e.g. the addr
- // of one global variable divided by another) then we can't commit it.
- if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
- DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
- << "\n");
- return false;
- }
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
- // If we're evaluating a store through a bitcast, then we need
- // to pull the bitcast off the pointer type and push it onto the
- // stored value.
- Ptr = CE->getOperand(0);
-
- Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
-
- // In order to push the bitcast onto the stored value, a bitcast
- // from NewTy to Val's type must be legal. If it's not, we can try
- // introspecting NewTy to find a legal conversion.
- while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
- // If NewTy is a struct, we can convert the pointer to the struct
- // into a pointer to its first member.
- // FIXME: This could be extended to support arrays as well.
- if (StructType *STy = dyn_cast<StructType>(NewTy)) {
- NewTy = STy->getTypeAtIndex(0U);
-
- IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
- Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
- Constant * const IdxList[] = {IdxZero, IdxZero};
-
- Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
-
- // If we can't improve the situation by introspecting NewTy,
- // we have to give up.
- } else {
- DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
- "evaluate.\n");
- return false;
- }
- }
-
- // If we found compatible types, go ahead and push the bitcast
- // onto the stored value.
- Val = ConstantExpr::getBitCast(Val, NewTy);
-
- DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
- }
- }
-
- MutatedMemory[Ptr] = Val;
- } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
- InstResult = ConstantExpr::get(BO->getOpcode(),
- getVal(BO->getOperand(0)),
- getVal(BO->getOperand(1)));
- DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
- << "\n");
- } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
- InstResult = ConstantExpr::getCompare(CI->getPredicate(),
- getVal(CI->getOperand(0)),
- getVal(CI->getOperand(1)));
- DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
- << "\n");
- } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
- InstResult = ConstantExpr::getCast(CI->getOpcode(),
- getVal(CI->getOperand(0)),
- CI->getType());
- DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
- << "\n");
- } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
- InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
- getVal(SI->getOperand(1)),
- getVal(SI->getOperand(2)));
- DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
- << "\n");
- } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
- InstResult = ConstantExpr::getExtractValue(
- getVal(EVI->getAggregateOperand()), EVI->getIndices());
- DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult
- << "\n");
- } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
- InstResult = ConstantExpr::getInsertValue(
- getVal(IVI->getAggregateOperand()),
- getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
- DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult
- << "\n");
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
- Constant *P = getVal(GEP->getOperand(0));
- SmallVector<Constant*, 8> GEPOps;
- for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
- i != e; ++i)
- GEPOps.push_back(getVal(*i));
- InstResult =
- ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
- cast<GEPOperator>(GEP)->isInBounds());
- DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
- << "\n");
- } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
-
- if (!LI->isSimple()) {
- DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
- return false; // no volatile/atomic accesses.
- }
-
- Constant *Ptr = getVal(LI->getOperand(0));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
- Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
- DEBUG(dbgs() << "Found a constant pointer expression, constant "
- "folding: " << *Ptr << "\n");
- }
- InstResult = ComputeLoadResult(Ptr);
- if (!InstResult) {
- DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
- "\n");
- return false; // Could not evaluate load.
- }
-
- DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
- if (AI->isArrayAllocation()) {
- DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
- return false; // Cannot handle array allocs.
- }
- Type *Ty = AI->getType()->getElementType();
- AllocaTmps.push_back(
- make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
- UndefValue::get(Ty), AI->getName()));
- InstResult = AllocaTmps.back().get();
- DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
- } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
- CallSite CS(&*CurInst);
-
- // Debug info can safely be ignored here.
- if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
- DEBUG(dbgs() << "Ignoring debug info.\n");
- ++CurInst;
- continue;
- }
-
- // Cannot handle inline asm.
- if (isa<InlineAsm>(CS.getCalledValue())) {
- DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
- return false;
- }
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
- if (MSI->isVolatile()) {
- DEBUG(dbgs() << "Can not optimize a volatile memset " <<
- "intrinsic.\n");
- return false;
- }
- Constant *Ptr = getVal(MSI->getDest());
- Constant *Val = getVal(MSI->getValue());
- Constant *DestVal = ComputeLoadResult(getVal(Ptr));
- if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
- // This memset is a no-op.
- DEBUG(dbgs() << "Ignoring no-op memset.\n");
- ++CurInst;
- continue;
- }
- }
-
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
- ++CurInst;
- continue;
- }
-
- if (II->getIntrinsicID() == Intrinsic::invariant_start) {
- // We don't insert an entry into Values, as it doesn't have a
- // meaningful return value.
- if (!II->use_empty()) {
- DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
- return false;
- }
- ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
- Value *PtrArg = getVal(II->getArgOperand(1));
- Value *Ptr = PtrArg->stripPointerCasts();
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
- Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
- if (!Size->isAllOnesValue() &&
- Size->getValue().getLimitedValue() >=
- DL.getTypeStoreSize(ElemTy)) {
- Invariants.insert(GV);
- DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
- << "\n");
- } else {
- DEBUG(dbgs() << "Found a global var, but can not treat it as an "
- "invariant.\n");
- }
- }
- // Continue even if we do nothing.
- ++CurInst;
- continue;
- } else if (II->getIntrinsicID() == Intrinsic::assume) {
- DEBUG(dbgs() << "Skipping assume intrinsic.\n");
- ++CurInst;
- continue;
- }
-
- DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
- return false;
- }
-
- // Resolve function pointers.
- Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
- if (!Callee || Callee->mayBeOverridden()) {
- DEBUG(dbgs() << "Can not resolve function pointer.\n");
- return false; // Cannot resolve.
- }
-
- SmallVector<Constant*, 8> Formals;
- for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
- Formals.push_back(getVal(*i));
-
- if (Callee->isDeclaration()) {
- // If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
- InstResult = C;
- DEBUG(dbgs() << "Constant folded function call. Result: " <<
- *InstResult << "\n");
- } else {
- DEBUG(dbgs() << "Can not constant fold function call.\n");
- return false;
- }
- } else {
- if (Callee->getFunctionType()->isVarArg()) {
- DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
- return false;
- }
-
- Constant *RetVal = nullptr;
- // Execute the call, if successful, use the return value.
- ValueStack.emplace_back();
- if (!EvaluateFunction(Callee, RetVal, Formals)) {
- DEBUG(dbgs() << "Failed to evaluate function.\n");
- return false;
- }
- ValueStack.pop_back();
- InstResult = RetVal;
-
- if (InstResult) {
- DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
- InstResult << "\n\n");
- } else {
- DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
- }
- }
- } else if (isa<TerminatorInst>(CurInst)) {
- DEBUG(dbgs() << "Found a terminator instruction.\n");
-
- if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
- if (BI->isUnconditional()) {
- NextBB = BI->getSuccessor(0);
- } else {
- ConstantInt *Cond =
- dyn_cast<ConstantInt>(getVal(BI->getCondition()));
- if (!Cond) return false; // Cannot determine.
-
- NextBB = BI->getSuccessor(!Cond->getZExtValue());
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
- ConstantInt *Val =
- dyn_cast<ConstantInt>(getVal(SI->getCondition()));
- if (!Val) return false; // Cannot determine.
- NextBB = SI->findCaseValue(Val).getCaseSuccessor();
- } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
- Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
- if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
- NextBB = BA->getBasicBlock();
- else
- return false; // Cannot determine.
- } else if (isa<ReturnInst>(CurInst)) {
- NextBB = nullptr;
- } else {
- // invoke, unwind, resume, unreachable.
- DEBUG(dbgs() << "Can not handle terminator.");
- return false; // Cannot handle this terminator.
- }
-
- // We succeeded at evaluating this block!
- DEBUG(dbgs() << "Successfully evaluated block.\n");
- return true;
- } else {
- // Did not know how to evaluate this!
- DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
- "\n");
- return false;
- }
-
- if (!CurInst->use_empty()) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
- InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
-
- setVal(&*CurInst, InstResult);
- }
-
- // If we just processed an invoke, we finished evaluating the block.
- if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
- NextBB = II->getNormalDest();
- DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
- return true;
- }
-
- // Advance program counter.
- ++CurInst;
- }
-}
-
-/// Evaluate a call to function F, returning true if successful, false if we
-/// can't evaluate it. ActualArgs contains the formal arguments for the
-/// function.
-bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
- const SmallVectorImpl<Constant*> &ActualArgs) {
- // Check to see if this function is already executing (recursion). If so,
- // bail out. TODO: we might want to accept limited recursion.
- if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
- return false;
-
- CallStack.push_back(F);
-
- // Initialize arguments to the incoming values specified.
- unsigned ArgNo = 0;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
- ++AI, ++ArgNo)
- setVal(&*AI, ActualArgs[ArgNo]);
-
- // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
- // we can only evaluate any one basic block at most once. This set keeps
- // track of what we have executed so we can detect recursive cases etc.
- SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-
- // CurBB - The current basic block we're evaluating.
- BasicBlock *CurBB = &F->front();
-
- BasicBlock::iterator CurInst = CurBB->begin();
-
- while (1) {
- BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
- DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
-
- if (!EvaluateBlock(CurInst, NextBB))
- return false;
-
- if (!NextBB) {
- // Successfully running until there's no next block means that we found
- // the return. Fill in the return value and pop the call stack.
- ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
- if (RI->getNumOperands())
- RetVal = getVal(RI->getOperand(0));
- CallStack.pop_back();
- return true;
- }
-
- // Okay, we succeeded in evaluating this control flow. See if we have
- // executed the new block before. If so, we have a looping function,
- // which we cannot evaluate in reasonable time.
- if (!ExecutedBlocks.insert(NextBB).second)
- return false; // looped!
-
- // Okay, we have never been in this block before. Check to see if there
- // are any PHI nodes. If so, evaluate them with information about where
- // we came from.
- PHINode *PN = nullptr;
- for (CurInst = NextBB->begin();
- (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
- setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
-
- // Advance to the next block.
- CurBB = NextBB;
- }
-}
-
/// Evaluate static constructors in the function, if we can. Return true if we
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ TargetLibraryInfo *TLI) {
// Call the function.
Evaluator Eval(DL, TLI);
Constant *RetValDummy;
@@ -2838,10 +2174,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
<< F->getName() << "' to " << Eval.getMutatedMemory().size()
<< " stores.\n");
- for (DenseMap<Constant*, Constant*>::const_iterator I =
- Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
- I != E; ++I)
- CommitValueTo(I->second, I->first);
+ for (const auto &I : Eval.getMutatedMemory())
+ CommitValueTo(I.second, I.first);
for (GlobalVariable *GV : Eval.getInvariants())
GV->setConstant(true);
}
@@ -2850,8 +2184,9 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
}
static int compareNames(Constant *const *A, Constant *const *B) {
- return (*A)->stripPointerCasts()->getName().compare(
- (*B)->stripPointerCasts()->getName());
+ Value *AStripped = (*A)->stripPointerCastsNoFollowAliases();
+ Value *BStripped = (*B)->stripPointerCastsNoFollowAliases();
+ return AStripped->getName().compare(BStripped->getName());
}
static void setUsedInitializer(GlobalVariable &V,
@@ -2995,7 +2330,9 @@ static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
return true;
}
-bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
+static bool
+OptimizeGlobalAliases(Module &M,
+ SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
bool Changed = false;
LLVMUsed Used(M);
@@ -3010,13 +2347,13 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
J->setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(*J)) {
+ if (deleteIfDead(*J, NotDiscardableComdats)) {
Changed = true;
continue;
}
// If the aliasee may change at link time, nothing can be done - bail out.
- if (J->mayBeOverridden())
+ if (J->isInterposable())
continue;
Constant *Aliasee = J->getAliasee();
@@ -3064,23 +2401,16 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
}
static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::cxa_atexit))
+ LibFunc::Func F = LibFunc::cxa_atexit;
+ if (!TLI->has(F))
return nullptr;
- Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
-
+ Function *Fn = M.getFunction(TLI->getName(F));
if (!Fn)
return nullptr;
- FunctionType *FTy = Fn->getFunctionType();
-
- // Checking that the function has the right return type, the right number of
- // parameters and that they all have pointer types should be enough.
- if (!FTy->getReturnType()->isIntegerTy() ||
- FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
+ // Make sure that the function has the correct prototype.
+ if (!TLI->getLibFunc(*Fn, F) || F != LibFunc::cxa_atexit)
return nullptr;
return Fn;
@@ -3132,7 +2462,7 @@ static bool cxxDtorIsEmpty(const Function &Fn,
return false;
}
-bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
+static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
/// Itanium C++ ABI p3.3.5:
///
/// After constructing a global (or local static) object, that will require
@@ -3179,12 +2509,11 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
return Changed;
}
-bool GlobalOpt::runOnModule(Module &M) {
+static bool optimizeGlobalsInModule(
+ Module &M, const DataLayout &DL, TargetLibraryInfo *TLI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
+ SmallSet<const Comdat *, 8> NotDiscardableComdats;
bool Changed = false;
-
- auto &DL = M.getDataLayout();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
@@ -3204,7 +2533,8 @@ bool GlobalOpt::runOnModule(Module &M) {
NotDiscardableComdats.insert(C);
// Delete functions that are trivially dead, ccc -> fastcc
- LocalChange |= OptimizeFunctions(M);
+ LocalChange |=
+ OptimizeFunctions(M, TLI, LookupDomTree, NotDiscardableComdats);
// Optimize global_ctors list.
LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
@@ -3212,10 +2542,11 @@ bool GlobalOpt::runOnModule(Module &M) {
});
// Optimize non-address-taken globals.
- LocalChange |= OptimizeGlobalVars(M);
+ LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree,
+ NotDiscardableComdats);
// Resolve aliases, when possible.
- LocalChange |= OptimizeGlobalAliases(M);
+ LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
// Try to remove trivial global destructors if they are not removed
// already.
@@ -3232,3 +2563,53 @@ bool GlobalOpt::runOnModule(Module &M) {
return Changed;
}
+PreservedAnalyses GlobalOptPass::run(Module &M, AnalysisManager<Module> &AM) {
+ auto &DL = M.getDataLayout();
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
+ auto &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ if (!optimizeGlobalsInModule(M, DL, &TLI, LookupDomTree))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct GlobalOptLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ GlobalOptLegacyPass() : ModulePass(ID) {
+ initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+
+ auto &DL = M.getDataLayout();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto LookupDomTree = [this](Function &F) -> DominatorTree & {
+ return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ };
+ return optimizeGlobalsInModule(M, DL, TLI, LookupDomTree);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+};
+}
+
+char GlobalOptLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
+ "Global Variable Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
+ "Global Variable Optimizer", false, false)
+
+ModulePass *llvm::createGlobalOptimizerPass() {
+ return new GlobalOptLegacyPass();
+}
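GlobalOpt, by contrast, pulls TargetLibraryAnalysis at the module level and dominator trees per function, so a standalone new-PM driver has to register both analysis layers plus the module-to-function proxy before running it. A sketch under those assumptions; in a real driver this wiring comes from PassBuilder's standard pipeline setup:

    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/IPO/GlobalOpt.h"

    // Sketch only: minimal analysis wiring for GlobalOptPass.
    llvm::PreservedAnalyses runGlobalOpt(llvm::Module &M) {
      llvm::FunctionAnalysisManager FAM;
      llvm::ModuleAnalysisManager MAM;
      llvm::PassBuilder PB;
      PB.registerFunctionAnalyses(FAM); // provides DominatorTreeAnalysis
      PB.registerModuleAnalyses(MAM);   // provides TargetLibraryAnalysis
      // Expose per-function analyses (dominator trees) to the module pass.
      MAM.registerPass(
          [&] { return llvm::FunctionAnalysisManagerModuleProxy(FAM); });
      llvm::ModulePassManager MPM;
      MPM.addPass(llvm::GlobalOptPass());
      return MPM.run(M, MAM);
    }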
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index af541d1552545..916135e33cd50 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -41,44 +41,14 @@ namespace {
}
bool runOnModule(Module &M) override;
- private:
- bool PropagateConstantsIntoArguments(Function &F);
- bool PropagateConstantReturn(Function &F);
};
}
-char IPCP::ID = 0;
-INITIALIZE_PASS(IPCP, "ipconstprop",
- "Interprocedural constant propagation", false, false)
-
-ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
-
-bool IPCP::runOnModule(Module &M) {
- bool Changed = false;
- bool LocalChange = true;
-
- // FIXME: instead of using smart algorithms, we just iterate until we stop
- // making changes.
- while (LocalChange) {
- LocalChange = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration()) {
- // Delete any klingons.
- I->removeDeadConstantUsers();
- if (I->hasLocalLinkage())
- LocalChange |= PropagateConstantsIntoArguments(*I);
- Changed |= PropagateConstantReturn(*I);
- }
- Changed |= LocalChange;
- }
- return Changed;
-}
-
/// PropagateConstantsIntoArguments - Look at all uses of the specified
/// function. If all uses are direct call sites, and all pass a particular
/// constant in for an argument, propagate that constant in as the argument.
///
-bool IPCP::PropagateConstantsIntoArguments(Function &F) {
+static bool PropagateConstantsIntoArguments(Function &F) {
  if (F.arg_empty() || F.use_empty()) return false; // No arguments or no users? Early exit.
// For each argument, keep track of its constant value and whether it is a
@@ -157,13 +127,14 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
// Additionally if a function always returns one of its arguments directly,
// callers will be updated to use the value they pass in directly instead of
// using the return value.
-bool IPCP::PropagateConstantReturn(Function &F) {
+static bool PropagateConstantReturn(Function &F) {
if (F.getReturnType()->isVoidTy())
return false; // No return value.
- // If this function could be overridden later in the link stage, we can't
- // propagate information about its results into callers.
- if (F.mayBeOverridden())
+ // We can infer and propagate the return value only when we know that the
+ // definition we'll get at link time is *exactly* the definition we see now.
+ // For more details, see GlobalValue::mayBeDerefined.
+ if (!F.isDefinitionExact())
return false;
// Check to see if this function returns a constant.
@@ -176,8 +147,8 @@ bool IPCP::PropagateConstantReturn(Function &F) {
RetVals.push_back(UndefValue::get(F.getReturnType()));
unsigned NumNonConstant = 0;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ for (BasicBlock &BB : F)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
for (unsigned i = 0, e = RetVals.size(); i != e; ++i) {
// Already found conflicting return values?
Value *RV = RetVals[i];
@@ -277,3 +248,33 @@ bool IPCP::PropagateConstantReturn(Function &F) {
if (MadeChange) ++NumReturnValProped;
return MadeChange;
}
+
+char IPCP::ID = 0;
+INITIALIZE_PASS(IPCP, "ipconstprop",
+ "Interprocedural constant propagation", false, false)
+
+ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
+
+bool IPCP::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ bool Changed = false;
+ bool LocalChange = true;
+
+ // FIXME: instead of using smart algorithms, we just iterate until we stop
+ // making changes.
+ while (LocalChange) {
+ LocalChange = false;
+ for (Function &F : M)
+ if (!F.isDeclaration()) {
+ // Delete any klingons.
+ F.removeDeadConstantUsers();
+ if (F.hasLocalLinkage())
+ LocalChange |= PropagateConstantsIntoArguments(F);
+ Changed |= PropagateConstantReturn(F);
+ }
+ Changed |= LocalChange;
+ }
+ return Changed;
+}
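
The pivotal change in this file is the guard: mayBeOverridden() is replaced by !isDefinitionExact(), which also rejects linkages that may be "derefined" at link time (an ODR-equivalent but not identical body may be chosen). A hedged sketch restating that guard on its own:

#include "llvm/IR/Function.h"

// Return-value propagation is sound only when the definition visible now
// is exactly the definition the linker will use; see
// GlobalValue::mayBeDerefined for the full rationale.
static bool mayPropagateReturn(const llvm::Function &F) {
  if (F.getReturnType()->isVoidTy())
    return false;                 // nothing to propagate
  return F.isDefinitionExact();
}
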
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 89629cf06e083..3507eba81b2f5 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -18,31 +18,32 @@
#include "llvm/InitializePasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeArgPromotionPass(Registry);
- initializeConstantMergePass(Registry);
+ initializeConstantMergeLegacyPassPass(Registry);
initializeCrossDSOCFIPass(Registry);
initializeDAEPass(Registry);
initializeDAHPass(Registry);
initializeForceFunctionAttrsLegacyPassPass(Registry);
- initializeGlobalDCEPass(Registry);
- initializeGlobalOptPass(Registry);
+ initializeGlobalDCELegacyPassPass(Registry);
+ initializeGlobalOptLegacyPassPass(Registry);
initializeIPCPPass(Registry);
initializeAlwaysInlinerPass(Registry);
initializeSimpleInlinerPass(Registry);
initializeInferFunctionAttrsLegacyPassPass(Registry);
- initializeInternalizePassPass(Registry);
+ initializeInternalizeLegacyPassPass(Registry);
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
- initializeLowerBitSetsPass(Registry);
+ initializeLowerTypeTestsPass(Registry);
initializeMergeFunctionsPass(Registry);
- initializePartialInlinerPass(Registry);
- initializePostOrderFunctionAttrsPass(Registry);
- initializeReversePostOrderFunctionAttrsPass(Registry);
+ initializePartialInlinerLegacyPassPass(Registry);
+ initializePostOrderFunctionAttrsLegacyPassPass(Registry);
+ initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry);
initializePruneEHPass(Registry);
initializeStripDeadPrototypesLegacyPassPass(Registry);
initializeStripSymbolsPass(Registry);
@@ -50,9 +51,10 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeStripDeadDebugInfoPass(Registry);
initializeStripNonDebugSymbolsPass(Registry);
initializeBarrierNoopPass(Registry);
- initializeEliminateAvailableExternallyPass(Registry);
- initializeSampleProfileLoaderPass(Registry);
+ initializeEliminateAvailableExternallyLegacyPassPass(Registry);
+ initializeSampleProfileLoaderLegacyPassPass(Registry);
initializeFunctionImportPassPass(Registry);
+ initializeWholeProgramDevirtPass(Registry);
}
void LLVMInitializeIPO(LLVMPassRegistryRef R) {
@@ -72,7 +74,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
}
void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createPostOrderFunctionAttrsPass());
+ unwrap(PM)->add(createPostOrderFunctionAttrsLegacyPass());
}
void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
@@ -104,10 +106,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
}
void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
- std::vector<const char *> Export;
- if (AllButMain)
- Export.push_back("main");
- unwrap(PM)->add(createInternalizePass(Export));
+ auto PreserveMain = [=](const GlobalValue &GV) {
+ return AllButMain && GV.getName() == "main";
+ };
+ unwrap(PM)->add(createInternalizePass(PreserveMain));
}
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
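
The rewritten C-API shim above shows the new shape of the internalize interface: a predicate over GlobalValue replaces the old export-name vector. A sketch of calling it directly from C++; "my_api_entry" is a made-up symbol name for illustration:

#include "llvm/IR/GlobalValue.h"
#include "llvm/Transforms/IPO.h"

// Build a pass that internalizes everything except the listed symbols.
llvm::ModulePass *makeInternalizer() {
  return llvm::createInternalizePass([](const llvm::GlobalValue &GV) {
    // Return true for anything that must stay externally visible.
    return GV.getName() == "main" || GV.getName() == "my_api_entry";
  });
}
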
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 4295a7595c29f..ab2d2bd8b02a6 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/Function.h"
@@ -16,937 +15,27 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
#define DEBUG_TYPE "inferattrs"
-STATISTIC(NumReadNone, "Number of functions inferred as readnone");
-STATISTIC(NumReadOnly, "Number of functions inferred as readonly");
-STATISTIC(NumArgMemOnly, "Number of functions inferred as argmemonly");
-STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
-STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
-STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
-STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
-STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
-
-static bool setDoesNotAccessMemory(Function &F) {
- if (F.doesNotAccessMemory())
- return false;
- F.setDoesNotAccessMemory();
- ++NumReadNone;
- return true;
-}
-
-static bool setOnlyReadsMemory(Function &F) {
- if (F.onlyReadsMemory())
- return false;
- F.setOnlyReadsMemory();
- ++NumReadOnly;
- return true;
-}
-
-static bool setOnlyAccessesArgMemory(Function &F) {
- if (F.onlyAccessesArgMemory())
- return false;
- F.setOnlyAccessesArgMemory();
- ++NumArgMemOnly;
- return true;
-}
-
-
-static bool setDoesNotThrow(Function &F) {
- if (F.doesNotThrow())
- return false;
- F.setDoesNotThrow();
- ++NumNoUnwind;
- return true;
-}
-
-static bool setDoesNotCapture(Function &F, unsigned n) {
- if (F.doesNotCapture(n))
- return false;
- F.setDoesNotCapture(n);
- ++NumNoCapture;
- return true;
-}
-
-static bool setOnlyReadsMemory(Function &F, unsigned n) {
- if (F.onlyReadsMemory(n))
- return false;
- F.setOnlyReadsMemory(n);
- ++NumReadOnlyArg;
- return true;
-}
-
-static bool setDoesNotAlias(Function &F, unsigned n) {
- if (F.doesNotAlias(n))
- return false;
- F.setDoesNotAlias(n);
- ++NumNoAlias;
- return true;
-}
-
-static bool setNonNull(Function &F, unsigned n) {
- assert((n != AttributeSet::ReturnIndex ||
- F.getReturnType()->isPointerTy()) &&
- "nonnull applies only to pointers");
- if (F.getAttributes().hasAttribute(n, Attribute::NonNull))
- return false;
- F.addAttribute(n, Attribute::NonNull);
- ++NumNonNull;
- return true;
-}
-
-/// Analyze the name and prototype of the given function and set any applicable
-/// attributes.
-///
-/// Returns true if any attributes were set and false otherwise.
-static bool inferPrototypeAttributes(Function &F,
- const TargetLibraryInfo &TLI) {
- if (F.hasFnAttribute(Attribute::OptimizeNone))
- return false;
-
- FunctionType *FTy = F.getFunctionType();
- LibFunc::Func TheLibFunc;
- if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc)))
- return false;
-
- bool Changed = false;
- switch (TheLibFunc) {
- case LibFunc::strlen:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::strchr:
- case LibFunc::strrchr:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- return false;
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
- case LibFunc::strcat:
- case LibFunc::strncat:
- case LibFunc::strncpy:
- case LibFunc::stpncpy:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::strxfrm:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::strcmp: // 0,1
- case LibFunc::strspn: // 0,1
- case LibFunc::strncmp: // 0,1
- case LibFunc::strcspn: // 0,1
- case LibFunc::strcoll: // 0,1
- case LibFunc::strcasecmp: // 0,1
- case LibFunc::strncasecmp: // 0,1
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::strstr:
- case LibFunc::strpbrk:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::strtok:
- case LibFunc::strtok_r:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::scanf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::setbuf:
- case LibFunc::setvbuf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::strdup:
- case LibFunc::strndup:
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::stat:
- case LibFunc::statvfs:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::sscanf:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::sprintf:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::snprintf:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 3);
- Changed |= setOnlyReadsMemory(F, 3);
- return Changed;
- case LibFunc::setitimer:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setDoesNotCapture(F, 3);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::system:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "system" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::malloc:
- if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- return Changed;
- case LibFunc::memcmp:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::memchr:
- case LibFunc::memrchr:
- if (FTy->getNumParams() != 3)
- return false;
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc::modf:
- case LibFunc::modff:
- case LibFunc::modfl:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::memcpy:
- case LibFunc::memccpy:
- case LibFunc::memmove:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::memalign:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- Changed |= setDoesNotAlias(F, 0);
- return Changed;
- case LibFunc::mkdir:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::mktime:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::realloc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::read:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "read" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::rewind:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::rmdir:
- case LibFunc::remove:
- case LibFunc::realpath:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::rename:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::readlink:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::write:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "write" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::bcopy:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::bcmp:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::bzero:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::calloc:
- if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- return Changed;
- case LibFunc::chmod:
- case LibFunc::chown:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::ctermid:
- case LibFunc::clearerr:
- case LibFunc::closedir:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::atoi:
- case LibFunc::atol:
- case LibFunc::atof:
- case LibFunc::atoll:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::access:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::fopen:
- if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::fdopen:
- if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::feof:
- case LibFunc::free:
- case LibFunc::fseek:
- case LibFunc::ftell:
- case LibFunc::fgetc:
- case LibFunc::fseeko:
- case LibFunc::ftello:
- case LibFunc::fileno:
- case LibFunc::fflush:
- case LibFunc::fclose:
- case LibFunc::fsetpos:
- case LibFunc::flockfile:
- case LibFunc::funlockfile:
- case LibFunc::ftrylockfile:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::ferror:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F);
- return Changed;
- case LibFunc::fputc:
- case LibFunc::fstat:
- case LibFunc::frexp:
- case LibFunc::frexpf:
- case LibFunc::frexpl:
- case LibFunc::fstatvfs:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::fgets:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 3);
- return Changed;
- case LibFunc::fread:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 4);
- return Changed;
- case LibFunc::fwrite:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 4);
- return Changed;
- case LibFunc::fputs:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::fscanf:
- case LibFunc::fprintf:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::fgetpos:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::getc:
- case LibFunc::getlogin_r:
- case LibFunc::getc_unlocked:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::getenv:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setOnlyReadsMemory(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::gets:
- case LibFunc::getchar:
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc::getitimer:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::getpwnam:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::ungetc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::uname:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::unlink:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::unsetenv:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::utime:
- case LibFunc::utimes:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::putc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::puts:
- case LibFunc::printf:
- case LibFunc::perror:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::pread:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "pread" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::pwrite:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return false;
- // May throw; "pwrite" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::putchar:
- Changed |= setDoesNotThrow(F);
- return Changed;
- case LibFunc::popen:
- if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::pclose:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::vscanf:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::vsscanf:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::vfscanf:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::valloc:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- return Changed;
- case LibFunc::vprintf:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::vfprintf:
- case LibFunc::vsprintf:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::vsnprintf:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 3);
- Changed |= setOnlyReadsMemory(F, 3);
- return Changed;
- case LibFunc::open:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "open" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::opendir:
- if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::tmpfile:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- return Changed;
- case LibFunc::times:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::htonl:
- case LibFunc::htons:
- case LibFunc::ntohl:
- case LibFunc::ntohs:
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAccessMemory(F);
- return Changed;
- case LibFunc::lstat:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::lchown:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::qsort:
- if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
- return false;
- // May throw; places call through function pointer.
- Changed |= setDoesNotCapture(F, 4);
- return Changed;
- case LibFunc::dunder_strdup:
- case LibFunc::dunder_strndup:
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::dunder_strtok_r:
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::under_IO_getc:
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::under_IO_putc:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::dunder_isoc99_scanf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::stat64:
- case LibFunc::lstat64:
- case LibFunc::statvfs64:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::dunder_isoc99_sscanf:
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::fopen64:
- if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- Changed |= setOnlyReadsMemory(F, 1);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
- case LibFunc::fseeko64:
- case LibFunc::ftello64:
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- return Changed;
- case LibFunc::tmpfile64:
- if (!FTy->getReturnType()->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotAlias(F, 0);
- return Changed;
- case LibFunc::fstat64:
- case LibFunc::fstatvfs64:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return false;
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
- case LibFunc::open64:
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return false;
- // May throw; "open" is a valid pthread cancellation point.
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
- case LibFunc::gettimeofday:
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return false;
- // Currently some platforms have the restrict keyword on the arguments to
- // gettimeofday. To be conservative, do not add noalias to gettimeofday's
- // arguments.
- Changed |= setDoesNotThrow(F);
- Changed |= setDoesNotCapture(F, 1);
- Changed |= setDoesNotCapture(F, 2);
- return Changed;
-
- case LibFunc::Znwj: // new(unsigned int)
- case LibFunc::Znwm: // new(unsigned long)
- case LibFunc::Znaj: // new[](unsigned int)
- case LibFunc::Znam: // new[](unsigned long)
- case LibFunc::msvc_new_int: // new(unsigned int)
- case LibFunc::msvc_new_longlong: // new(unsigned long long)
- case LibFunc::msvc_new_array_int: // new[](unsigned int)
- case LibFunc::msvc_new_array_longlong: // new[](unsigned long long)
- if (FTy->getNumParams() != 1)
- return false;
- // Operator new always returns a nonnull noalias pointer
- Changed |= setNonNull(F, AttributeSet::ReturnIndex);
- Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex);
- return Changed;
-
- //TODO: add LibFunc entries for:
- //case LibFunc::memset_pattern4:
- //case LibFunc::memset_pattern8:
- case LibFunc::memset_pattern16:
- if (FTy->isVarArg() || FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)) ||
- !isa<IntegerType>(FTy->getParamType(2)))
- return false;
-
- Changed |= setOnlyAccessesArgMemory(F);
- Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
-
- default:
- // FIXME: It'd be really nice to cover all the library functions we're
- // aware of here.
- return false;
- }
-}
-
static bool inferAllPrototypeAttributes(Module &M,
const TargetLibraryInfo &TLI) {
bool Changed = false;
for (Function &F : M.functions())
- // We only infer things using the prototype if the definition isn't around
- // to analyze directly.
- if (F.isDeclaration())
- Changed |= inferPrototypeAttributes(F, TLI);
+ // We only infer things using the prototype and the name; we don't need
+ // definitions.
+ if (F.isDeclaration() && !F.hasFnAttribute(Attribute::OptimizeNone))
+ Changed |= inferLibFuncAttributes(F, TLI);
return Changed;
}
PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
- AnalysisManager<Module> *AM) {
- auto &TLI = AM->getResult<TargetLibraryAnalysis>(M);
+ AnalysisManager<Module> &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
if (!inferAllPrototypeAttributes(M, TLI))
// If we didn't infer anything, preserve all analyses.
@@ -970,6 +59,9 @@ struct InferFunctionAttrsLegacyPass : public ModulePass {
}
bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
return inferAllPrototypeAttributes(M, TLI);
}
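
The roughly 900-line switch deleted above now lives behind inferLibFuncAttributes() in Transforms/Utils/BuildLibCalls, but its building blocks are unchanged: small idempotent setters that apply an attribute only when it is new and report whether anything changed, so Changed can be accumulated with |=. A sketch of that idiom, mirroring the removed helpers:

#include "llvm/IR/Function.h"

// Apply the attribute only if it is not already present, and report
// whether the function actually changed.
static bool setDoesNotThrowIfNew(llvm::Function &F) {
  if (F.doesNotThrow())
    return false;
  F.setDoesNotThrow();
  return true;
}
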
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 1704bfea0b86a..cb1ab95ec2af1 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
@@ -37,16 +38,17 @@ namespace {
class AlwaysInliner : public Inliner {
public:
- // Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) {
+ AlwaysInliner() : Inliner(ID, /*InsertLifetime*/ true) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
- AlwaysInliner(bool InsertLifetime)
- : Inliner(ID, -2000000000, InsertLifetime) {
+ AlwaysInliner(bool InsertLifetime) : Inliner(ID, InsertLifetime) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
+ /// Main run interface method. We override here to avoid calling skipSCC().
+ bool runOnSCC(CallGraphSCC &SCC) override { return inlineCalls(SCC); }
+
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) override;
@@ -64,6 +66,7 @@ INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 45609f891ed87..2aa650bd219dc 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
@@ -38,14 +39,20 @@ namespace {
/// inliner pass and the always inliner pass. The two passes use different cost
/// analyses to determine when to inline.
class SimpleInliner : public Inliner {
+ // This field is populated based on one of the following:
+ // * optimization or size-optimization levels,
+ // * the --inline-threshold flag, or
+ // * a user-specified value.
+ int DefaultThreshold;
public:
- SimpleInliner() : Inliner(ID) {
+ SimpleInliner()
+ : Inliner(ID), DefaultThreshold(llvm::getDefaultInlineThreshold()) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
- SimpleInliner(int Threshold)
- : Inliner(ID, Threshold, /*InsertLifetime*/ true) {
+ explicit SimpleInliner(int Threshold)
+ : Inliner(ID), DefaultThreshold(Threshold) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -54,7 +61,7 @@ public:
InlineCost getInlineCost(CallSite CS) override {
Function *Callee = CS.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
- return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT);
+ return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, PSI);
}
bool runOnSCC(CallGraphSCC &SCC) override;
@@ -64,17 +71,6 @@ private:
TargetTransformInfoWrapperPass *TTIWP;
};
-static int computeThresholdFromOptLevels(unsigned OptLevel,
- unsigned SizeOptLevel) {
- if (OptLevel > 2)
- return 275;
- if (SizeOptLevel == 1) // -Os
- return 75;
- if (SizeOptLevel == 2) // -Oz
- return 25;
- return 225;
-}
-
} // end anonymous namespace
char SimpleInliner::ID = 0;
@@ -82,6 +78,7 @@ INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(SimpleInliner, "inline",
@@ -96,7 +93,7 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
unsigned SizeOptLevel) {
return new SimpleInliner(
- computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+ llvm::computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
}
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
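
For reference, the deleted static helper's table (now exposed as llvm::computeThresholdFromOptLevels) is small enough to restate; a sketch of the same mapping:

// Threshold selection by optimization level, as in the removed helper:
//   -O3 -> 275, -Os -> 75, -Oz -> 25, otherwise -> 225.
static int thresholdFor(unsigned OptLevel, unsigned SizeOptLevel) {
  if (OptLevel > 2)
    return 275;
  if (SizeOptLevel == 1) // -Os
    return 75;
  if (SizeOptLevel == 2) // -Oz
    return 25;
  return 225;
}
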
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index bbe5f8761d5f1..79535ca497803 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -21,6 +20,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -28,9 +28,9 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -47,40 +47,19 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together");
// if those would be more profitable and blocked inline steps.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
-static cl::opt<int>
-InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
- cl::desc("Control the amount of inlining to perform (default = 225)"));
-
-static cl::opt<int>
-HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
- cl::desc("Threshold for inlining functions with inline hint"));
-
-// We introduce this threshold to help the performance of instrumentation-based
-// PGO before we actually hook up the inliner with analysis passes such as BPI
-// and BFI.
-static cl::opt<int>
-ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
- cl::desc("Threshold for inlining functions with cold attribute"));
-
-// Threshold to use when optsize is specified (and there is no -inline-limit).
-const int OptSizeThreshold = 75;
+Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
-Inliner::Inliner(char &ID)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {
-}
-
-Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
- : CallGraphSCCPass(ID),
- InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit
- : Threshold),
- InsertLifetime(InsertLifetime) {}
+Inliner::Inliner(char &ID, bool InsertLifetime)
+ : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getAAResultsAnalysisUsage(AU);
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -243,67 +222,6 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
return true;
}
-unsigned Inliner::getInlineThreshold(CallSite CS) const {
- int Threshold = InlineThreshold; // -inline-threshold or else selected by
- // overall opt level
-
- // If -inline-threshold is not given, listen to the optsize attribute when it
- // would decrease the threshold.
- Function *Caller = CS.getCaller();
- bool OptSize = Caller && !Caller->isDeclaration() &&
- // FIXME: Use Function::optForSize().
- Caller->hasFnAttribute(Attribute::OptimizeForSize);
- if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
- OptSizeThreshold < Threshold)
- Threshold = OptSizeThreshold;
-
- Function *Callee = CS.getCalledFunction();
- if (!Callee || Callee->isDeclaration())
- return Threshold;
-
- // If profile information is available, use that to adjust threshold of hot
- // and cold functions.
- // FIXME: The heuristic used below for determining hotness and coldness are
- // based on preliminary SPEC tuning and may not be optimal. Replace this with
- // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
- uint64_t FunctionCount = 0, MaxFunctionCount = 0;
- bool HasPGOCounts = false;
- if (Callee->getEntryCount() &&
- Callee->getParent()->getMaximumFunctionCount()) {
- HasPGOCounts = true;
- FunctionCount = Callee->getEntryCount().getValue();
- MaxFunctionCount =
- Callee->getParent()->getMaximumFunctionCount().getValue();
- }
-
- // Listen to the inlinehint attribute or profile based hotness information
- // when it would increase the threshold and the caller does not need to
- // minimize its size.
- bool InlineHint =
- Callee->hasFnAttribute(Attribute::InlineHint) ||
- (HasPGOCounts &&
- FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
- if (InlineHint && HintThreshold > Threshold &&
- !Caller->hasFnAttribute(Attribute::MinSize))
- Threshold = HintThreshold;
-
- // Listen to the cold attribute or profile based coldness information
- // when it would decrease the threshold.
- bool ColdCallee =
- Callee->hasFnAttribute(Attribute::Cold) ||
- (HasPGOCounts &&
- FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
- // Command line argument for InlineLimit will override the default
- // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
- // do not use the default cold threshold even if it is smaller.
- if ((InlineLimit.getNumOccurrences() == 0 ||
- ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
- ColdThreshold < Threshold)
- Threshold = ColdThreshold;
-
- return Threshold;
-}
-
static void emitAnalysis(CallSite CS, const Twine &Msg) {
Function *Caller = CS.getCaller();
LLVMContext &Ctx = Caller->getContext();
@@ -311,6 +229,76 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) {
emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
}
+bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
+ int &TotalSecondaryCost) {
+
+ // For now we only handle local or inline functions.
+ if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
+ return false;
+ // Try to detect the case where the current inlining candidate caller (call
+ // it B) is a static or linkonce-ODR function and is an inlining candidate
+ // elsewhere, and the current candidate callee (call it C) is large enough
+ // that inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B into
+ // its callers.
+ //
+ // This only applies to static and linkonce-ODR functions because those are
+ // expected to be available for inlining in the translation units where they
+ // are used. Thus we will always have the opportunity to make local inlining
+ // decisions. Importantly the linkonce-ODR linkage covers inline functions
+ // and templates in C++.
+ //
+ // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+ // the internal implementation of the inline cost metrics rather than
+ // treating them as truly abstract units etc.
+ TotalSecondaryCost = 0;
+ // The candidate cost to be imposed upon the current function.
+ int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = Caller->hasLocalLinkage();
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
+ for (User *U : Caller->users()) {
+ CallSite CS2(U);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+
+ InlineCost IC2 = getInlineCost(CS2);
+ ++NumCallerCallersAnalyzed;
+ if (!IC2) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+ if (IC2.isAlways())
+ continue;
+
+ // See if inlining or original callsite would erase the cost delta of
+ // this callsite. We subtract off the penalty for the call instruction,
+ // which we would be deleting.
+ if (IC2.getCostDelta() <= CandidateCost) {
+ inliningPreventsSomeOuterInline = true;
+ TotalSecondaryCost += IC2.getCost();
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (callerWillBeRemoved && !Caller->use_empty())
+ TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+
+ if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
+ return true;
+
+ return false;
+}
+
/// Return true if the inliner should attempt to inline at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
InlineCost IC = getInlineCost(CS);
@@ -342,77 +330,17 @@ bool Inliner::shouldInline(CallSite CS) {
Twine(IC.getCostDelta() + IC.getCost()) + ")");
return false;
}
-
- // Try to detect the case where the current inlining candidate caller (call
- // it B) is a static or linkonce-ODR function and is an inlining candidate
- // elsewhere, and the current candidate callee (call it C) is large enough
- // that inlining it into B would make B too big to inline later. In these
- // circumstances it may be best not to inline C into B, but to inline B into
- // its callers.
- //
- // This only applies to static and linkonce-ODR functions because those are
- // expected to be available for inlining in the translation units where they
- // are used. Thus we will always have the opportunity to make local inlining
- // decisions. Importantly the linkonce-ODR linkage covers inline functions
- // and templates in C++.
- //
- // FIXME: All of this logic should be sunk into getInlineCost. It relies on
- // the internal implementation of the inline cost metrics rather than
- // treating them as truly abstract units etc.
- if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) {
- int TotalSecondaryCost = 0;
- // The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
- // This bool tracks what happens if we do NOT inline C into B.
- bool callerWillBeRemoved = Caller->hasLocalLinkage();
- // This bool tracks what happens if we DO inline C into B.
- bool inliningPreventsSomeOuterInline = false;
- for (User *U : Caller->users()) {
- CallSite CS2(U);
-
- // If this isn't a call to Caller (it could be some other sort
- // of reference) skip it. Such references will prevent the caller
- // from being removed.
- if (!CS2 || CS2.getCalledFunction() != Caller) {
- callerWillBeRemoved = false;
- continue;
- }
- InlineCost IC2 = getInlineCost(CS2);
- ++NumCallerCallersAnalyzed;
- if (!IC2) {
- callerWillBeRemoved = false;
- continue;
- }
- if (IC2.isAlways())
- continue;
-
- // See if inlining or original callsite would erase the cost delta of
- // this callsite. We subtract off the penalty for the call instruction,
- // which we would be deleting.
- if (IC2.getCostDelta() <= CandidateCost) {
- inliningPreventsSomeOuterInline = true;
- TotalSecondaryCost += IC2.getCost();
- }
- }
- // If all outer calls to Caller would get inlined, the cost for the last
- // one is set very low by getInlineCost, in anticipation that Caller will
- // be removed entirely. We did not account for this above unless there
- // is only one caller of Caller.
- if (callerWillBeRemoved && !Caller->use_empty())
- TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
-
- if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
- DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
- " Cost = " << IC.getCost() <<
- ", outer Cost = " << TotalSecondaryCost << '\n');
- emitAnalysis(
- CS, Twine("Not inlining. Cost of inlining " +
- CS.getCalledFunction()->getName() +
- " increases the cost of inlining " +
- CS.getCaller()->getName() + " in other contexts"));
- return false;
- }
+ int TotalSecondaryCost = 0;
+ if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost)) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
+ << " Cost = " << IC.getCost()
+ << ", outer Cost = " << TotalSecondaryCost << '\n');
+ emitAnalysis(CS, Twine("Not inlining. Cost of inlining " +
+ CS.getCalledFunction()->getName() +
+ " increases the cost of inlining " +
+ CS.getCaller()->getName() + " in other contexts"));
+ return false;
}
DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
@@ -440,8 +368,15 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
}
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+ if (skipSCC(SCC))
+ return false;
+ return inlineCalls(SCC);
+}
+
+bool Inliner::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
+ PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule());
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SmallPtrSet<Function*, 8> SCCFunctions;
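
The deferral test factored into shouldBeDeferred above is easiest to follow with numbers. A worked example, assuming this era's InlineConstants::CallPenalty of 25 (the remaining figures are illustrative):

// Candidate C -> B has IC.getCost() = 200, so the weight C would add to B
// is CandidateCost = 200 - (25 + 1) = 174.
// B has one outer callsite B' -> B with IC2.getCostDelta() = 150 and
// IC2.getCost() = 120. Since 150 <= 174, inlining C into B would block
// that outer inline, so TotalSecondaryCost = 120.
// Because 120 < 200, shouldBeDeferred returns true: keep C out of B for
// now and prefer inlining B into B' instead.
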
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 21bb5d000bc76..8c5c6f77077c0 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
//
// This pass loops over all of the functions and variables in the input module.
-// If the function or variable is not in the list of external names given to
-// the pass it is marked as internal.
+// If the function or variable does not need to be preserved according to the
+// client supplied callback, it is marked as internal.
//
// This transformation would not be legal in a regular compilation, but it gets
// extra information from the linker about what is safe.
@@ -19,98 +19,77 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <fstream>
#include <set>
using namespace llvm;
#define DEBUG_TYPE "internalize"
-STATISTIC(NumAliases , "Number of aliases internalized");
+STATISTIC(NumAliases, "Number of aliases internalized");
STATISTIC(NumFunctions, "Number of functions internalized");
-STATISTIC(NumGlobals , "Number of global vars internalized");
+STATISTIC(NumGlobals, "Number of global vars internalized");
// APIFile - A file which contains a list of symbols that should not be marked
// internal.
static cl::opt<std::string>
-APIFile("internalize-public-api-file", cl::value_desc("filename"),
- cl::desc("A file containing list of symbol names to preserve"));
+ APIFile("internalize-public-api-file", cl::value_desc("filename"),
+ cl::desc("A file containing list of symbol names to preserve"));
// APIList - A list of symbols that should not be marked internal.
static cl::list<std::string>
-APIList("internalize-public-api-list", cl::value_desc("list"),
- cl::desc("A list of symbol names to preserve"),
- cl::CommaSeparated);
+ APIList("internalize-public-api-list", cl::value_desc("list"),
+ cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
namespace {
- class InternalizePass : public ModulePass {
- std::set<std::string> ExternalNames;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit InternalizePass();
- explicit InternalizePass(ArrayRef<const char *> ExportList);
- void LoadFile(const char *Filename);
- bool maybeInternalize(GlobalValue &GV,
- const std::set<const Comdat *> &ExternalComdats);
- void checkComdatVisibility(GlobalValue &GV,
- std::set<const Comdat *> &ExternalComdats);
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<CallGraphWrapperPass>();
- }
- };
-} // end anonymous namespace
-
-char InternalizePass::ID = 0;
-INITIALIZE_PASS(InternalizePass, "internalize",
- "Internalize Global Symbols", false, false)
-
-InternalizePass::InternalizePass() : ModulePass(ID) {
- initializeInternalizePassPass(*PassRegistry::getPassRegistry());
- if (!APIFile.empty()) // If a filename is specified, use it.
- LoadFile(APIFile.c_str());
- ExternalNames.insert(APIList.begin(), APIList.end());
-}
-
-InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
- : ModulePass(ID) {
- initializeInternalizePassPass(*PassRegistry::getPassRegistry());
- for(ArrayRef<const char *>::const_iterator itr = ExportList.begin();
- itr != ExportList.end(); itr++) {
- ExternalNames.insert(*itr);
+// Helper that loads the list of API symbols to preserve from a file and
+// exposes it as a functor for use during internalization.
+class PreserveAPIList {
+public:
+ PreserveAPIList() {
+ if (!APIFile.empty())
+ LoadFile(APIFile);
+ ExternalNames.insert(APIList.begin(), APIList.end());
}
-}
-void InternalizePass::LoadFile(const char *Filename) {
- // Load the APIFile...
- std::ifstream In(Filename);
- if (!In.good()) {
- errs() << "WARNING: Internalize couldn't load file '" << Filename
- << "'! Continuing as if it's empty.\n";
- return; // Just continue as if the file were empty
+ bool operator()(const GlobalValue &GV) {
+ return ExternalNames.count(GV.getName());
}
- while (In) {
- std::string Symbol;
- In >> Symbol;
- if (!Symbol.empty())
- ExternalNames.insert(Symbol);
+
+private:
+ // Contains the set of symbols loaded from file
+ StringSet<> ExternalNames;
+
+ void LoadFile(StringRef Filename) {
+ // Load the APIFile...
+ std::ifstream In(Filename.data());
+ if (!In.good()) {
+ errs() << "WARNING: Internalize couldn't load file '" << Filename
+ << "'! Continuing as if it's empty.\n";
+ return; // Just continue as if the file were empty
+ }
+ while (In) {
+ std::string Symbol;
+ In >> Symbol;
+ if (!Symbol.empty())
+ ExternalNames.insert(Symbol);
+ }
}
-}
+};
+} // end anonymous namespace
-static bool isExternallyVisible(const GlobalValue &GV,
- const std::set<std::string> &ExternalNames) {
+bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
// Function must be defined here
if (GV.isDeclaration())
return true;
@@ -123,15 +102,17 @@ static bool isExternallyVisible(const GlobalValue &GV,
if (GV.hasDLLExportStorageClass())
return true;
- // Marked to keep external?
- if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName()))
+ // Already local, has nothing to do.
+ if (GV.hasLocalLinkage())
+ return false;
+
+ // Check some special cases
+ if (AlwaysPreserved.count(GV.getName()))
return true;
- return false;
+ return MustPreserveGV(GV);
}
-// Internalize GV if it is possible to do so, i.e. it is not externally visible
-// and is not a member of an externally visible comdat.
bool InternalizePass::maybeInternalize(
GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
if (Comdat *C = GV.getComdat()) {
@@ -148,7 +129,7 @@ bool InternalizePass::maybeInternalize(
if (GV.hasLocalLinkage())
return false;
- if (isExternallyVisible(GV, ExternalNames))
+ if (shouldPreserveGV(GV))
return false;
}
@@ -165,13 +146,12 @@ void InternalizePass::checkComdatVisibility(
if (!C)
return;
- if (isExternallyVisible(GV, ExternalNames))
+ if (shouldPreserveGV(GV))
ExternalComdats.insert(C);
}
-bool InternalizePass::runOnModule(Module &M) {
- CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
- CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
+bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
+ bool Changed = false;
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
SmallPtrSet<GlobalValue *, 8> Used;
@@ -198,13 +178,14 @@ bool InternalizePass::runOnModule(Module &M) {
// conservative, we internalize symbols in llvm.compiler.used, but we
// keep llvm.compiler.used so that the symbol is not deleted by llvm.
for (GlobalValue *V : Used) {
- ExternalNames.insert(V->getName());
+ AlwaysPreserved.insert(V->getName());
}
// Mark all functions not in the api as internal.
for (Function &I : M) {
if (!maybeInternalize(I, ExternalComdats))
continue;
+ Changed = true;
if (ExternalNode)
// Remove a callgraph edge from the external node to this function.
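A note on what "internal" means here: the actual linkage update happens in
the part of maybeInternalize that this hunk elides. A sketch under that
assumption, using the real GlobalValue API:

    #include "llvm/IR/GlobalValue.h"
    // Internalizing a symbol gives it internal linkage so it is no longer
    // visible outside the module (sketch; the pass body is not shown here).
    static void internalizeSketch(llvm::GlobalValue &GV) {
      GV.setLinkage(llvm::GlobalValue::InternalLinkage);
    }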
@@ -217,53 +198,97 @@ bool InternalizePass::runOnModule(Module &M) {
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
// FIXME: Shouldn't this just filter on llvm.metadata section??
- ExternalNames.insert("llvm.used");
- ExternalNames.insert("llvm.compiler.used");
+ AlwaysPreserved.insert("llvm.used");
+ AlwaysPreserved.insert("llvm.compiler.used");
// Never internalize anchors used by the machine module info, else the info
// won't find them. (see MachineModuleInfo.)
- ExternalNames.insert("llvm.global_ctors");
- ExternalNames.insert("llvm.global_dtors");
- ExternalNames.insert("llvm.global.annotations");
+ AlwaysPreserved.insert("llvm.global_ctors");
+ AlwaysPreserved.insert("llvm.global_dtors");
+ AlwaysPreserved.insert("llvm.global.annotations");
// Never internalize symbols code-gen inserts.
// FIXME: We should probably add this (and the __stack_chk_guard) via some
// type of call-back in CodeGen.
- ExternalNames.insert("__stack_chk_fail");
- ExternalNames.insert("__stack_chk_guard");
+ AlwaysPreserved.insert("__stack_chk_fail");
+ AlwaysPreserved.insert("__stack_chk_guard");
// Mark all global variables with initializers that are not in the api as
// internal as well.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- if (!maybeInternalize(*I, ExternalComdats))
+ for (auto &GV : M.globals()) {
+ if (!maybeInternalize(GV, ExternalComdats))
continue;
+ Changed = true;
++NumGlobals;
- DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
+ DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
}
// Mark all aliases that are not in the api as internal as well.
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I) {
- if (!maybeInternalize(*I, ExternalComdats))
+ for (auto &GA : M.aliases()) {
+ if (!maybeInternalize(GA, ExternalComdats))
continue;
+ Changed = true;
++NumAliases;
- DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
+ DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
}
- // We do not keep track of whether this pass changed the module because
- // it adds unnecessary complexity:
- // 1) This pass will generally be near the start of the pass pipeline, so
- // there will be no analyses to invalidate.
- // 2) This pass will most likely end up changing the module and it isn't worth
- // worrying about optimizing the case where the module is unchanged.
- return true;
+ return Changed;
}
-ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
+InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
+
+PreservedAnalyses InternalizePass::run(Module &M, AnalysisManager<Module> &AM) {
+ if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<CallGraphAnalysis>();
+ return PA;
+}
+
+namespace {
+class InternalizeLegacyPass : public ModulePass {
+  // Client-supplied callback to control whether a symbol must be preserved.
+ std::function<bool(const GlobalValue &)> MustPreserveGV;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
+
+ InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
+ : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
+ initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+
+ CallGraphWrapperPass *CGPass =
+ getAnalysisIfAvailable<CallGraphWrapperPass>();
+ CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
+ return internalizeModule(M, MustPreserveGV, CG);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<CallGraphWrapperPass>();
+ }
+};
+}
+
+char InternalizeLegacyPass::ID = 0;
+INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
+ "Internalize Global Symbols", false, false)
+
+ModulePass *llvm::createInternalizePass() {
+ return new InternalizeLegacyPass();
+}
-ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
- return new InternalizePass(ExportList);
+ModulePass *llvm::createInternalizePass(
+ std::function<bool(const GlobalValue &)> MustPreserveGV) {
+ return new InternalizeLegacyPass(std::move(MustPreserveGV));
}
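The callback-taking factory lets clients such as LTO express their
preservation policy directly instead of threading a symbol list through the
pass. A usage sketch of the new overload (the preserved symbol is
illustrative only):

    #include "llvm/IR/GlobalValue.h"
    #include "llvm/Transforms/IPO.h"

    // Hypothetical client: keep "main" external, internalize everything
    // else the pass considers safe.
    static llvm::ModulePass *makeInternalize() {
      return llvm::createInternalizePass([](const llvm::GlobalValue &GV) {
        return GV.getName() == "main"; // true means "preserve"
      });
    }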
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index b5410f5f77577..bc3df98d504ca 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize
+required_libraries = Analysis Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 3c6a7bb7a17ab..f898c3b5a9358 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -81,7 +81,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
- if (skipOptnoneFunction(L))
+ if (skipLoop(L))
return false;
// Only visit top-level loops.
@@ -249,6 +249,9 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
}
bool BlockExtractorPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
std::set<BasicBlock*> TranslatedBlocksToNotExtract;
for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
BasicBlock *BB = BlocksToNotExtract[i];
@@ -272,15 +275,13 @@ bool BlockExtractorPass::runOnModule(Module &M) {
std::string &FuncName = BlocksToNotExtractByName.back().first;
std::string &BlockName = BlocksToNotExtractByName.back().second;
- for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
- Function &F = *FI;
+ for (Function &F : M) {
if (F.getName() != FuncName) continue;
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- BasicBlock &BB = *BI;
+ for (BasicBlock &BB : F) {
if (BB.getName() != BlockName) continue;
- TranslatedBlocksToNotExtract.insert(&*BI);
+ TranslatedBlocksToNotExtract.insert(&BB);
}
}
@@ -290,18 +291,18 @@ bool BlockExtractorPass::runOnModule(Module &M) {
// Now that we know which blocks to not extract, figure out which ones we WANT
// to extract.
std::vector<BasicBlock*> BlocksToExtract;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- SplitLandingPadPreds(&*F);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (!TranslatedBlocksToNotExtract.count(&*BB))
- BlocksToExtract.push_back(&*BB);
+ for (Function &F : M) {
+ SplitLandingPadPreds(&F);
+ for (BasicBlock &BB : F)
+ if (!TranslatedBlocksToNotExtract.count(&BB))
+ BlocksToExtract.push_back(&BB);
}
- for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
+ for (BasicBlock *BlockToExtract : BlocksToExtract) {
SmallVector<BasicBlock*, 2> BlocksToExtractVec;
- BlocksToExtractVec.push_back(BlocksToExtract[i]);
+ BlocksToExtractVec.push_back(BlockToExtract);
if (const InvokeInst *II =
- dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator()))
+ dyn_cast<InvokeInst>(BlockToExtract->getTerminator()))
BlocksToExtractVec.push_back(II->getUnwindDest());
CodeExtractor(BlocksToExtractVec).extractCodeRegion();
}
diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 7b515745c3122..36089f0a88018 100644
--- a/lib/Transforms/IPO/LowerBitSets.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1,4 +1,4 @@
-//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//===-- LowerTypeTests.cpp - type metadata lowering pass ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
-// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+// This pass lowers type metadata and calls to the llvm.type.test intrinsic.
+// See http://llvm.org/docs/TypeMetadata.html for more information.
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/LowerBitSets.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Statistic.h"
@@ -33,17 +33,18 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
+using namespace lowertypetests;
-#define DEBUG_TYPE "lowerbitsets"
+#define DEBUG_TYPE "lowertypetests"
STATISTIC(ByteArraySizeBits, "Byte array size in bits");
STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
-STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
-STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
+STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");
static cl::opt<bool> AvoidReuse(
- "lowerbitsets-avoid-reuse",
+ "lowertypetests-avoid-reuse",
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));
@@ -203,10 +204,10 @@ struct ByteArrayInfo {
Constant *Mask;
};
-struct LowerBitSets : public ModulePass {
+struct LowerTypeTests : public ModulePass {
static char ID;
- LowerBitSets() : ModulePass(ID) {
- initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+ LowerTypeTests() : ModulePass(ID) {
+ initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
Module *M;
@@ -221,105 +222,68 @@ struct LowerBitSets : public ModulePass {
IntegerType *Int64Ty;
IntegerType *IntPtrTy;
- // The llvm.bitsets named metadata.
- NamedMDNode *BitSetNM;
-
- // Mapping from bitset identifiers to the call sites that test them.
- DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;
+ // Mapping from type identifiers to the call sites that test them.
+ DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
std::vector<ByteArrayInfo> ByteArrayInfos;
BitSetInfo
- buildBitSet(Metadata *BitSet,
+ buildBitSet(Metadata *TypeId,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Value *BitOffset);
- void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
- Constant *CombinedGlobalAddr,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ void
+ lowerTypeTestCalls(ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
Value *
lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Constant *CombinedGlobal,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
- void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
+ void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
ArrayRef<GlobalVariable *> Globals);
unsigned getJumpTableEntrySize();
Type *getJumpTableEntryType();
Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
unsigned Distance);
- void verifyBitSetMDNode(MDNode *Op);
- void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
+ void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
+ void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
ArrayRef<Function *> Functions);
- void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
+ void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
ArrayRef<GlobalObject *> Globals);
- bool buildBitSets();
- bool eraseBitSetMetadata();
-
- bool doInitialization(Module &M) override;
+ bool lower();
bool runOnModule(Module &M) override;
};
} // anonymous namespace
-INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
- "Lower bitset metadata", false, false)
-INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
- "Lower bitset metadata", false, false)
-char LowerBitSets::ID = 0;
-
-ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
-
-bool LowerBitSets::doInitialization(Module &Mod) {
- M = &Mod;
- const DataLayout &DL = Mod.getDataLayout();
-
- Triple TargetTriple(M->getTargetTriple());
- LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
- Arch = TargetTriple.getArch();
- ObjectFormat = TargetTriple.getObjectFormat();
+INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
+ false)
+char LowerTypeTests::ID = 0;
- Int1Ty = Type::getInt1Ty(M->getContext());
- Int8Ty = Type::getInt8Ty(M->getContext());
- Int32Ty = Type::getInt32Ty(M->getContext());
- Int32PtrTy = PointerType::getUnqual(Int32Ty);
- Int64Ty = Type::getInt64Ty(M->getContext());
- IntPtrTy = DL.getIntPtrType(M->getContext(), 0);
+ModulePass *llvm::createLowerTypeTestsPass() { return new LowerTypeTests; }
- BitSetNM = M->getNamedMetadata("llvm.bitsets");
-
- BitSetTestCallSites.clear();
-
- return false;
-}
-
-/// Build a bit set for BitSet using the object layouts in
+/// Build a bit set for TypeId using the object layouts in
/// GlobalLayout.
-BitSetInfo LowerBitSets::buildBitSet(
- Metadata *BitSet,
+BitSetInfo LowerTypeTests::buildBitSet(
+ Metadata *TypeId,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
BitSetBuilder BSB;
- // Compute the byte offset of each element of this bitset.
- if (BitSetNM) {
- for (MDNode *Op : BitSetNM->operands()) {
- if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
- continue;
- Constant *OpConst =
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
- if (auto GA = dyn_cast<GlobalAlias>(OpConst))
- OpConst = GA->getAliasee();
- auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
- if (!OpGlobal)
+ // Compute the byte offset of each address associated with this type
+ // identifier.
+ SmallVector<MDNode *, 2> Types;
+ for (auto &GlobalAndOffset : GlobalLayout) {
+ Types.clear();
+ GlobalAndOffset.first->getMetadata(LLVMContext::MD_type, Types);
+ for (MDNode *Type : Types) {
+ if (Type->getOperand(1) != TypeId)
continue;
uint64_t Offset =
- cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+ cast<ConstantInt>(cast<ConstantAsMetadata>(Type->getOperand(0))
->getValue())->getZExtValue();
-
- Offset += GlobalLayout.find(OpGlobal)->second;
-
- BSB.addOffset(Offset);
+ BSB.addOffset(GlobalAndOffset.second + Offset);
}
}
@@ -341,7 +305,7 @@ static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
}
-ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
+ByteArrayInfo *LowerTypeTests::createByteArray(BitSetInfo &BSI) {
// Create globals to stand in for byte arrays and masks. These never actually
// get initialized; we RAUW and erase them later in allocateByteArrays() once
// we know the offset and mask to use.
@@ -360,7 +324,7 @@ ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
return BAI;
}
-void LowerBitSets::allocateByteArrays() {
+void LowerTypeTests::allocateByteArrays() {
std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
[](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
return BAI1.BitSize > BAI2.BitSize;
@@ -413,8 +377,8 @@ void LowerBitSets::allocateByteArrays() {
/// Build a test that bit BitOffset is set in BSI, where
/// BitSetGlobal is a global containing the bits in BSI.
-Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
- ByteArrayInfo *&BAI, Value *BitOffset) {
+Value *LowerTypeTests::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
+ ByteArrayInfo *&BAI, Value *BitOffset) {
if (BSI.BitSize <= 64) {
// If the bit set is sufficiently small, we can avoid a load by bit testing
// a constant.
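The fast path above avoids a memory load entirely: when the whole bit set
fits in 64 bits it is materialized as an immediate, and membership reduces
to a shift-and-mask. A plain C++ sketch of the assumed arithmetic:

    #include <cstdint>
    // Sketch: membership test for a <=64-bit bit set held in an immediate.
    static bool testBitSketch(uint64_t Bits, uint64_t BitOffset) {
      return (Bits >> BitOffset) & 1; // set iff BitOffset is a member
    }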
@@ -454,9 +418,9 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
}
}
-/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// Lower a llvm.type.test call to its implementation. Returns the value to
/// replace the call with.
-Value *LowerBitSets::lowerBitSetCall(
+Value *LowerTypeTests::lowerBitSetCall(
CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Constant *CombinedGlobalIntAddr,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
@@ -524,10 +488,10 @@ Value *LowerBitSets::lowerBitSetCall(
return P;
}
-/// Given a disjoint set of bitsets and globals, layout the globals, build the
-/// bit sets and lower the llvm.bitset.test calls.
-void LowerBitSets::buildBitSetsFromGlobalVariables(
- ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
+/// Given a disjoint set of type identifiers and globals, lay out the globals,
+/// build the bit sets and lower the llvm.type.test calls.
+void LowerTypeTests::buildBitSetsFromGlobalVariables(
+ ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalVariable *> Globals) {
// Build a new global with the combined contents of the referenced globals.
// This global is a struct whose even-indexed elements contain the original
// contents of the referenced globals and whose odd-indexed elements contain
@@ -544,7 +508,7 @@ void LowerBitSets::buildBitSetsFromGlobalVariables(
// Cap at 128 was found experimentally to have a good data/instruction
// overhead tradeoff.
if (Padding > 128)
- Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+ Padding = alignTo(InitSize, 128) - InitSize;
GlobalInits.push_back(
ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
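alignTo is the renamed RoundUpToAlignment with the same semantics. A worked
example of the capped-padding arithmetic (the sizes are invented):

    #include <cstdint>
    #include "llvm/Support/MathExtras.h"
    // For a 300-byte global: alignTo(300, 128) == 384, so the capped
    // padding is 384 - 300 == 84 bytes instead of an uncapped gap.
    static uint64_t cappedPadding(uint64_t InitSize) {
      return llvm::alignTo(InitSize, 128) - InitSize;
    }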
@@ -565,7 +529,7 @@ void LowerBitSets::buildBitSetsFromGlobalVariables(
// Multiply by 2 to account for padding elements.
GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
- lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);
+ lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout);
// Build aliases pointing to offsets into the combined global for each
// global from which we built the combined global, and replace references
@@ -591,19 +555,19 @@ void LowerBitSets::buildBitSetsFromGlobalVariables(
}
}
-void LowerBitSets::lowerBitSetCalls(
- ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr,
+void LowerTypeTests::lowerTypeTestCalls(
+ ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
Constant *CombinedGlobalIntAddr =
ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
- // For each bitset in this disjoint set...
- for (Metadata *BS : BitSets) {
+ // For each type identifier in this disjoint set...
+ for (Metadata *TypeId : TypeIds) {
// Build the bitset.
- BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+ BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout);
DEBUG({
- if (auto BSS = dyn_cast<MDString>(BS))
- dbgs() << BSS->getString() << ": ";
+ if (auto MDS = dyn_cast<MDString>(TypeId))
+ dbgs() << MDS->getString() << ": ";
else
dbgs() << "<unnamed>: ";
BSI.print(dbgs());
@@ -611,9 +575,9 @@ void LowerBitSets::lowerBitSetCalls(
ByteArrayInfo *BAI = nullptr;
- // Lower each call to llvm.bitset.test for this bitset.
- for (CallInst *CI : BitSetTestCallSites[BS]) {
- ++NumBitSetCallsLowered;
+ // Lower each call to llvm.type.test for this type identifier.
+ for (CallInst *CI : TypeTestCallSites[TypeId]) {
+ ++NumTypeTestCallsLowered;
Value *Lowered =
lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout);
CI->replaceAllUsesWith(Lowered);
@@ -622,39 +586,32 @@ void LowerBitSets::lowerBitSetCalls(
}
}
-void LowerBitSets::verifyBitSetMDNode(MDNode *Op) {
- if (Op->getNumOperands() != 3)
+void LowerTypeTests::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
+ if (Type->getNumOperands() != 2)
report_fatal_error(
- "All operands of llvm.bitsets metadata must have 3 elements");
- if (!Op->getOperand(1))
- return;
-
- auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
- if (!OpConstMD)
- report_fatal_error("Bit set element must be a constant");
- auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue());
- if (!OpGlobal)
- return;
+ "All operands of type metadata must have 2 elements");
- if (OpGlobal->isThreadLocal())
+ if (GO->isThreadLocal())
report_fatal_error("Bit set element may not be thread-local");
- if (OpGlobal->hasSection())
- report_fatal_error("Bit set element may not have an explicit section");
+ if (isa<GlobalVariable>(GO) && GO->hasSection())
+ report_fatal_error(
+ "A member of a type identifier may not have an explicit section");
- if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker())
- report_fatal_error("Bit set global var element must be a definition");
+ if (isa<GlobalVariable>(GO) && GO->isDeclarationForLinker())
+ report_fatal_error(
+ "A global var member of a type identifier must be a definition");
- auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0));
if (!OffsetConstMD)
- report_fatal_error("Bit set element offset must be a constant");
+ report_fatal_error("Type offset must be a constant");
auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
if (!OffsetInt)
- report_fatal_error("Bit set element offset must be an integer constant");
+ report_fatal_error("Type offset must be an integer constant");
}
static const unsigned kX86JumpTableEntrySize = 8;
-unsigned LowerBitSets::getJumpTableEntrySize() {
+unsigned LowerTypeTests::getJumpTableEntrySize() {
if (Arch != Triple::x86 && Arch != Triple::x86_64)
report_fatal_error("Unsupported architecture for jump tables");
@@ -665,8 +622,9 @@ unsigned LowerBitSets::getJumpTableEntrySize() {
// consists of an instruction sequence containing a relative branch to Dest. The
// constant will be laid out at address Src+(Len*Distance) where Len is the
// target-specific jump table entry size.
-Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest,
- unsigned Distance) {
+Constant *LowerTypeTests::createJumpTableEntry(GlobalObject *Src,
+ Function *Dest,
+ unsigned Distance) {
if (Arch != Triple::x86 && Arch != Triple::x86_64)
report_fatal_error("Unsupported architecture for jump tables");
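The layout comment above pins down where each entry lands: with the x86
entry size of 8 bytes declared earlier, the Src+(Len*Distance) formula is
simply:

    #include <cstdint>
    // Sketch of the address arithmetic, with Len fixed to
    // kX86JumpTableEntrySize (8) as in this pass.
    static uint64_t jumpTableEntryAddr(uint64_t Src, unsigned Distance) {
      return Src + 8 * uint64_t(Distance);
    }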
@@ -693,7 +651,7 @@ Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest,
return ConstantStruct::getAnon(Fields, /*Packed=*/true);
}
-Type *LowerBitSets::getJumpTableEntryType() {
+Type *LowerTypeTests::getJumpTableEntryType() {
if (Arch != Triple::x86 && Arch != Triple::x86_64)
report_fatal_error("Unsupported architecture for jump tables");
@@ -702,10 +660,10 @@ Type *LowerBitSets::getJumpTableEntryType() {
/*Packed=*/true);
}
-/// Given a disjoint set of bitsets and functions, build a jump table for the
-/// functions, build the bit sets and lower the llvm.bitset.test calls.
-void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
- ArrayRef<Function *> Functions) {
+/// Given a disjoint set of type identifiers and functions, build a jump table
+/// for the functions, build the bit sets and lower the llvm.type.test calls.
+void LowerTypeTests::buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
+ ArrayRef<Function *> Functions) {
// Unlike the global bitset builder, the function bitset builder cannot
// re-arrange functions in a particular order and base its calculations on the
// layout of the functions' entry points, as we have no idea how large a
@@ -719,8 +677,7 @@ void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
// verification done inside the module.
//
// In more concrete terms, suppose we have three functions f, g, h which are
- // members of a single bitset, and a function foo that returns their
- // addresses:
+ // of the same type, and a function foo that returns their addresses:
//
// f:
// mov 0, %eax
@@ -803,7 +760,7 @@ void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
JumpTable->setSection(ObjectFormat == Triple::MachO
? "__TEXT,__text,regular,pure_instructions"
: ".text");
- lowerBitSetCalls(BitSets, JumpTable, GlobalLayout);
+ lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);
// Build aliases pointing to offsets into the jump table, and replace
// references to the original functions with references to the aliases.
@@ -838,39 +795,32 @@ void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
ConstantArray::get(JumpTableType, JumpTableEntries));
}
-void LowerBitSets::buildBitSetsFromDisjointSet(
- ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) {
- llvm::DenseMap<Metadata *, uint64_t> BitSetIndices;
- llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices;
- for (unsigned I = 0; I != BitSets.size(); ++I)
- BitSetIndices[BitSets[I]] = I;
- for (unsigned I = 0; I != Globals.size(); ++I)
- GlobalIndices[Globals[I]] = I;
-
- // For each bitset, build a set of indices that refer to globals referenced by
- // the bitset.
- std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
- if (BitSetNM) {
- for (MDNode *Op : BitSetNM->operands()) {
- // Op = { bitset name, global, offset }
- if (!Op->getOperand(1))
- continue;
- auto I = BitSetIndices.find(Op->getOperand(0));
- if (I == BitSetIndices.end())
- continue;
-
- auto OpGlobal = dyn_cast<GlobalObject>(
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
- if (!OpGlobal)
- continue;
- BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+void LowerTypeTests::buildBitSetsFromDisjointSet(
+ ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalObject *> Globals) {
+ llvm::DenseMap<Metadata *, uint64_t> TypeIdIndices;
+ for (unsigned I = 0; I != TypeIds.size(); ++I)
+ TypeIdIndices[TypeIds[I]] = I;
+
+ // For each type identifier, build a set of indices that refer to members of
+ // the type identifier.
+ std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
+ SmallVector<MDNode *, 2> Types;
+ unsigned GlobalIndex = 0;
+ for (GlobalObject *GO : Globals) {
+ Types.clear();
+ GO->getMetadata(LLVMContext::MD_type, Types);
+ for (MDNode *Type : Types) {
+ // Type = { offset, type identifier }
+ unsigned TypeIdIndex = TypeIdIndices[Type->getOperand(1)];
+ TypeMembers[TypeIdIndex].insert(GlobalIndex);
}
+ GlobalIndex++;
}
// Order the sets of indices by size. The GlobalLayoutBuilder works best
// when given small index sets first.
std::stable_sort(
- BitSetMembers.begin(), BitSetMembers.end(),
+ TypeMembers.begin(), TypeMembers.end(),
[](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
return O1.size() < O2.size();
});
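The sort is purely a heuristic hand-off: GlobalLayoutBuilder sees the
smallest member sets first. A toy illustration (the index sets are
invented):

    #include <algorithm>
    #include <cstdint>
    #include <set>
    #include <vector>
    // Three member sets of sizes 3, 1 and 2 reach the layout builder in
    // size order 1, 2, 3 after the stable_sort above.
    void orderExample() {
      std::vector<std::set<uint64_t>> Members = {{1, 2, 3}, {7}, {4, 5}};
      std::stable_sort(Members.begin(), Members.end(),
                       [](const std::set<uint64_t> &A,
                          const std::set<uint64_t> &B) {
                         return A.size() < B.size();
                       });
      // Members is now {{7}, {4, 5}, {1, 2, 3}}.
    }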
@@ -879,7 +829,7 @@ void LowerBitSets::buildBitSetsFromDisjointSet(
// fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
// close together as possible.
GlobalLayoutBuilder GLB(Globals.size());
- for (auto &&MemSet : BitSetMembers)
+ for (auto &&MemSet : TypeMembers)
GLB.addFragment(MemSet);
// Build the bitsets from this disjoint set.
@@ -891,13 +841,13 @@ void LowerBitSets::buildBitSetsFromDisjointSet(
for (auto &&Offset : F) {
auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
if (!GV)
- report_fatal_error(
- "Bit set may not contain both global variables and functions");
+ report_fatal_error("Type identifier may not contain both global "
+ "variables and functions");
*OGI++ = GV;
}
}
- buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
+ buildBitSetsFromGlobalVariables(TypeIds, OrderedGVs);
} else {
// Build a vector of functions with the computed layout.
std::vector<Function *> OrderedFns(Globals.size());
@@ -906,102 +856,97 @@ void LowerBitSets::buildBitSetsFromDisjointSet(
for (auto &&Offset : F) {
auto Fn = dyn_cast<Function>(Globals[Offset]);
if (!Fn)
- report_fatal_error(
- "Bit set may not contain both global variables and functions");
+ report_fatal_error("Type identifier may not contain both global "
+ "variables and functions");
*OFI++ = Fn;
}
}
- buildBitSetsFromFunctions(BitSets, OrderedFns);
+ buildBitSetsFromFunctions(TypeIds, OrderedFns);
}
}
-/// Lower all bit sets in this module.
-bool LowerBitSets::buildBitSets() {
- Function *BitSetTestFunc =
- M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
- if (!BitSetTestFunc)
+/// Lower all type tests in this module.
+bool LowerTypeTests::lower() {
+ Function *TypeTestFunc =
+ M->getFunction(Intrinsic::getName(Intrinsic::type_test));
+ if (!TypeTestFunc || TypeTestFunc->use_empty())
return false;
- // Equivalence class set containing bitsets and the globals they reference.
- // This is used to partition the set of bitsets in the module into disjoint
- // sets.
+ // Equivalence class set containing type identifiers and the globals that
+ // reference them. This is used to partition the set of type identifiers in
+ // the module into disjoint sets.
typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
GlobalClassesTy;
GlobalClassesTy GlobalClasses;
- // Verify the bitset metadata and build a mapping from bitset identifiers to
- // their last observed index in BitSetNM. This will used later to
- // deterministically order the list of bitset identifiers.
- llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
- if (BitSetNM) {
- for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
- MDNode *Op = BitSetNM->getOperand(I);
- verifyBitSetMDNode(Op);
- BitSetIdIndices[Op->getOperand(0)] = I;
+ // Verify the type metadata and build a mapping from type identifiers to their
+ // last observed index in the list of globals. This will be used later to
+ // deterministically order the list of type identifiers.
+ llvm::DenseMap<Metadata *, unsigned> TypeIdIndices;
+ unsigned I = 0;
+ SmallVector<MDNode *, 2> Types;
+ for (GlobalObject &GO : M->global_objects()) {
+ Types.clear();
+ GO.getMetadata(LLVMContext::MD_type, Types);
+ for (MDNode *Type : Types) {
+ verifyTypeMDNode(&GO, Type);
+ TypeIdIndices[cast<MDNode>(Type)->getOperand(1)] = ++I;
}
}
- for (const Use &U : BitSetTestFunc->uses()) {
+ for (const Use &U : TypeTestFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
if (!BitSetMDVal)
report_fatal_error(
- "Second argument of llvm.bitset.test must be metadata");
+ "Second argument of llvm.type.test must be metadata");
auto BitSet = BitSetMDVal->getMetadata();
- // Add the call site to the list of call sites for this bit set. We also use
- // BitSetTestCallSites to keep track of whether we have seen this bit set
- // before. If we have, we don't need to re-add the referenced globals to the
- // equivalence class.
- std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator,
- bool> Ins =
- BitSetTestCallSites.insert(
+ // Add the call site to the list of call sites for this type identifier. We
+ // also use TypeTestCallSites to keep track of whether we have seen this
+ // type identifier before. If we have, we don't need to re-add the
+ // referenced globals to the equivalence class.
+ std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, bool>
+ Ins = TypeTestCallSites.insert(
std::make_pair(BitSet, std::vector<CallInst *>()));
Ins.first->second.push_back(CI);
if (!Ins.second)
continue;
- // Add the bitset to the equivalence class.
+ // Add the type identifier to the equivalence class.
GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
- if (!BitSetNM)
- continue;
-
- // Add the referenced globals to the bitset's equivalence class.
- for (MDNode *Op : BitSetNM->operands()) {
- if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
- continue;
-
- auto OpGlobal = dyn_cast<GlobalObject>(
- cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
- if (!OpGlobal)
- continue;
-
- CurSet = GlobalClasses.unionSets(
- CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+ // Add the referenced globals to the type identifier's equivalence class.
+ for (GlobalObject &GO : M->global_objects()) {
+ Types.clear();
+ GO.getMetadata(LLVMContext::MD_type, Types);
+ for (MDNode *Type : Types)
+ if (Type->getOperand(1) == BitSet)
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(&GO)));
}
}
if (GlobalClasses.empty())
return false;
- // Build a list of disjoint sets ordered by their maximum BitSetNM index
- // for determinism.
+ // Build a list of disjoint sets ordered by their maximum global index for
+ // determinism.
std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
E = GlobalClasses.end();
I != E; ++I) {
if (!I->isLeader()) continue;
- ++NumBitSetDisjointSets;
+ ++NumTypeIdDisjointSets;
unsigned MaxIndex = 0;
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
MI != GlobalClasses.member_end(); ++MI) {
if ((*MI).is<Metadata *>())
- MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]);
+ MaxIndex = std::max(MaxIndex, TypeIdIndices[MI->get<Metadata *>()]);
}
Sets.emplace_back(I, MaxIndex);
}
@@ -1013,26 +958,26 @@ bool LowerBitSets::buildBitSets() {
// For each disjoint set we found...
for (const auto &S : Sets) {
- // Build the list of bitsets in this disjoint set.
- std::vector<Metadata *> BitSets;
+ // Build the list of type identifiers in this disjoint set.
+ std::vector<Metadata *> TypeIds;
std::vector<GlobalObject *> Globals;
for (GlobalClassesTy::member_iterator MI =
GlobalClasses.member_begin(S.first);
MI != GlobalClasses.member_end(); ++MI) {
if ((*MI).is<Metadata *>())
- BitSets.push_back(MI->get<Metadata *>());
+ TypeIds.push_back(MI->get<Metadata *>());
else
Globals.push_back(MI->get<GlobalObject *>());
}
- // Order bitsets by BitSetNM index for determinism. This ordering is stable
- // as there is a one-to-one mapping between metadata and indices.
- std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) {
- return BitSetIdIndices[M1] < BitSetIdIndices[M2];
+ // Order type identifiers by global index for determinism. This ordering is
+ // stable as there is a one-to-one mapping between metadata and indices.
+ std::sort(TypeIds.begin(), TypeIds.end(), [&](Metadata *M1, Metadata *M2) {
+ return TypeIdIndices[M1] < TypeIdIndices[M2];
});
- // Lower the bitsets in this disjoint set.
- buildBitSetsFromDisjointSet(BitSets, Globals);
+ // Build bitsets for this disjoint set.
+ buildBitSetsFromDisjointSet(TypeIds, Globals);
}
allocateByteArrays();
@@ -1040,16 +985,36 @@ bool LowerBitSets::buildBitSets() {
return true;
}
-bool LowerBitSets::eraseBitSetMetadata() {
- if (!BitSetNM)
- return false;
+// Initialization helper shared by the old and the new PM.
+static void init(LowerTypeTests *LTT, Module &M) {
+ LTT->M = &M;
+ const DataLayout &DL = M.getDataLayout();
+ Triple TargetTriple(M.getTargetTriple());
+ LTT->LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+ LTT->Arch = TargetTriple.getArch();
+ LTT->ObjectFormat = TargetTriple.getObjectFormat();
+ LTT->Int1Ty = Type::getInt1Ty(M.getContext());
+ LTT->Int8Ty = Type::getInt8Ty(M.getContext());
+ LTT->Int32Ty = Type::getInt32Ty(M.getContext());
+ LTT->Int32PtrTy = PointerType::getUnqual(LTT->Int32Ty);
+ LTT->Int64Ty = Type::getInt64Ty(M.getContext());
+ LTT->IntPtrTy = DL.getIntPtrType(M.getContext(), 0);
+ LTT->TypeTestCallSites.clear();
+}
- M->eraseNamedMetadata(BitSetNM);
- return true;
+bool LowerTypeTests::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+ init(this, M);
+ return lower();
}
-bool LowerBitSets::runOnModule(Module &M) {
- bool Changed = buildBitSets();
- Changed |= eraseBitSetMetadata();
- return Changed;
+PreservedAnalyses LowerTypeTestsPass::run(Module &M,
+ AnalysisManager<Module> &AM) {
+ LowerTypeTests Impl;
+ init(&Impl, M);
+ bool Changed = Impl.lower();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
}
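With the shared init helper, the legacy and new pass-manager entry points
stay in sync. A hedged sketch of scheduling the new-PM form, assuming the
pass is default-constructible as the run signature above suggests:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/IPO/LowerTypeTests.h"

    void addLowerTypeTests(llvm::ModulePassManager &MPM) {
      // run() returns PreservedAnalyses::none() whenever lowering changed
      // the module, so downstream analyses are invalidated conservatively.
      MPM.addPass(llvm::LowerTypeTestsPass());
    }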
diff --git a/lib/Transforms/IPO/Makefile b/lib/Transforms/IPO/Makefile
deleted file mode 100644
index 5c42374139aaa..0000000000000
--- a/lib/Transforms/IPO/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Transforms/IPO/Makefile -------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMipo
-BUILD_ARCHIVE = 1
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 8a209a18c540e..fe653a75ddb5a 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -89,13 +89,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Hashing.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -112,6 +109,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include <vector>
using namespace llvm;
@@ -189,7 +187,7 @@ public:
private:
/// Test whether two basic blocks have equivalent behaviour.
- int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);
+ int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const;
/// Constants comparison.
/// Its analog to lexicographical comparison between hypothetical numbers
@@ -293,11 +291,11 @@ private:
/// look at their particular properties (bit-width for vectors, and
/// address space for pointers).
/// If these properties are equal - compare their contents.
- int cmpConstants(const Constant *L, const Constant *R);
+ int cmpConstants(const Constant *L, const Constant *R) const;
/// Compares two global values by number. Uses the GlobalNumberState to
/// identify the same globals across function calls.
- int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+ int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const;
/// Assign or look up previously assigned numbers for the two values, and
/// return whether the numbers are equal. Numbers are assigned in the order
@@ -317,11 +315,11 @@ private:
/// then left value is greater.
/// In other words, we compare serial numbers; for more details
/// see comments for sn_mapL and sn_mapR.
- int cmpValues(const Value *L, const Value *R);
+ int cmpValues(const Value *L, const Value *R) const;
/// Compare two Instructions for equivalence, similar to
- /// Instruction::isSameOperationAs but with modifications to the type
- /// comparison.
+ /// Instruction::isSameOperationAs.
+ ///
/// Stages are listed in "most significant stage first" order:
/// On each stage below, we do comparison between some left and right
/// operation parts. If parts are non-equal, we assign parts comparison
@@ -339,8 +337,9 @@ private:
/// For example, for Load it would be:
/// 6.1.Load: volatile (as boolean flag)
/// 6.2.Load: alignment (as integer numbers)
- /// 6.3.Load: synch-scope (as integer numbers)
- /// 6.4.Load: range metadata (as integer numbers)
+ /// 6.3.Load: ordering (as underlying enum class value)
+ /// 6.4.Load: synch-scope (as integer numbers)
+ /// 6.5.Load: range metadata (as integer ranges)
/// At this stage it is better to read the code itself, since it is no more
/// than 10-15 lines per instruction and may change over time.
int cmpOperations(const Instruction *L, const Instruction *R) const;
@@ -353,8 +352,9 @@ private:
/// 3. Pointer operand type (using cmpType method).
/// 4. Number of operands.
/// 5. Compare operands, using cmpValues method.
- int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR);
- int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+ int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const;
+ int cmpGEPs(const GetElementPtrInst *GEPL,
+ const GetElementPtrInst *GEPR) const {
return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
}
@@ -401,12 +401,13 @@ private:
int cmpTypes(Type *TyL, Type *TyR) const;
int cmpNumbers(uint64_t L, uint64_t R) const;
+ int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const;
int cmpAPInts(const APInt &L, const APInt &R) const;
int cmpAPFloats(const APFloat &L, const APFloat &R) const;
int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
int cmpMem(StringRef L, StringRef R) const;
int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
- int cmpRangeMetadata(const MDNode* L, const MDNode* R) const;
+ int cmpRangeMetadata(const MDNode *L, const MDNode *R) const;
int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
// The two functions undergoing comparison.
@@ -445,7 +446,7 @@ private:
/// But, we are still not able to compare operands of PHI nodes, since those
/// could be operands from further BBs we didn't scan yet.
/// So it's impossible to use dominance properties in general.
- DenseMap<const Value*, int> sn_mapL, sn_mapR;
+ mutable DenseMap<const Value*, int> sn_mapL, sn_mapR;
// The global state we will use
GlobalNumberState* GlobalNumbers;
@@ -477,6 +478,12 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
return 0;
}
+int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
+ if ((int)L < (int)R) return -1;
+ if ((int)L > (int)R) return 1;
+ return 0;
+}
+
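cmpOrderings exists because orderings are now compared by their underlying
enum value (see "as underlying enum class value" in the declaration hunk
above): a scoped enum no longer converts implicitly to the uint64_t that
cmpNumbers takes. A standalone illustration (the enum is an abbreviated
stand-in, not the real llvm::AtomicOrdering):

    // Illustrative subset only.
    enum class Ordering { NotAtomic, Monotonic, SequentiallyConsistent };
    static int cmpOrderingsSketch(Ordering L, Ordering R) {
      // enum class forbids the implicit conversion, so cast explicitly,
      // exactly as FunctionComparator::cmpOrderings does above.
      return (int)L < (int)R ? -1 : (int)L > (int)R ? 1 : 0;
    }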
int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
return Res;
@@ -538,8 +545,8 @@ int FunctionComparator::cmpAttrs(const AttributeSet L,
return 0;
}
-int FunctionComparator::cmpRangeMetadata(const MDNode* L,
- const MDNode* R) const {
+int FunctionComparator::cmpRangeMetadata(const MDNode *L,
+ const MDNode *R) const {
if (L == R)
return 0;
if (!L)
@@ -547,7 +554,7 @@ int FunctionComparator::cmpRangeMetadata(const MDNode* L,
if (!R)
return 1;
// Range metadata is a sequence of numbers. Make sure they are the same
- // sequence.
+ // sequence.
// TODO: Note that as this is metadata, it is possible to drop and/or merge
// this data when considering functions to merge. Thus this comparison would
// return 0 (i.e. equivalent), but merging would become more complicated
@@ -557,8 +564,8 @@ int FunctionComparator::cmpRangeMetadata(const MDNode* L,
if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
return Res;
for (size_t I = 0; I < L->getNumOperands(); ++I) {
- ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
- ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+ ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+ ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
return Res;
}
@@ -596,7 +603,8 @@ int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
/// type.
/// 2. Compare constant contents.
/// For more details see declaration comments.
-int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
+int FunctionComparator::cmpConstants(const Constant *L,
+ const Constant *R) const {
Type *TyL = L->getType();
Type *TyR = R->getType();
@@ -793,7 +801,7 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
}
}
-int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
}
@@ -898,9 +906,9 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
int FunctionComparator::cmpOperations(const Instruction *L,
const Instruction *R) const {
// Differences from Instruction::isSameOperationAs:
- // * replace type comparison with calls to isEquivalentType.
- // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
- // * because of the above, we don't test for the tail bit on calls later on
+ // * replace type comparison with calls to cmpTypes.
+ // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
+ // * because of the above, we don't test for the tail bit on calls later on.
if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
return Res;
@@ -914,15 +922,6 @@ int FunctionComparator::cmpOperations(const Instruction *L,
R->getRawSubclassOptionalData()))
return Res;
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
- if (int Res = cmpTypes(AI->getAllocatedType(),
- cast<AllocaInst>(R)->getAllocatedType()))
- return Res;
- if (int Res =
- cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()))
- return Res;
- }
-
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same type
for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
@@ -932,6 +931,12 @@ int FunctionComparator::cmpOperations(const Instruction *L,
}
// Check special state that is a part of some instructions.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+ if (int Res = cmpTypes(AI->getAllocatedType(),
+ cast<AllocaInst>(R)->getAllocatedType()))
+ return Res;
+ return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
+ }
if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
return Res;
@@ -939,7 +944,7 @@ int FunctionComparator::cmpOperations(const Instruction *L,
cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
return Res;
if (int Res =
- cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
+ cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
return Res;
if (int Res =
cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
@@ -955,7 +960,7 @@ int FunctionComparator::cmpOperations(const Instruction *L,
cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
return Res;
if (int Res =
- cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
+ cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
return Res;
return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
}
@@ -996,6 +1001,7 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
return Res;
}
+ return 0;
}
if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
ArrayRef<unsigned> LIndices = EVI->getIndices();
@@ -1009,11 +1015,10 @@ int FunctionComparator::cmpOperations(const Instruction *L,
}
if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
if (int Res =
- cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
+ cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
return Res;
return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
}
-
if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
if (int Res = cmpNumbers(CXI->isVolatile(),
cast<AtomicCmpXchgInst>(R)->isVolatile()))
@@ -1021,11 +1026,13 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpNumbers(CXI->isWeak(),
cast<AtomicCmpXchgInst>(R)->isWeak()))
return Res;
- if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
- cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
+ if (int Res =
+ cmpOrderings(CXI->getSuccessOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
return Res;
- if (int Res = cmpNumbers(CXI->getFailureOrdering(),
- cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
+ if (int Res =
+ cmpOrderings(CXI->getFailureOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
return Res;
return cmpNumbers(CXI->getSynchScope(),
cast<AtomicCmpXchgInst>(R)->getSynchScope());
@@ -1037,19 +1044,30 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpNumbers(RMWI->isVolatile(),
cast<AtomicRMWInst>(R)->isVolatile()))
return Res;
- if (int Res = cmpNumbers(RMWI->getOrdering(),
+ if (int Res = cmpOrderings(RMWI->getOrdering(),
cast<AtomicRMWInst>(R)->getOrdering()))
return Res;
return cmpNumbers(RMWI->getSynchScope(),
cast<AtomicRMWInst>(R)->getSynchScope());
}
+ if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
+ const PHINode *PNR = cast<PHINode>(R);
+ // Ensure that in addition to the incoming values being identical
+ // (checked by the caller of this function), the incoming blocks
+ // are also identical.
+ for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
+ if (int Res =
+ cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
+ return Res;
+ }
+ }
return 0;
}
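The new PHI check closes a real hole: two PHIs can have pairwise-identical
incoming values that are routed from different predecessors. A hypothetical
IR shape the check now rejects, shown as comments:

    // Hypothetical fragment: operand values match pairwise, but the
    // incoming blocks are swapped, so the functions are not equivalent.
    //   left:  %p = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
    //   right: %q = phi i32 [ %a, %bb2 ], [ %b, %bb1 ]
    // cmpValues on the incoming blocks is what tells these apart.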
// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
- const GEPOperator *GEPR) {
+ const GEPOperator *GEPR) const {
unsigned int ASL = GEPL->getPointerAddressSpace();
unsigned int ASR = GEPR->getPointerAddressSpace();
@@ -1106,7 +1124,7 @@ int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
/// See comments in declaration for more details.
-int FunctionComparator::cmpValues(const Value *L, const Value *R) {
+int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
// Catch self-reference case.
if (L == FnL) {
if (R == FnR)
@@ -1149,7 +1167,7 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) {
}
// Test whether two basic blocks have equivalent behaviour.
int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
- const BasicBlock *BBR) {
+ const BasicBlock *BBR) const {
BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
@@ -1186,7 +1204,8 @@ int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
}
}
- ++InstL, ++InstR;
+ ++InstL;
+ ++InstR;
} while (InstL != InstLE && InstR != InstRE);
if (InstL != InstLE && InstR == InstRE)
@@ -1249,7 +1268,7 @@ int FunctionComparator::compare() {
// functions, then takes each block from each terminator in order. As an
// artifact, this also means that unreachable blocks are ignored.
SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
- SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
+ SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.
FnLBBs.push_back(&FnL->getEntryBlock());
FnRBBs.push_back(&FnR->getEntryBlock());
@@ -1517,6 +1536,9 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
}
bool MergeFunctions::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
bool Changed = false;
// All functions in the module, ordered by hash. Functions with a unique
@@ -1555,28 +1577,12 @@ bool MergeFunctions::runOnModule(Module &M) {
DEBUG(dbgs() << "size of module: " << M.size() << '\n');
DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
- // Insert only strong functions and merge them. Strong function merging
- // always deletes one of them.
- for (std::vector<WeakVH>::iterator I = Worklist.begin(),
- E = Worklist.end(); I != E; ++I) {
- if (!*I) continue;
- Function *F = cast<Function>(*I);
- if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
- !F->mayBeOverridden()) {
- Changed |= insert(F);
- }
- }
-
- // Insert only weak functions and merge them. By doing these second we
- // create thunks to the strong function when possible. When two weak
- // functions are identical, we create a new strong function with two
- // weak thunks to it which are identical but not mergeable.
- for (std::vector<WeakVH>::iterator I = Worklist.begin(),
- E = Worklist.end(); I != E; ++I) {
- if (!*I) continue;
- Function *F = cast<Function>(*I);
- if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
- F->mayBeOverridden()) {
+ // Insert functions and merge them.
+ for (WeakVH &I : Worklist) {
+ if (!I)
+ continue;
+ Function *F = cast<Function>(I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) {
Changed |= insert(F);
}
}
@@ -1631,7 +1637,7 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
- if (HasGlobalAliases && G->hasUnnamedAddr()) {
+ if (HasGlobalAliases && G->hasGlobalUnnamedAddr()) {
if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
G->hasWeakLinkage()) {
writeAlias(F, G);
@@ -1645,7 +1651,7 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
// Helper for writeThunk,
// Selects proper bitcast operation,
// but a bit simpler than CastInst::getCastOpcode.
-static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
Type *SrcTy = V->getType();
if (SrcTy->isStructTy()) {
assert(DestTy->isStructTy());
@@ -1673,7 +1679,7 @@ static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
// Replace G with a simple tail call to bitcast(F). Also replace direct uses
// of G with bitcast(F). Deletes G.
void MergeFunctions::writeThunk(Function *F, Function *G) {
- if (!G->mayBeOverridden()) {
+ if (!G->isInterposable()) {
// Redirect direct callers of G to F.
replaceDirectCallers(G, F);
}
@@ -1688,7 +1694,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
G->getParent());
BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
- IRBuilder<false> Builder(BB);
+ IRBuilder<> Builder(BB);
SmallVector<Value *, 16> Args;
unsigned i = 0;
@@ -1734,8 +1740,8 @@ void MergeFunctions::writeAlias(Function *F, Function *G) {
// Merge two equivalent functions. Upon completion, Function G is deleted.
void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
- if (F->mayBeOverridden()) {
- assert(G->mayBeOverridden());
+ if (F->isInterposable()) {
+ assert(G->isInterposable());
// Make them both thunks to the same internal function.
Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
@@ -1816,20 +1822,16 @@ bool MergeFunctions::insert(Function *NewFunction) {
// important when operating on more than one module independently to prevent
// cycles of thunks calling each other when the modules are linked together.
//
- // When one function is weak and the other is strong there is an order imposed
- // already. We process strong functions before weak functions.
- if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) ||
- (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden()))
- if (OldF.getFunc()->getName() > NewFunction->getName()) {
- // Swap the two functions.
- Function *F = OldF.getFunc();
- replaceFunctionInTree(*Result.first, NewFunction);
- NewFunction = F;
- assert(OldF.getFunc() != F && "Must have swapped the functions.");
- }
-
- // Never thunk a strong function to a weak function.
- assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden());
+ // First of all, we process strong functions before weak functions.
+ if ((OldF.getFunc()->isInterposable() && !NewFunction->isInterposable()) ||
+ (OldF.getFunc()->isInterposable() == NewFunction->isInterposable() &&
+ OldF.getFunc()->getName() > NewFunction->getName())) {
+ // Swap the two functions.
+ Function *F = OldF.getFunc();
+ replaceFunctionInTree(*Result.first, NewFunction);
+ NewFunction = F;
+ assert(OldF.getFunc() != F && "Must have swapped the functions.");
+ }
DEBUG(dbgs() << " " << OldF.getFunc()->getName()
<< " == " << NewFunction->getName() << '\n');
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 0c5c84bbccabe..49c44173491ec 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
@@ -28,27 +29,34 @@ using namespace llvm;
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
namespace {
- struct PartialInliner : public ModulePass {
- void getAnalysisUsage(AnalysisUsage &AU) const override { }
- static char ID; // Pass identification, replacement for typeid
- PartialInliner() : ModulePass(ID) {
- initializePartialInlinerPass(*PassRegistry::getPassRegistry());
- }
+struct PartialInlinerLegacyPass : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ PartialInlinerLegacyPass() : ModulePass(ID) {
+ initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnModule(Module& M) override;
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ ModuleAnalysisManager DummyMAM;
+ auto PA = Impl.run(M, DummyMAM);
+ return !PA.areAllPreserved();
+ }
- private:
- Function* unswitchFunction(Function* F);
+private:
+ PartialInlinerPass Impl;
};
}
-char PartialInliner::ID = 0;
-INITIALIZE_PASS(PartialInliner, "partial-inliner",
- "Partial Inliner", false, false)
+char PartialInlinerLegacyPass::ID = 0;
+INITIALIZE_PASS(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner",
+ false, false)
-ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
+ModulePass *llvm::createPartialInliningPass() {
+ return new PartialInlinerLegacyPass();
+}
-Function* PartialInliner::unswitchFunction(Function* F) {
+Function *PartialInlinerPass::unswitchFunction(Function *F) {
// First, verify that this function is an unswitching candidate...
BasicBlock *entryBlock = &F->front();
BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
@@ -71,10 +79,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
- Function* duplicateFunction = CloneFunction(F, VMap,
- /*ModuleLevelChanges=*/false);
+ Function* duplicateFunction = CloneFunction(F, VMap);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
- F->getParent()->getFunctionList().push_back(duplicateFunction);
BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
@@ -112,11 +118,10 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock*> toExtract;
toExtract.push_back(newNonReturnBlock);
- for (Function::iterator FI = duplicateFunction->begin(),
- FE = duplicateFunction->end(); FI != FE; ++FI)
- if (&*FI != newEntryBlock && &*FI != newReturnBlock &&
- &*FI != newNonReturnBlock)
- toExtract.push_back(&*FI);
+ for (BasicBlock &BB : *duplicateFunction)
+ if (&BB != newEntryBlock && &BB != newReturnBlock &&
+ &BB != newNonReturnBlock)
+ toExtract.push_back(&BB);
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
@@ -131,11 +136,10 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// Inline the top-level if test into all callers.
std::vector<User *> Users(duplicateFunction->user_begin(),
duplicateFunction->user_end());
- for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
- UI != UE; ++UI)
- if (CallInst *CI = dyn_cast<CallInst>(*UI))
+ for (User *User : Users)
+ if (CallInst *CI = dyn_cast<CallInst>(User))
InlineFunction(CI, IFI);
- else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI))
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
InlineFunction(II, IFI);
// Ditch the duplicate, since we're done with it, and rewrite all remaining
@@ -148,13 +152,13 @@ Function* PartialInliner::unswitchFunction(Function* F) {
return extractedFunction;
}
-bool PartialInliner::runOnModule(Module& M) {
+PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &) {
std::vector<Function*> worklist;
worklist.reserve(M.size());
- for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
- if (!FI->use_empty() && !FI->isDeclaration())
- worklist.push_back(&*FI);
-
+ for (Function &F : M)
+ if (!F.use_empty() && !F.isDeclaration())
+ worklist.push_back(&F);
+
bool changed = false;
while (!worklist.empty()) {
Function* currFunc = worklist.back();
@@ -178,6 +182,8 @@ bool PartialInliner::runOnModule(Module& M) {
}
}
-
- return changed;
+
+ if (changed)
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
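Splitting the pass into PartialInlinerPass plus a thin legacy wrapper means the same implementation can be driven from the new pass manager directly. A hedged sketch (the boilerplate around the pass is an assumption, not part of this patch):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
using namespace llvm;

// Run the ported pass over a Module under the new pass manager. The pass
// currently ignores its ModuleAnalysisManager argument, as the legacy
// wrapper's DummyMAM above suggests.
static bool partialInline(Module &M) {
  ModuleAnalysisManager MAM;
  ModulePassManager MPM;
  MPM.addPass(PartialInlinerPass());
  return !MPM.run(M, MAM).areAllPreserved();
}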
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index faada9c2a7db6..cf5b76dc365be 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -16,23 +16,27 @@
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
+#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/FunctionInfo.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
@@ -58,10 +62,6 @@ static cl::opt<bool> ExtraVectorizerPasses(
"extra-vectorizer-passes", cl::init(false), cl::Hidden,
cl::desc("Run cleanup optimization passes after vectorization."));
-static cl::opt<bool> UseNewSROA("use-new-sroa",
- cl::init(true), cl::Hidden,
- cl::desc("Enable the new, experimental SROA pass"));
-
static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
@@ -80,9 +80,19 @@ RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
"vectorizer instead of before"));
-static cl::opt<bool> UseCFLAA("use-cfl-aa",
- cl::init(false), cl::Hidden,
- cl::desc("Enable the new, experimental CFL alias analysis"));
+// Experimental option to use CFL-AA
+enum class CFLAAType { None, Steensgaard, Andersen, Both };
+static cl::opt<CFLAAType>
+ UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden,
+ cl::desc("Enable the new, experimental CFL alias analysis"),
+ cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"),
+ clEnumValN(CFLAAType::Steensgaard, "steens",
+ "Enable unification-based CFL-AA"),
+ clEnumValN(CFLAAType::Andersen, "anders",
+ "Enable inclusion-based CFL-AA"),
+ clEnumValN(CFLAAType::Both, "both",
+ "Enable both variants of CFL-aa"),
+ clEnumValEnd));
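The flag is now tri-state rather than boolean. A hedged illustration of the parsed value in action (the argv plumbing is hypothetical; UseCFLAA and CFLAAType are the names defined above):

// Simulate a tool invocation that selects the inclusion-based variant.
const char *Argv[] = {"tool", "-use-cfl-aa=anders"};
cl::ParseCommandLineOptions(2, Argv);
// addInitialAliasAnalysisPasses() below will now take the Andersen branch
// of its switch and add createCFLAndersAAWrapperPass().
assert(UseCFLAA == CFLAAType::Andersen);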
static cl::opt<bool>
EnableMLSM("mlsm", cl::init(true), cl::Hidden,
@@ -92,25 +102,44 @@ static cl::opt<bool> EnableLoopInterchange(
"enable-loopinterchange", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopInterchange Pass"));
-static cl::opt<bool> EnableLoopDistribute(
- "enable-loop-distribute", cl::init(false), cl::Hidden,
- cl::desc("Enable the new, experimental LoopDistribution Pass"));
-
static cl::opt<bool> EnableNonLTOGlobalsModRef(
"enable-non-lto-gmr", cl::init(true), cl::Hidden,
cl::desc(
"Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline."));
static cl::opt<bool> EnableLoopLoadElim(
- "enable-loop-load-elim", cl::init(false), cl::Hidden,
- cl::desc("Enable the new, experimental LoopLoadElimination Pass"));
+ "enable-loop-load-elim", cl::init(true), cl::Hidden,
+ cl::desc("Enable the LoopLoadElimination Pass"));
+
+static cl::opt<std::string> RunPGOInstrGen(
+ "profile-generate", cl::init(""), cl::Hidden,
+ cl::desc("Enable generation phase of PGO instrumentation and specify the "
+ "path of profile data file"));
+
+static cl::opt<std::string> RunPGOInstrUse(
+ "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"),
+ cl::desc("Enable use phase of PGO instrumentation and specify the path "
+ "of profile data file"));
+
+static cl::opt<bool> UseLoopVersioningLICM(
+ "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
+ cl::desc("Enable the experimental Loop Versioning LICM pass"));
+
+static cl::opt<bool>
+ DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
+ cl::desc("Disable pre-instrumentation inliner"));
+
+static cl::opt<int> PreInlineThreshold(
+ "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
+ cl::desc("Control the amount of inlining in pre-instrumentation inliner "
+ "(default = 75)"));
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
LibraryInfo = nullptr;
Inliner = nullptr;
- FunctionIndex = nullptr;
+ ModuleSummary = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
@@ -123,6 +152,10 @@ PassManagerBuilder::PassManagerBuilder() {
VerifyOutput = false;
MergeFunctions = false;
PrepareForLTO = false;
+ PGOInstrGen = RunPGOInstrGen;
+ PGOInstrUse = RunPGOInstrUse;
+ PrepareForThinLTO = false;
+ PerformThinLTO = false;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -137,11 +170,11 @@ static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
void PassManagerBuilder::addGlobalExtension(
PassManagerBuilder::ExtensionPointTy Ty,
PassManagerBuilder::ExtensionFn Fn) {
- GlobalExtensions->push_back(std::make_pair(Ty, Fn));
+ GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn)));
}
void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
- Extensions.push_back(std::make_pair(Ty, Fn));
+ Extensions.push_back(std::make_pair(Ty, std::move(Fn)));
}
void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
@@ -156,15 +189,34 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
void PassManagerBuilder::addInitialAliasAnalysisPasses(
legacy::PassManagerBase &PM) const {
+ switch (UseCFLAA) {
+ case CFLAAType::Steensgaard:
+ PM.add(createCFLSteensAAWrapperPass());
+ break;
+ case CFLAAType::Andersen:
+ PM.add(createCFLAndersAAWrapperPass());
+ break;
+ case CFLAAType::Both:
+ PM.add(createCFLSteensAAWrapperPass());
+ PM.add(createCFLAndersAAWrapperPass());
+ break;
+ default:
+ break;
+ }
+
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
- if (UseCFLAA)
- PM.add(createCFLAAWrapperPass());
PM.add(createTypeBasedAAWrapperPass());
PM.add(createScopedNoAliasAAWrapperPass());
}
+void PassManagerBuilder::addInstructionCombiningPass(
+ legacy::PassManagerBase &PM) const {
+ bool ExpensiveCombines = OptLevel > 2;
+ PM.add(createInstructionCombiningPass(ExpensiveCombines));
+}
+
void PassManagerBuilder::populateFunctionPassManager(
legacy::FunctionPassManager &FPM) {
addExtensionsToPM(EP_EarlyAsPossible, FPM);
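The new addInstructionCombiningPass() hook above is what ties InstCombine's ExpensiveCombines mode to the optimization level everywhere the builder schedules it. A usage sketch (the driver boilerplate is assumed, not from the patch):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

// At -O3 (OptLevel > 2) every instcombine the builder adds runs with
// ExpensiveCombines enabled; at -O2 and below it does not.
static void optimizeAtO3(Module &M) {
  PassManagerBuilder PMB;
  PMB.OptLevel = 3;
  legacy::PassManager PM;
  PMB.populateModulePassManager(PM);
  PM.run(M);
}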
@@ -178,94 +230,50 @@ void PassManagerBuilder::populateFunctionPassManager(
addInitialAliasAnalysisPasses(FPM);
FPM.add(createCFGSimplificationPass());
- if (UseNewSROA)
- FPM.add(createSROAPass());
- else
- FPM.add(createScalarReplAggregatesPass());
+ FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
+ FPM.add(createGVNHoistPass());
FPM.add(createLowerExpectIntrinsicPass());
}
-void PassManagerBuilder::populateModulePassManager(
- legacy::PassManagerBase &MPM) {
- // Allow forcing function attributes as a debugging and tuning aid.
- MPM.add(createForceFunctionAttrsLegacyPass());
-
- // If all optimizations are disabled, just run the always-inline pass and,
- // if enabled, the function merging pass.
- if (OptLevel == 0) {
- if (Inliner) {
- MPM.add(Inliner);
- Inliner = nullptr;
- }
-
- // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
- // creates a CGSCC pass manager, but we don't want to add extensions into
- // that pass manager. To prevent this we insert a no-op module pass to reset
- // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
- // builds. The function merging pass is
- if (MergeFunctions)
- MPM.add(createMergeFunctionsPass());
- else if (!GlobalExtensions->empty() || !Extensions.empty())
- MPM.add(createBarrierNoopPass());
-
- addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+// Do PGO instrumentation generation or use pass as the option specified.
+void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
+ if (PGOInstrGen.empty() && PGOInstrUse.empty())
return;
- }
-
- // Add LibraryInfo if we have some.
- if (LibraryInfo)
- MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
-
- addInitialAliasAnalysisPasses(MPM);
-
- if (!DisableUnitAtATime) {
- // Infer attributes about declarations if possible.
- MPM.add(createInferFunctionAttrsLegacyPass());
-
- addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
-
- MPM.add(createIPSCCPPass()); // IP SCCP
- MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
- // Promote any localized global vars
- MPM.add(createPromoteMemoryToRegisterPass());
-
- MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
-
- MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+ // Perform the preinline and cleanup passes for O1 and above, but avoid
+ // doing them if optimizing for size.
+ if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) {
+ // Create preinline pass.
+ MPM.add(createFunctionInliningPass(PreInlineThreshold));
+ MPM.add(createSROAPass());
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Combine silly seq's
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
-
- if (EnableNonLTOGlobalsModRef)
- // We add a module alias analysis pass here. In part due to bugs in the
- // analysis infrastructure this "works" in that the analysis stays alive
- // for the entire SCC pass run below.
- MPM.add(createGlobalsAAWrapperPass());
-
- // Start of CallGraph SCC passes.
- if (!DisableUnitAtATime)
- MPM.add(createPruneEHPass()); // Remove dead EH info
- if (Inliner) {
- MPM.add(Inliner);
- Inliner = nullptr;
+ if (!PGOInstrGen.empty()) {
+ MPM.add(createPGOInstrumentationGenLegacyPass());
+ // Add the profile lowering pass.
+ InstrProfOptions Options;
+ Options.InstrProfileOutput = PGOInstrGen;
+ MPM.add(createInstrProfilingLegacyPass(Options));
}
- if (!DisableUnitAtATime)
- MPM.add(createPostOrderFunctionAttrsPass());
- if (OptLevel > 2)
- MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
-
+ if (!PGOInstrUse.empty())
+ MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
+}
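The two string options select mutually exclusive PGO phases, so a typical workflow uses two separate builds. A hedged sketch of the equivalent direct configuration via the new builder members (the profile file names are hypothetical):

// Generation build: instrument, and lower counter updates to the raw file.
PassManagerBuilder GenPMB;
GenPMB.PGOInstrGen = "default.profraw";   // hypothetical output path
// ...run the instrumented program, then merge with llvm-profdata...

// Use build: feed the indexed profile back into the optimizer.
PassManagerBuilder UsePMB;
UsePMB.PGOInstrUse = "default.profdata";  // hypothetical input path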
+void PassManagerBuilder::addFunctionSimplificationPasses(
+ legacy::PassManagerBase &MPM) {
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
- if (UseNewSROA)
- MPM.add(createSROAPass());
- else
- MPM.add(createScalarReplAggregatesPass(-1, false));
+ MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ // Speculative execution if the target has divergent branches; otherwise nop.
+ MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
MPM.add(createJumpThreadingPass()); // Thread jumps.
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
- MPM.add(createInstructionCombiningPass()); // Combine silly seq's
+ // Combine silly seq's
+ addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
@@ -276,7 +284,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLICMPass()); // Hoist loop invariants
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
MPM.add(createCFGSimplificationPass());
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
@@ -303,7 +311,7 @@ void PassManagerBuilder::populateModulePassManager(
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
@@ -320,7 +328,7 @@ void PassManagerBuilder::populateModulePassManager(
if (BBVectorize) {
MPM.add(createBBVectorizePass());
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
@@ -338,18 +346,99 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
- MPM.add(createInstructionCombiningPass()); // Clean up after everything.
+ // Clean up after everything.
+ addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
+}
+
+void PassManagerBuilder::populateModulePassManager(
+ legacy::PassManagerBase &MPM) {
+ // Allow forcing function attributes as a debugging and tuning aid.
+ MPM.add(createForceFunctionAttrsLegacyPass());
+
+ // If all optimizations are disabled, just run the always-inline pass and,
+ // if enabled, the function merging pass.
+ if (OptLevel == 0) {
+ addPGOInstrPasses(MPM);
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = nullptr;
+ }
+
+ // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
+ // creates a CGSCC pass manager, but we don't want to add extensions into
+ // that pass manager. To prevent this we insert a no-op module pass to reset
+ // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
+ // builds. The function merging pass is
+ if (MergeFunctions)
+ MPM.add(createMergeFunctionsPass());
+ else if (!GlobalExtensions->empty() || !Extensions.empty())
+ MPM.add(createBarrierNoopPass());
+
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+ return;
+ }
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo)
+ MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
+
+ addInitialAliasAnalysisPasses(MPM);
+
+ if (!DisableUnitAtATime) {
+ // Infer attributes about declarations if possible.
+ MPM.add(createInferFunctionAttrsLegacyPass());
+
+ addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
+
+ MPM.add(createIPSCCPPass()); // IP SCCP
+ MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+ // Promote any localized global vars.
+ MPM.add(createPromoteMemoryToRegisterPass());
+
+ MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
+
+ addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE
+ addExtensionsToPM(EP_Peephole, MPM);
+ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+ }
+
+ if (!PerformThinLTO) {
+ /// PGO instrumentation is added during the compile phase for ThinLTO;
+ /// do not run it a second time.
+ addPGOInstrPasses(MPM);
+ }
+
+ // Indirect call promotion that promotes intra-module targets only.
+ MPM.add(createPGOIndirectCallPromotionLegacyPass());
+
+ if (EnableNonLTOGlobalsModRef)
+ // We add a module alias analysis pass here. In part due to bugs in the
+ // analysis infrastructure this "works" in that the analysis stays alive
+ // for the entire SCC pass run below.
+ MPM.add(createGlobalsAAWrapperPass());
+
+ // Start of CallGraph SCC passes.
+ if (!DisableUnitAtATime)
+ MPM.add(createPruneEHPass()); // Remove dead EH info
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = nullptr;
+ }
+ if (!DisableUnitAtATime)
+ MPM.add(createPostOrderFunctionAttrsLegacyPass());
+ if (OptLevel > 2)
+ MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+
+ addFunctionSimplificationPasses(MPM);
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
- if (!DisableUnitAtATime)
- MPM.add(createReversePostOrderFunctionAttrsPass());
-
- if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
+ if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO &&
+ !PrepareForThinLTO)
// Remove avail extern fns and globals definitions if we aren't
// compiling an object file for later LTO. For LTO we want to preserve
// these so they are eligible for inlining at link-time. Note if they
@@ -360,6 +449,34 @@ void PassManagerBuilder::populateModulePassManager(
// globals referenced by available external functions dead
// and saves running remaining passes on the eliminated functions.
MPM.add(createEliminateAvailableExternallyPass());
+
+ if (!DisableUnitAtATime)
+ MPM.add(createReversePostOrderFunctionAttrsPass());
+
+ // If we are planning to perform ThinLTO later, let's not bloat the code with
+ // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
+ // during ThinLTO and perform the rest of the optimizations afterward.
+ if (PrepareForThinLTO) {
+ // Reduce the size of the IR as much as possible.
+ MPM.add(createGlobalOptimizerPass());
+ // Rename anon function to be able to export them in the summary.
+ MPM.add(createNameAnonFunctionPass());
+ return;
+ }
+
+ if (PerformThinLTO)
+ // Optimize globals now when performing ThinLTO; this enables more
+ // optimizations later.
+ MPM.add(createGlobalOptimizerPass());
+
+ // Schedule LoopVersioningLICM once inlining is done, because by then we
+ // may see more accurate aliasing. Running it earlier could block further
+ // inlining, since versioning increases code size. Placed just after
+ // inlining, the optimizations that run later can benefit from the
+ // no-alias assumption in the cloned loop.
+ if (UseLoopVersioningLICM) {
+ MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
+ MPM.add(createLICMPass()); // Hoist loop invariants
}
if (EnableNonLTOGlobalsModRef)
@@ -391,9 +508,10 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
- // into separate loop that would otherwise inhibit vectorization.
- if (EnableLoopDistribute)
- MPM.add(createLoopDistributePass());
+ // into a separate loop that would otherwise inhibit vectorization. This is
+ // currently only performed for loops marked with the metadata
+ // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
+ MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false));
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
@@ -407,7 +525,7 @@ void PassManagerBuilder::populateModulePassManager(
// on -O1 and no #pragma is found). Would be good to have these two passes
// as function calls, so that we can only pass them when the vectorizer
// changed the code.
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
if (OptLevel > 1 && ExtraVectorizerPasses) {
// At higher optimization levels, try to clean up any runtime overlap and
// alignment checks inserted by the vectorizer. We want to track correllated
@@ -417,11 +535,11 @@ void PassManagerBuilder::populateModulePassManager(
// dead (or speculatable) control flows or more combining opportunities.
MPM.add(createEarlyCSEPass());
MPM.add(createCorrelatedValuePropagationPass());
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
MPM.add(createLICMPass());
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
MPM.add(createCFGSimplificationPass());
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
}
if (RunSLPAfterLoopVectorization) {
@@ -434,7 +552,7 @@ void PassManagerBuilder::populateModulePassManager(
if (BBVectorize) {
MPM.add(createBBVectorizePass());
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
@@ -449,19 +567,22 @@ void PassManagerBuilder::populateModulePassManager(
addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createCFGSimplificationPass());
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
if (!DisableUnrollLoops) {
MPM.add(createLoopUnrollPass()); // Unroll small loops
// LoopUnroll may generate some redundency to cleanup.
- MPM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(MPM);
// Runtime unrolling will introduce a runtime check in the loop prologue.
// If the unrolled loop is an inner loop, the prologue will be inside the
// outer loop. The LICM pass can help promote the runtime check out if the
// checked value is loop invariant.
MPM.add(createLICMPass());
+
+ // Get rid of LCSSA nodes.
+ MPM.add(createInstructionSimplifierPass());
}
// After vectorization and unrolling, assume intrinsics may tell us more
@@ -487,11 +608,15 @@ void PassManagerBuilder::populateModulePassManager(
}
void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
+ // Remove unused virtual tables to improve the quality of code generated by
+ // whole-program devirtualization and bitset lowering.
+ PM.add(createGlobalDCEPass());
+
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
- if (FunctionIndex)
- PM.add(createFunctionImportPass(FunctionIndex));
+ if (ModuleSummary)
+ PM.add(createFunctionImportPass(ModuleSummary));
// Allow forcing function attributes as a debugging and tuning aid.
PM.add(createForceFunctionAttrsLegacyPass());
@@ -499,14 +624,32 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Infer attributes about declarations if possible.
PM.add(createInferFunctionAttrsLegacyPass());
- // Propagate constants at call sites into the functions they call. This
- // opens opportunities for globalopt (and inlining) by substituting function
- // pointers passed as arguments to direct uses of functions.
- PM.add(createIPSCCPPass());
+ if (OptLevel > 1) {
+ // Indirect call promotion. This should promote all the targets that are
+ // left by the earlier promotion pass that promotes intra-module targets.
+ // This two-step promotion is to save the compile time. For LTO, it should
+ // produce the same result as if we only do promotion here.
+ PM.add(createPGOIndirectCallPromotionLegacyPass(true));
+
+ // Propagate constants at call sites into the functions they call. This
+ // opens opportunities for globalopt (and inlining) by substituting function
+ // pointers passed as arguments to direct uses of functions.
+ PM.add(createIPSCCPPass());
+ }
- // Now that we internalized some globals, see if we can hack on them!
- PM.add(createPostOrderFunctionAttrsPass());
+ // Infer attributes about definitions. The readnone attribute in particular is
+ // required for virtual constant propagation.
+ PM.add(createPostOrderFunctionAttrsLegacyPass());
PM.add(createReversePostOrderFunctionAttrsPass());
+
+ // Apply whole-program devirtualization and virtual constant propagation.
+ PM.add(createWholeProgramDevirtPass());
+
+ // That's all we need at opt level 1.
+ if (OptLevel == 1)
+ return;
+
+ // Now that we internalized some globals, see if we can hack on them!
PM.add(createGlobalOptimizerPass());
// Promote any localized global vars.
PM.add(createPromoteMemoryToRegisterPass());
@@ -522,7 +665,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// simplification opportunities, and both can propagate functions through
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
- PM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(PM);
addExtensionsToPM(EP_Peephole, PM);
// Inline small functions
@@ -544,18 +687,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createArgumentPromotionPass());
// The IPO passes may leave cruft around. Clean up after them.
- PM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(PM);
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
// Break up allocas
- if (UseNewSROA)
- PM.add(createSROAPass());
- else
- PM.add(createScalarReplAggregatesPass());
+ PM.add(createSROAPass());
// Run a few AA driven optimizations here and now, to cleanup the code.
- PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture.
+ PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
@@ -573,15 +713,20 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());
+ if (!DisableUnrollLoops)
+ PM.add(createSimpleLoopUnrollPass()); // Unroll small loops
PM.add(createLoopVectorizePass(true, LoopVectorize));
+ // The vectorizer may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ PM.add(createLoopUnrollPass());
// Now that we've optimized loops (in particular loop induction variables),
// we may have exposed more scalar opportunities. Run parts of the scalar
// optimizer again at this point.
- PM.add(createInstructionCombiningPass()); // Initial cleanup
+ addInstructionCombiningPass(PM); // Initial cleanup
PM.add(createCFGSimplificationPass()); // if-convert
PM.add(createSCCPPass()); // Propagate exposed constants
- PM.add(createInstructionCombiningPass()); // Clean up again
+ addInstructionCombiningPass(PM); // Clean up again
PM.add(createBitTrackingDCEPass());
// More scalar chains could be vectorized due to more alias information
@@ -597,7 +742,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createLoadCombinePass());
// Cleanup and simplify the code after the scalar optimizations.
- PM.add(createInstructionCombiningPass());
+ addInstructionCombiningPass(PM);
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
@@ -620,6 +765,23 @@ void PassManagerBuilder::addLateLTOOptimizationPasses(
PM.add(createMergeFunctionsPass());
}
+void PassManagerBuilder::populateThinLTOPassManager(
+ legacy::PassManagerBase &PM) {
+ PerformThinLTO = true;
+
+ if (VerifyInput)
+ PM.add(createVerifierPass());
+
+ if (ModuleSummary)
+ PM.add(createFunctionImportPass(ModuleSummary));
+
+ populateModulePassManager(PM);
+
+ if (VerifyOutput)
+ PM.add(createVerifierPass());
+ PerformThinLTO = false;
+}
+
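A ThinLTO backend would then drive the new entry point roughly as follows (a sketch under assumptions; only ModuleSummary and populateThinLTOPassManager() come from this patch):

// Per-module ThinLTO backend optimization: import from the combined summary
// index, then run the regular module pipeline with PerformThinLTO set.
static void thinLTOBackend(Module &M, const ModuleSummaryIndex *CombinedIndex) {
  PassManagerBuilder PMB;
  PMB.ModuleSummary = CombinedIndex; // may be null if no summary is available
  legacy::PassManager PM;
  PMB.populateThinLTOPassManager(PM);
  PM.run(M);
}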
void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (LibraryInfo)
PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
@@ -627,17 +789,17 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (VerifyInput)
PM.add(createVerifierPass());
- if (OptLevel > 1)
+ if (OptLevel != 0)
addLTOOptimizationPasses(PM);
// Create a function that performs CFI checks for cross-DSO calls with targets
// in the current module.
PM.add(createCrossDSOCFIPass());
- // Lower bit sets to globals. This pass supports Clang's control flow
- // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
- // is enabled. The pass does nothing if CFI is disabled.
- PM.add(createLowerBitSetsPass());
+ // Lower type metadata and the type.test intrinsic. This pass supports Clang's
+ // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
+ // link time if CFI is enabled. The pass does nothing if CFI is disabled.
+ PM.add(createLowerTypeTestsPass());
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 22a95fa03f7c5..2aa3fa55cefdc 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -16,7 +16,6 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
@@ -48,10 +47,10 @@ namespace {
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC) override;
- bool SimplifyFunction(Function *F);
- void DeleteBasicBlock(BasicBlock *BB);
};
}
+static bool SimplifyFunction(Function *F, CallGraph &CG);
+static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG);
char PruneEH::ID = 0;
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
@@ -62,22 +61,20 @@ INITIALIZE_PASS_END(PruneEH, "prune-eh",
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
-
-bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
+static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
SmallPtrSet<CallGraphNode *, 8> SCCNodes;
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
bool MadeChange = false;
// Fill SCCNodes with the elements of the SCC. Used for quickly
// looking up whether a given CallGraphNode is in this SCC.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- SCCNodes.insert(*I);
+ for (CallGraphNode *I : SCC)
+ SCCNodes.insert(I);
// First pass, scan all of the functions in the SCC, simplifying them
// according to what we know.
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- if (Function *F = (*I)->getFunction())
- MadeChange |= SimplifyFunction(F);
+ for (CallGraphNode *I : SCC)
+ if (Function *F = I->getFunction())
+ MadeChange |= SimplifyFunction(F, CG);
// Next, check to see if any callees might throw or if there are any external
// functions in this SCC: if so, we cannot prune any functions in this SCC.
@@ -93,7 +90,10 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
if (!F) {
SCCMightUnwind = true;
SCCMightReturn = true;
- } else if (F->isDeclaration() || F->mayBeOverridden()) {
+ } else if (F->isDeclaration() || F->isInterposable()) {
+ // Note: isInterposable (as opposed to hasExactDefinition) is fine above,
+ // since we're not inferring new attributes here, but only using existing,
+ // assumed to be correct, function attributes.
SCCMightUnwind |= !F->doesNotThrow();
SCCMightReturn |= !F->doesNotReturn();
} else {
@@ -153,8 +153,8 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// If the SCC doesn't unwind or doesn't throw, note this fact.
if (!SCCMightUnwind || !SCCMightReturn)
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
+ for (CallGraphNode *I : SCC) {
+ Function *F = I->getFunction();
if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) {
F->addFnAttr(Attribute::NoUnwind);
@@ -167,22 +167,30 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
}
}
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ for (CallGraphNode *I : SCC) {
// Convert any invoke instructions to non-throwing functions in this node
// into call instructions with a branch. This makes the exception blocks
// dead.
- if (Function *F = (*I)->getFunction())
- MadeChange |= SimplifyFunction(F);
+ if (Function *F = I->getFunction())
+ MadeChange |= SimplifyFunction(F, CG);
}
return MadeChange;
}
+bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
+ if (skipSCC(SCC))
+ return false;
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ return runImpl(SCC, CG);
+}
+
+
// SimplifyFunction - Given information about callees, simplify the specified
// function if we have invokes to non-unwinding functions or code after calls to
// no-return functions.
-bool PruneEH::SimplifyFunction(Function *F) {
+static bool SimplifyFunction(Function *F, CallGraph &CG) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
@@ -192,7 +200,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
// If the unwind block is now dead, nuke it.
if (pred_empty(UnwindBlock))
- DeleteBasicBlock(UnwindBlock); // Delete the new BB.
+ DeleteBasicBlock(UnwindBlock, CG); // Delete the new BB.
++NumRemoved;
MadeChange = true;
@@ -211,7 +219,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
BB->getInstList().pop_back();
new UnreachableInst(BB->getContext(), &*BB);
- DeleteBasicBlock(New); // Delete the new BB.
+ DeleteBasicBlock(New, CG); // Delete the new BB.
MadeChange = true;
++NumUnreach;
break;
@@ -224,9 +232,8 @@ bool PruneEH::SimplifyFunction(Function *F) {
/// DeleteBasicBlock - remove the specified basic block from the program,
/// updating the callgraph to reflect any now-obsolete edges due to calls that
/// exist in the BB.
-void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
+static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) {
assert(pred_empty(BB) && "BB is not dead!");
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
Instruction *TokenInst = nullptr;
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 928d92ef9d121..39de108edc067 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -22,10 +22,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/SampleProfile.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Constants.h"
@@ -35,6 +37,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
@@ -76,16 +79,6 @@ static cl::opt<double> SampleProfileHotThreshold(
"sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
cl::desc("Inlined functions that account for more than N% of all samples "
"collected in the parent function, will be inlined again."));
-static cl::opt<double> SampleProfileGlobalHotThreshold(
- "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
- cl::desc("Top-level functions that account for more than N% of all samples "
- "collected in the profile, will be marked as hot for the inliner "
- "to consider."));
-static cl::opt<double> SampleProfileGlobalColdThreshold(
- "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
- cl::desc("Top-level functions that account for less than N% of all samples "
- "collected in the profile, will be marked as cold for the inliner "
- "to consider."));
namespace {
typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
@@ -100,30 +93,19 @@ typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
/// This pass reads profile data from the file specified by
/// -sample-profile-file and annotates every affected function with the
/// profile information found in that file.
-class SampleProfileLoader : public ModulePass {
+class SampleProfileLoader {
public:
- // Class identification, replacement for typeinfo
- static char ID;
-
SampleProfileLoader(StringRef Name = SampleProfileFile)
- : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
+ : DT(nullptr), PDT(nullptr), LI(nullptr), ACT(nullptr), Reader(),
Samples(nullptr), Filename(Name), ProfileIsValid(false),
- TotalCollectedSamples(0) {
- initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
- }
+ TotalCollectedSamples(0) {}
- bool doInitialization(Module &M) override;
+ bool doInitialization(Module &M);
+ bool runOnModule(Module &M);
+ void setACT(AssumptionCacheTracker *A) { ACT = A; }
void dump() { Reader->dump(); }
- const char *getPassName() const override { return "Sample profile pass"; }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- }
-
protected:
bool runOnFunction(Function &F);
unsigned getFunctionLoc(Function &F);
@@ -133,14 +115,12 @@ protected:
const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineHotFunctions(Function &F);
- bool emitInlineHints(Function &F);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
bool computeBlockWeights(Function &F);
void findEquivalenceClasses(Function &F);
- void findEquivalencesFor(BasicBlock *BB1,
- SmallVector<BasicBlock *, 8> Descendants,
+ void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
DominatorTreeBase<BasicBlock> *DomTree);
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
@@ -163,10 +143,10 @@ protected:
EdgeWeightMap EdgeWeights;
/// \brief Set of visited blocks during propagation.
- SmallPtrSet<const BasicBlock *, 128> VisitedBlocks;
+ SmallPtrSet<const BasicBlock *, 32> VisitedBlocks;
/// \brief Set of visited edges during propagation.
- SmallSet<Edge, 128> VisitedEdges;
+ SmallSet<Edge, 32> VisitedEdges;
/// \brief Equivalence classes for block weights.
///
@@ -181,6 +161,8 @@ protected:
std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
std::unique_ptr<LoopInfo> LI;
+ AssumptionCacheTracker *ACT;
+
/// \brief Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -206,6 +188,32 @@ protected:
uint64_t TotalCollectedSamples;
};
+class SampleProfileLoaderLegacyPass : public ModulePass {
+public:
+ // Class identification, replacement for typeinfo
+ static char ID;
+
+ SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile)
+ : ModulePass(ID), SampleLoader(Name) {
+ initializeSampleProfileLoaderLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void dump() { SampleLoader.dump(); }
+
+ bool doInitialization(Module &M) override {
+ return SampleLoader.doInitialization(M);
+ }
+ const char *getPassName() const override { return "Sample profile pass"; }
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ }
+private:
+ SampleProfileLoader SampleLoader;
+};
+
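The wrapper's runOnModule() body falls outside this hunk; presumably it forwards to the shared loader after wiring up the tracker, along these lines (a sketch, not the verbatim patch):

bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
  // Hand the legacy AssumptionCacheTracker (required via getAnalysisUsage
  // above) to the shared implementation, then defer to it entirely.
  SampleLoader.setACT(&getAnalysis<AssumptionCacheTracker>());
  return SampleLoader.runOnModule(M);
}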
class SampleCoverageTracker {
public:
SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
@@ -285,7 +293,6 @@ bool callsiteIsHot(const FunctionSamples *CallerFS,
(double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
return PercentSamples >= SampleProfileHotThreshold;
}
-
}
/// Mark as used the sample record for the given function samples at
@@ -445,7 +452,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
/// \returns the weight of \p Inst.
ErrorOr<uint64_t>
SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
- DebugLoc DLoc = Inst.getDebugLoc();
+ const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -453,6 +460,11 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
if (!FS)
return std::error_code();
+ // Ignore all dbg_value intrinsics.
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (II && II->getIntrinsicID() == Intrinsic::dbg_value)
+ return std::error_code();
+
const DILocation *DIL = DLoc;
unsigned Lineno = DLoc.getLine();
unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
@@ -476,6 +488,13 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
<< Inst << " (line offset: " << Lineno - HeaderLineno << "."
<< DIL->getDiscriminator() << " - weight: " << R.get()
<< ")\n");
+ } else {
+ // If a call instruction is inlined in the profile but not inlined here,
+ // the inlined callsite had no samples, so the call instruction should
+ // have a count of 0.
+ const CallInst *CI = dyn_cast<CallInst>(&Inst);
+ if (CI && findCalleeFunctionSamples(*CI))
+ R = 0;
}
return R;
}
@@ -490,19 +509,22 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
/// \returns the weight for \p BB.
ErrorOr<uint64_t>
SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
- bool Found = false;
- uint64_t Weight = 0;
+ DenseMap<uint64_t, uint64_t> CM; // instruction weight -> occurrence count
for (auto &I : BB->getInstList()) {
const ErrorOr<uint64_t> &R = getInstWeight(I);
- if (R && R.get() >= Weight) {
- Weight = R.get();
- Found = true;
+ if (R) CM[R.get()]++;
+ }
+ if (CM.size() == 0) return std::error_code();
+ uint64_t W = 0, C = 0; // W: best occurrence count so far; C: its weight
+ for (const auto &C_W : CM) {
+ if (C_W.second == W) {
+ C = std::max(C, C_W.first);
+ } else if (C_W.second > W) {
+ C = C_W.first;
+ W = C_W.second;
}
}
- if (Found)
- return Weight;
- else
- return std::error_code();
+ return C;
}
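The rewrite changes the block-weight policy from "maximum instruction weight" to "most frequent instruction weight, with ties broken toward the larger value." A self-contained restatement of the same policy (hypothetical helper, plain STL in place of DenseMap):

#include <cstdint>
#include <map>
#include <vector>

// E.g. weights {100, 5, 5} now yield 5 (the old maximum gave 100), and
// {7, 7, 10, 10} yield 10 (equal counts, so the larger weight wins the tie).
static uint64_t modeWeight(const std::vector<uint64_t> &Weights) {
  std::map<uint64_t, uint64_t> CM; // weight -> occurrence count
  for (uint64_t W : Weights)
    ++CM[W];
  uint64_t BestWeight = 0, BestCount = 0;
  for (const auto &Entry : CM)
    if (Entry.second > BestCount ||
        (Entry.second == BestCount && Entry.first > BestWeight)) {
      BestWeight = Entry.first;
      BestCount = Entry.second;
    }
  return BestWeight;
}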
/// \brief Compute and store the weights of every basic block.
@@ -549,19 +571,12 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
if (!SP)
return nullptr;
- Function *CalleeFunc = Inst.getCalledFunction();
- if (!CalleeFunc) {
- return nullptr;
- }
-
- StringRef CalleeName = CalleeFunc->getName();
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
- return FS->findFunctionSamplesAt(
- CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
- DIL->getDiscriminator(), CalleeName));
+ return FS->findFunctionSamplesAt(LineLocation(
+ getOffset(DIL->getLine(), SP->getLine()), DIL->getDiscriminator()));
}
/// \brief Get the FunctionSamples for an instruction.
@@ -575,22 +590,17 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
- SmallVector<CallsiteLocation, 10> S;
+ SmallVector<LineLocation, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL) {
return Samples;
}
- StringRef CalleeName;
- for (const DILocation *DIL = Inst.getDebugLoc(); DIL;
- DIL = DIL->getInlinedAt()) {
+ for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
DISubprogram *SP = DIL->getScope()->getSubprogram();
if (!SP)
return nullptr;
- if (!CalleeName.empty()) {
- S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
- DIL->getDiscriminator(), CalleeName));
- }
- CalleeName = SP->getLinkageName();
+ S.push_back(LineLocation(getOffset(DIL->getLine(), SP->getLine()),
+ DIL->getDiscriminator()));
}
if (S.size() == 0)
return Samples;
@@ -601,63 +611,6 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return FS;
}
-/// \brief Emit an inline hint if \p F is globally hot or cold.
-///
-/// If \p F consumes a significant fraction of samples (indicated by
-/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the
-/// inliner to consider the function hot.
-///
-/// If \p F consumes a small fraction of samples (indicated by
-/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner
-/// to consider the function cold.
-///
-/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
-/// function globally hot or cold, we should be annotating individual callsites.
-/// This is not currently possible, but work on the inliner will eventually
-/// provide this ability. See http://reviews.llvm.org/D15003 for details and
-/// discussion.
-///
-/// \returns True if either attribute was applied to \p F.
-bool SampleProfileLoader::emitInlineHints(Function &F) {
- if (TotalCollectedSamples == 0)
- return false;
-
- uint64_t FunctionSamples = Samples->getTotalSamples();
- double SamplesPercent =
- (double)FunctionSamples / (double)TotalCollectedSamples * 100.0;
-
- // If the function collected more samples than the hot threshold, mark
- // it globally hot.
- if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
- F.addFnAttr(llvm::Attribute::InlineHint);
- std::string Msg;
- raw_string_ostream S(Msg);
- S << "Applied inline hint to globally hot function '" << F.getName()
- << "' with " << format("%.2f", SamplesPercent)
- << "% of samples (threshold: "
- << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)";
- S.flush();
- emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
- return true;
- }
-
- // If the function collected fewer samples than the cold threshold, mark
- // it globally cold.
- if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
- F.addFnAttr(llvm::Attribute::Cold);
- std::string Msg;
- raw_string_ostream S(Msg);
- S << "Applied cold hint to globally cold function '" << F.getName()
- << "' with " << format("%.2f", SamplesPercent)
- << "% of samples (threshold: "
- << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)";
- S.flush();
- emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
- return true;
- }
-
- return false;
-}
/// \brief Iteratively inline hot callsites of a function.
///
@@ -685,7 +638,7 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
}
}
for (auto CI : CIS) {
- InlineFunctionInfo IFI;
+ InlineFunctionInfo IFI(nullptr, ACT);
Function *CalledFunction = CI->getCalledFunction();
DebugLoc DLoc = CI->getDebugLoc();
uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
@@ -731,7 +684,7 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
/// with blocks from \p BB1's dominator tree, then
/// this is the post-dominator tree, and vice versa.
void SampleProfileLoader::findEquivalencesFor(
- BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
+ BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
DominatorTreeBase<BasicBlock> *DomTree) {
const BasicBlock *EC = EquivalenceClass[BB1];
uint64_t Weight = BlockWeights[EC];
@@ -859,23 +812,31 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
// edge is unknown (see setEdgeOrBlockWeight).
for (unsigned i = 0; i < 2; i++) {
uint64_t TotalWeight = 0;
- unsigned NumUnknownEdges = 0;
- Edge UnknownEdge, SelfReferentialEdge;
+ unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
+ Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
if (i == 0) {
// First, visit all predecessor edges.
+ NumTotalEdges = Predecessors[BB].size();
for (auto *Pred : Predecessors[BB]) {
Edge E = std::make_pair(Pred, BB);
TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
if (E.first == E.second)
SelfReferentialEdge = E;
}
+ if (NumTotalEdges == 1) {
+ SingleEdge = std::make_pair(Predecessors[BB][0], BB);
+ }
} else {
// On the second round, visit all successor edges.
+ NumTotalEdges = Successors[BB].size();
for (auto *Succ : Successors[BB]) {
Edge E = std::make_pair(BB, Succ);
TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
}
+ if (NumTotalEdges == 1) {
+ SingleEdge = std::make_pair(BB, Successors[BB][0]);
+ }
}
// After visiting all the edges, there are three cases that we
@@ -904,18 +865,24 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
if (NumUnknownEdges <= 1) {
uint64_t &BBWeight = BlockWeights[EC];
if (NumUnknownEdges == 0) {
- // If we already know the weight of all edges, the weight of the
- // basic block can be computed. It should be no larger than the sum
- // of all edge weights.
- if (TotalWeight > BBWeight) {
- BBWeight = TotalWeight;
+ if (!VisitedBlocks.count(EC)) {
+ // If we already know the weight of all edges, the weight of the
+ // basic block can be computed. It should be no larger than the sum
+ // of all edge weights.
+ if (TotalWeight > BBWeight) {
+ BBWeight = TotalWeight;
+ Changed = true;
+ DEBUG(dbgs() << "All edge weights for " << BB->getName()
+ << " known. Set weight for block: ";
+ printBlockWeight(dbgs(), BB););
+ }
+ } else if (NumTotalEdges == 1 &&
+ EdgeWeights[SingleEdge] < BlockWeights[EC]) {
+        // If the visited basic block has only one edge, raise that edge's
+        // weight to the block weight when the edge weight is smaller.
+ EdgeWeights[SingleEdge] = BlockWeights[EC];
Changed = true;
- DEBUG(dbgs() << "All edge weights for " << BB->getName()
- << " known. Set weight for block: ";
- printBlockWeight(dbgs(), BB););
}
- if (VisitedBlocks.insert(EC).second)
- Changed = true;
} else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
// If there is a single unknown edge and the block has been
// visited, then we can compute E's weight.
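+      // Worked example (illustrative): if BlockWeights[EC] == 100 and the
+      // known edges sum to TotalWeight == 60, the single unknown edge is
+      // assigned 100 - 60 == 40 (or 0 when TotalWeight already exceeds the
+      // block weight).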
@@ -1020,6 +987,19 @@ void SampleProfileLoader::propagateWeights(Function &F) {
MDBuilder MDB(Ctx);
for (auto &BI : F) {
BasicBlock *BB = &BI;
+
+ if (BlockWeights[BB]) {
+ for (auto &I : BB->getInstList()) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+          if (!isa<IntrinsicInst>(&I)) {
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(BlockWeights[BB]);
+ CI->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ }
+ }
+ }
+ }
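+    // In textual IR, the annotation above turns a direct call into (sketch):
+    //   call void @bar(), !prof !1
+    //   !1 = !{!"branch_weights", i32 5000}
+    // where 5000 stands for BlockWeights[BB]; @bar is a placeholder callee.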
TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
continue;
@@ -1084,7 +1064,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
/// \returns the line number where \p F is defined. If it returns 0,
/// it means that there is no debug information available for \p F.
unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
- if (DISubprogram *S = getDISubprogram(&F))
+ if (DISubprogram *S = F.getSubprogram())
return S->getLine();
// If the start of \p F is missing, emit a diagnostic to inform the user
@@ -1165,8 +1145,6 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- Changed |= emitInlineHints(F);
-
Changed |= inlineHotFunctions(F);
// Compute basic block weights.
@@ -1190,7 +1168,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
if (Coverage < SampleProfileRecordCoverage) {
F.getContext().diagnose(DiagnosticInfoSampleProfile(
- getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ F.getSubprogram()->getFilename(), getFunctionLoc(F),
Twine(Used) + " of " + Twine(Total) + " available profile records (" +
Twine(Coverage) + "%) were applied",
DS_Warning));
@@ -1203,7 +1181,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
if (Coverage < SampleProfileSampleCoverage) {
F.getContext().diagnose(DiagnosticInfoSampleProfile(
- getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+ F.getSubprogram()->getFilename(), getFunctionLoc(F),
Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
Twine(Coverage) + "%) were applied",
DS_Warning));
@@ -1212,12 +1190,12 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
return Changed;
}
-char SampleProfileLoader::ID = 0;
-INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
- "Sample Profile loader", false, false)
-INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
-INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
- "Sample Profile loader", false, false)
+char SampleProfileLoaderLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
+ "Sample Profile loader", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
+ "Sample Profile loader", false, false)
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
@@ -1233,11 +1211,11 @@ bool SampleProfileLoader::doInitialization(Module &M) {
}
ModulePass *llvm::createSampleProfileLoaderPass() {
- return new SampleProfileLoader(SampleProfileFile);
+ return new SampleProfileLoaderLegacyPass(SampleProfileFile);
}
ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
- return new SampleProfileLoader(Name);
+ return new SampleProfileLoaderLegacyPass(Name);
}
bool SampleProfileLoader::runOnModule(Module &M) {
@@ -1254,12 +1232,33 @@ bool SampleProfileLoader::runOnModule(Module &M) {
clearFunctionData();
retval |= runOnFunction(F);
}
+ M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
return retval;
}
+bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
+ // FIXME: pass in AssumptionCache correctly for the new pass manager.
+ SampleLoader.setACT(&getAnalysis<AssumptionCacheTracker>());
+ return SampleLoader.runOnModule(M);
+}
+
bool SampleProfileLoader::runOnFunction(Function &F) {
+ F.setEntryCount(0);
Samples = Reader->getSamplesFor(F);
if (!Samples->empty())
return emitAnnotations(F);
return false;
}
+
+PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
+ AnalysisManager<Module> &AM) {
+
+ SampleProfileLoader SampleLoader(SampleProfileFile);
+
+ SampleLoader.doInitialization(M);
+
+ if (!SampleLoader.runOnModule(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
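+
+// Usage sketch (assuming a ModuleAnalysisManager MAM registered with the
+// usual analyses; SampleProfileLoaderPass is default-constructed here):
+//   ModulePassManager MPM;
+//   MPM.addPass(SampleProfileLoaderPass());
+//   MPM.run(M, MAM);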
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index c94cc7c74a894..3c3c5dd19d1f0 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -53,7 +53,8 @@ static bool stripDeadPrototypes(Module &M) {
return MadeChange;
}
-PreservedAnalyses StripDeadPrototypesPass::run(Module &M) {
+PreservedAnalyses StripDeadPrototypesPass::run(Module &M,
+ ModuleAnalysisManager &) {
if (stripDeadPrototypes(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -69,6 +70,9 @@ public:
*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+
return stripDeadPrototypes(M);
}
};
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 46f352f7f9f13..fd250366cef24 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -21,7 +21,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -216,11 +215,11 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
I->setName(""); // Internal symbols can't participate in linkage
}
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
- if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
- I->setName(""); // Internal symbols can't participate in linkage
- StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
+ for (Function &I : M) {
+ if (I.hasLocalLinkage() && llvmUsedValues.count(&I) == 0)
+ if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg"))
+ I.setName(""); // Internal symbols can't participate in linkage
+ StripSymtab(I.getValueSymbolTable(), PreserveDbgInfo);
}
// Remove all names from types.
@@ -230,6 +229,9 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
}
bool StripSymbols::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
bool Changed = false;
Changed |= StripDebugInfo(M);
if (!OnlyDebugInfo)
@@ -238,10 +240,15 @@ bool StripSymbols::runOnModule(Module &M) {
}
bool StripNonDebugSymbols::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
return StripSymbolNames(M, true);
}
bool StripDebugDeclare::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
Function *Declare = M.getFunction("llvm.dbg.declare");
std::vector<Constant*> DeadConstants;
@@ -287,6 +294,9 @@ bool StripDebugDeclare::runOnModule(Module &M) {
/// optimized away by the optimizer. This special pass removes debug info for
/// such symbols.
bool StripDeadDebugInfo::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
bool Changed = false;
LLVMContext &C = M.getContext();
@@ -312,20 +322,6 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
}
for (DICompileUnit *DIC : F.compile_units()) {
- // Create our live subprogram list.
- bool SubprogramChange = false;
- for (DISubprogram *DISP : DIC->getSubprograms()) {
- // Make sure we visit each subprogram only once.
- if (!VisitedSet.insert(DISP).second)
- continue;
-
- // If the function referenced by DISP is not null, the function is live.
- if (LiveSPs.count(DISP))
- LiveSubprograms.push_back(DISP);
- else
- SubprogramChange = true;
- }
-
// Create our live global variable list.
bool GlobalVariableChange = false;
for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
@@ -341,14 +337,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
GlobalVariableChange = true;
}
- // If we found dead subprograms or global variables, replace the current
- // subprogram list/global variable list with our new live subprogram/global
- // variable list.
- if (SubprogramChange) {
- DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
- Changed = true;
- }
-
+ // If we found dead global variables, replace the current global
+ // variable list with our new live global variable list.
if (GlobalVariableChange) {
DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
Changed = true;
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
new file mode 100644
index 0000000000000..53eb4e2c90761
--- /dev/null
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -0,0 +1,843 @@
+//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements whole program optimization of virtual calls in cases
+// where we know (via !type metadata) that the list of callees is fixed. This
+// includes the following:
+// - Single implementation devirtualization: if a virtual call has a single
+// possible callee, replace all calls with a direct call to that callee.
+// - Virtual constant propagation: if the virtual function's return type is an
+// integer <=64 bits and all possible callees are readnone, for each class and
+// each list of constant arguments: evaluate the function, store the return
+// value alongside the virtual table, and rewrite each virtual call as a load
+// from the virtual table.
+// - Uniform return value optimization: if the conditions for virtual constant
+// propagation hold and each function returns the same constant value, replace
+// each virtual call with that constant.
+// - Unique return value optimization for i1 return values: if the conditions
+// for virtual constant propagation hold and a single vtable's function
+// returns 0, or a single vtable's function returns 1, replace each virtual
+// call with a comparison of the vptr against that vtable's address.
+//
+//===----------------------------------------------------------------------===//
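+//
+// For example (illustrative IR; names are placeholders), a type-tested call:
+//   %p = call i1 @llvm.type.test(i8* %vtable, metadata !"_ZTS1A")
+//   call void @llvm.assume(i1 %p)
+//   %result = call i32 %fptr(i8* %obj)
+// becomes, under single implementation devirtualization, the direct call:
+//   %result = call i32 @_ZN1A1fEv(i8* %obj)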
+
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Evaluator.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#include <set>
+
+using namespace llvm;
+using namespace wholeprogramdevirt;
+
+#define DEBUG_TYPE "wholeprogramdevirt"
+
+// Find the minimum offset that we may store a value of size Size bits at. If
+// IsAfter is set, look for an offset after the object, otherwise look for an
+// offset before the object.
+uint64_t
+wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
+ bool IsAfter, uint64_t Size) {
+ // Find a minimum offset taking into account only vtable sizes.
+ uint64_t MinByte = 0;
+ for (const VirtualCallTarget &Target : Targets) {
+ if (IsAfter)
+ MinByte = std::max(MinByte, Target.minAfterBytes());
+ else
+ MinByte = std::max(MinByte, Target.minBeforeBytes());
+ }
+
+ // Build a vector of arrays of bytes covering, for each target, a slice of the
+ // used region (see AccumBitVector::BytesUsed in
+ // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
+ // this aligns the used regions to start at MinByte.
+ //
+ // In this example, A, B and C are vtables, # is a byte already allocated for
+ // a virtual function pointer, AAAA... (etc.) are the used regions for the
+ // vtables and Offset(X) is the value computed for the Offset variable below
+ // for X.
+ //
+ // Offset(A)
+ // | |
+ // |MinByte
+ // A: ################AAAAAAAA|AAAAAAAA
+ // B: ########BBBBBBBBBBBBBBBB|BBBB
+ // C: ########################|CCCCCCCCCCCCCCCC
+ // | Offset(B) |
+ //
+ // This code produces the slices of A, B and C that appear after the divider
+ // at MinByte.
+ std::vector<ArrayRef<uint8_t>> Used;
+ for (const VirtualCallTarget &Target : Targets) {
+ ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
+ : Target.TM->Bits->Before.BytesUsed;
+ uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
+ : MinByte - Target.minBeforeBytes();
+
+ // Disregard used regions that are smaller than Offset. These are
+ // effectively all-free regions that do not need to be checked.
+ if (VTUsed.size() > Offset)
+ Used.push_back(VTUsed.slice(Offset));
+ }
+
+ if (Size == 1) {
+ // Find a free bit in each member of Used.
+ for (unsigned I = 0;; ++I) {
+ uint8_t BitsUsed = 0;
+ for (auto &&B : Used)
+ if (I < B.size())
+ BitsUsed |= B[I];
+ if (BitsUsed != 0xff)
+ return (MinByte + I) * 8 +
+ countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined);
+ }
+ } else {
+ // Find a free (Size/8) byte region in each member of Used.
+ // FIXME: see if alignment helps.
+ for (unsigned I = 0;; ++I) {
+ for (auto &&B : Used) {
+ unsigned Byte = 0;
+ while ((I + Byte) < B.size() && Byte < (Size / 8)) {
+ if (B[I + Byte])
+ goto NextI;
+ ++Byte;
+ }
+ }
+ return (MinByte + I) * 8;
+ NextI:;
+ }
+ }
+}
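+
+// Worked example (illustrative): with Size == 1, if the used bytes at index
+// I across all targets OR together to BitsUsed == 0b00001111, the first free
+// bit is countTrailingZeros(uint8_t(~BitsUsed)) == 4, so the function
+// returns (MinByte + I) * 8 + 4.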
+
+void wholeprogramdevirt::setBeforeReturnValues(
+ MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
+ unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
+ if (BitWidth == 1)
+ OffsetByte = -(AllocBefore / 8 + 1);
+ else
+ OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
+ OffsetBit = AllocBefore % 8;
+
+ for (VirtualCallTarget &Target : Targets) {
+ if (BitWidth == 1)
+ Target.setBeforeBit(AllocBefore);
+ else
+ Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
+ }
+}
+
+void wholeprogramdevirt::setAfterReturnValues(
+ MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocAfter,
+ unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
+ if (BitWidth == 1)
+ OffsetByte = AllocAfter / 8;
+ else
+ OffsetByte = (AllocAfter + 7) / 8;
+ OffsetBit = AllocAfter % 8;
+
+ for (VirtualCallTarget &Target : Targets) {
+ if (BitWidth == 1)
+ Target.setAfterBit(AllocAfter);
+ else
+ Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
+ }
+}
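+
+// Worked arithmetic (illustrative): a 32-bit value stored after the vtable
+// with AllocAfter == 12 yields OffsetByte == (12 + 7) / 8 == 2 and
+// OffsetBit == 12 % 8 == 4; a 1-bit value stored before the vtable with
+// AllocBefore == 3 yields OffsetByte == -(3 / 8 + 1) == -1 and OffsetBit == 3.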
+
+VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM)
+ : Fn(Fn), TM(TM),
+ IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()) {}
+
+namespace {
+
+// A slot in a set of virtual tables. The TypeID identifies the set of virtual
+// tables, and the ByteOffset is the offset in bytes from the address point to
+// the virtual function pointer.
+struct VTableSlot {
+ Metadata *TypeID;
+ uint64_t ByteOffset;
+};
+
+} // anonymous namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<VTableSlot> {
+ static VTableSlot getEmptyKey() {
+ return {DenseMapInfo<Metadata *>::getEmptyKey(),
+ DenseMapInfo<uint64_t>::getEmptyKey()};
+ }
+ static VTableSlot getTombstoneKey() {
+ return {DenseMapInfo<Metadata *>::getTombstoneKey(),
+ DenseMapInfo<uint64_t>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const VTableSlot &I) {
+ return DenseMapInfo<Metadata *>::getHashValue(I.TypeID) ^
+ DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
+ }
+ static bool isEqual(const VTableSlot &LHS,
+ const VTableSlot &RHS) {
+ return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
+ }
+};
+
+} // namespace llvm
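+
+// With the DenseMapInfo specialization above, VTableSlot can key a DenseMap
+// directly. A minimal sketch, assuming a Metadata *TypeId is in scope:
+//   DenseMap<VTableSlot, unsigned> SlotCounts;
+//   ++SlotCounts[{TypeId, /*ByteOffset=*/0}];
+//   ++SlotCounts[{TypeId, /*ByteOffset=*/8}]; // distinct slot, distinct entry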
+
+namespace {
+
+// A virtual call site. VTable is the loaded virtual table pointer, and CS is
+// the indirect virtual call.
+struct VirtualCallSite {
+ Value *VTable;
+ CallSite CS;
+
+ // If non-null, this field points to the associated unsafe use count stored in
+ // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
+ // of that field for details.
+ unsigned *NumUnsafeUses;
+
+ void emitRemark() {
+ Function *F = CS.getCaller();
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F,
+ CS.getInstruction()->getDebugLoc(),
+ "devirtualized call");
+ }
+
+ void replaceAndErase(Value *New) {
+ emitRemark();
+ CS->replaceAllUsesWith(New);
+ if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ BranchInst::Create(II->getNormalDest(), CS.getInstruction());
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ }
+ CS->eraseFromParent();
+ // This use is no longer unsafe.
+ if (NumUnsafeUses)
+ --*NumUnsafeUses;
+ }
+};
+
+struct DevirtModule {
+ Module &M;
+ IntegerType *Int8Ty;
+ PointerType *Int8PtrTy;
+ IntegerType *Int32Ty;
+
+ MapVector<VTableSlot, std::vector<VirtualCallSite>> CallSlots;
+
+ // This map keeps track of the number of "unsafe" uses of a loaded function
+ // pointer. The key is the associated llvm.type.test intrinsic call generated
+ // by this pass. An unsafe use is one that calls the loaded function pointer
+ // directly. Every time we eliminate an unsafe use (for example, by
+ // devirtualizing it or by applying virtual constant propagation), we
+ // decrement the value stored in this map. If a value reaches zero, we can
+ // eliminate the type check by RAUWing the associated llvm.type.test call with
+ // true.
+ std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
+
+ DevirtModule(Module &M)
+ : M(M), Int8Ty(Type::getInt8Ty(M.getContext())),
+ Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
+ Int32Ty(Type::getInt32Ty(M.getContext())) {}
+
+ void scanTypeTestUsers(Function *TypeTestFunc, Function *AssumeFunc);
+ void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
+
+ void buildTypeIdentifierMap(
+ std::vector<VTableBits> &Bits,
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
+ bool
+ tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
+ const std::set<TypeMemberInfo> &TypeMemberInfos,
+ uint64_t ByteOffset);
+ bool trySingleImplDevirt(ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallSite> CallSites);
+ bool tryEvaluateFunctionsWithArgs(
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ ArrayRef<ConstantInt *> Args);
+ bool tryUniformRetValOpt(IntegerType *RetType,
+ ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallSite> CallSites);
+ bool tryUniqueRetValOpt(unsigned BitWidth,
+ ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallSite> CallSites);
+ bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ ArrayRef<VirtualCallSite> CallSites);
+
+ void rebuildGlobal(VTableBits &B);
+
+ bool run();
+};
+
+struct WholeProgramDevirt : public ModulePass {
+ static char ID;
+ WholeProgramDevirt() : ModulePass(ID) {
+ initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
+ }
+  bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+
+ return DevirtModule(M).run();
+ }
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS(WholeProgramDevirt, "wholeprogramdevirt",
+ "Whole program devirtualization", false, false)
+char WholeProgramDevirt::ID = 0;
+
+ModulePass *llvm::createWholeProgramDevirtPass() {
+ return new WholeProgramDevirt;
+}
+
+PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
+ ModuleAnalysisManager &) {
+ if (!DevirtModule(M).run())
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
+
+void DevirtModule::buildTypeIdentifierMap(
+ std::vector<VTableBits> &Bits,
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
+ DenseMap<GlobalVariable *, VTableBits *> GVToBits;
+ Bits.reserve(M.getGlobalList().size());
+ SmallVector<MDNode *, 2> Types;
+ for (GlobalVariable &GV : M.globals()) {
+ Types.clear();
+ GV.getMetadata(LLVMContext::MD_type, Types);
+ if (Types.empty())
+ continue;
+
+ VTableBits *&BitsPtr = GVToBits[&GV];
+ if (!BitsPtr) {
+ Bits.emplace_back();
+ Bits.back().GV = &GV;
+ Bits.back().ObjectSize =
+ M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
+ BitsPtr = &Bits.back();
+ }
+
+ for (MDNode *Type : Types) {
+ auto TypeID = Type->getOperand(1).get();
+
+ uint64_t Offset =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+
+ TypeIdMap[TypeID].insert({BitsPtr, Offset});
+ }
+ }
+}
+
+bool DevirtModule::tryFindVirtualCallTargets(
+ std::vector<VirtualCallTarget> &TargetsForSlot,
+ const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
+ for (const TypeMemberInfo &TM : TypeMemberInfos) {
+ if (!TM.Bits->GV->isConstant())
+ return false;
+
+ auto Init = dyn_cast<ConstantArray>(TM.Bits->GV->getInitializer());
+ if (!Init)
+ return false;
+ ArrayType *VTableTy = Init->getType();
+
+ uint64_t ElemSize =
+ M.getDataLayout().getTypeAllocSize(VTableTy->getElementType());
+ uint64_t GlobalSlotOffset = TM.Offset + ByteOffset;
+ if (GlobalSlotOffset % ElemSize != 0)
+ return false;
+
+ unsigned Op = GlobalSlotOffset / ElemSize;
+ if (Op >= Init->getNumOperands())
+ return false;
+
+ auto Fn = dyn_cast<Function>(Init->getOperand(Op)->stripPointerCasts());
+ if (!Fn)
+ return false;
+
+ // We can disregard __cxa_pure_virtual as a possible call target, as
+ // calls to pure virtuals are UB.
+ if (Fn->getName() == "__cxa_pure_virtual")
+ continue;
+
+ TargetsForSlot.push_back({Fn, &TM});
+ }
+
+ // Give up if we couldn't find any targets.
+ return !TargetsForSlot.empty();
+}
+
+bool DevirtModule::trySingleImplDevirt(
+ ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallSite> CallSites) {
+ // See if the program contains a single implementation of this virtual
+ // function.
+ Function *TheFn = TargetsForSlot[0].Fn;
+ for (auto &&Target : TargetsForSlot)
+ if (TheFn != Target.Fn)
+ return false;
+
+ // If so, update each call site to call that implementation directly.
+ for (auto &&VCallSite : CallSites) {
+ VCallSite.emitRemark();
+ VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
+ TheFn, VCallSite.CS.getCalledValue()->getType()));
+ // This use is no longer unsafe.
+ if (VCallSite.NumUnsafeUses)
+ --*VCallSite.NumUnsafeUses;
+ }
+ return true;
+}
+
+bool DevirtModule::tryEvaluateFunctionsWithArgs(
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ ArrayRef<ConstantInt *> Args) {
+ // Evaluate each function and store the result in each target's RetVal
+ // field.
+ for (VirtualCallTarget &Target : TargetsForSlot) {
+ if (Target.Fn->arg_size() != Args.size() + 1)
+ return false;
+ for (unsigned I = 0; I != Args.size(); ++I)
+ if (Target.Fn->getFunctionType()->getParamType(I + 1) !=
+ Args[I]->getType())
+ return false;
+
+ Evaluator Eval(M.getDataLayout(), nullptr);
+ SmallVector<Constant *, 2> EvalArgs;
+ EvalArgs.push_back(
+ Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0)));
+ EvalArgs.insert(EvalArgs.end(), Args.begin(), Args.end());
+ Constant *RetVal;
+ if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) ||
+ !isa<ConstantInt>(RetVal))
+ return false;
+ Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
+ }
+ return true;
+}
+
+bool DevirtModule::tryUniformRetValOpt(
+ IntegerType *RetType, ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallSite> CallSites) {
+ // Uniform return value optimization. If all functions return the same
+ // constant, replace all calls with that constant.
+ uint64_t TheRetVal = TargetsForSlot[0].RetVal;
+ for (const VirtualCallTarget &Target : TargetsForSlot)
+ if (Target.RetVal != TheRetVal)
+ return false;
+
+ auto TheRetValConst = ConstantInt::get(RetType, TheRetVal);
+ for (auto Call : CallSites)
+ Call.replaceAndErase(TheRetValConst);
+ return true;
+}
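+
+// Worked example (illustrative): if every target's RetVal evaluates to 1 for
+// a given constant argument list, each matching call site is replaced with
+// the constant (e.g. `i32 1` for a 32-bit return type) and erased.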
+
+bool DevirtModule::tryUniqueRetValOpt(
+ unsigned BitWidth, ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallSite> CallSites) {
+ // IsOne controls whether we look for a 0 or a 1.
+ auto tryUniqueRetValOptFor = [&](bool IsOne) {
+    const TypeMemberInfo *UniqueMember = nullptr;
+ for (const VirtualCallTarget &Target : TargetsForSlot) {
+ if (Target.RetVal == (IsOne ? 1 : 0)) {
+ if (UniqueMember)
+ return false;
+ UniqueMember = Target.TM;
+ }
+ }
+
+ // We should have found a unique member or bailed out by now. We already
+ // checked for a uniform return value in tryUniformRetValOpt.
+ assert(UniqueMember);
+
+ // Replace each call with the comparison.
+ for (auto &&Call : CallSites) {
+ IRBuilder<> B(Call.CS.getInstruction());
+ Value *OneAddr = B.CreateBitCast(UniqueMember->Bits->GV, Int8PtrTy);
+ OneAddr = B.CreateConstGEP1_64(OneAddr, UniqueMember->Offset);
+ Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+ Call.VTable, OneAddr);
+ Call.replaceAndErase(Cmp);
+ }
+ return true;
+ };
+
+ if (BitWidth == 1) {
+ if (tryUniqueRetValOptFor(true))
+ return true;
+ if (tryUniqueRetValOptFor(false))
+ return true;
+ }
+ return false;
+}
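+
+// Worked example (illustrative): for an i1 slot where only vtable @A's
+// implementation returns 1, each call through %vtable is replaced with:
+//   %cmp = icmp eq i8* %vtable, <address of @A's member at its offset>
+// The IsOne == false case emits icmp ne against the unique 0-returning
+// vtable instead.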
+
+bool DevirtModule::tryVirtualConstProp(
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ ArrayRef<VirtualCallSite> CallSites) {
+ // This only works if the function returns an integer.
+ auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType());
+ if (!RetType)
+ return false;
+ unsigned BitWidth = RetType->getBitWidth();
+ if (BitWidth > 64)
+ return false;
+
+ // Make sure that each function does not access memory, takes at least one
+ // argument, does not use its first argument (which we assume is 'this'),
+ // and has the same return type.
+ for (VirtualCallTarget &Target : TargetsForSlot) {
+ if (!Target.Fn->doesNotAccessMemory() || Target.Fn->arg_empty() ||
+ !Target.Fn->arg_begin()->use_empty() ||
+ Target.Fn->getReturnType() != RetType)
+ return false;
+ }
+
+ // Group call sites by the list of constant arguments they pass.
+ // The comparator ensures deterministic ordering.
+ struct ByAPIntValue {
+ bool operator()(const std::vector<ConstantInt *> &A,
+ const std::vector<ConstantInt *> &B) const {
+ return std::lexicographical_compare(
+ A.begin(), A.end(), B.begin(), B.end(),
+ [](ConstantInt *AI, ConstantInt *BI) {
+ return AI->getValue().ult(BI->getValue());
+ });
+ }
+ };
+ std::map<std::vector<ConstantInt *>, std::vector<VirtualCallSite>,
+ ByAPIntValue>
+ VCallSitesByConstantArg;
+ for (auto &&VCallSite : CallSites) {
+ std::vector<ConstantInt *> Args;
+ if (VCallSite.CS.getType() != RetType)
+ continue;
+ for (auto &&Arg :
+ make_range(VCallSite.CS.arg_begin() + 1, VCallSite.CS.arg_end())) {
+ if (!isa<ConstantInt>(Arg))
+ break;
+ Args.push_back(cast<ConstantInt>(&Arg));
+ }
+ if (Args.size() + 1 != VCallSite.CS.arg_size())
+ continue;
+
+ VCallSitesByConstantArg[Args].push_back(VCallSite);
+ }
+
+ for (auto &&CSByConstantArg : VCallSitesByConstantArg) {
+ if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
+ continue;
+
+ if (tryUniformRetValOpt(RetType, TargetsForSlot, CSByConstantArg.second))
+ continue;
+
+ if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second))
+ continue;
+
+ // Find an allocation offset in bits in all vtables associated with the
+ // type.
+ uint64_t AllocBefore =
+ findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
+ uint64_t AllocAfter =
+ findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);
+
+ // Calculate the total amount of padding needed to store a value at both
+ // ends of the object.
+ uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
+ for (auto &&Target : TargetsForSlot) {
+ TotalPaddingBefore += std::max<int64_t>(
+ (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
+ TotalPaddingAfter += std::max<int64_t>(
+ (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
+ }
+
+ // If the amount of padding is too large, give up.
+ // FIXME: do something smarter here.
+ if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
+ continue;
+
+ // Calculate the offset to the value as a (possibly negative) byte offset
+ // and (if applicable) a bit offset, and store the values in the targets.
+ int64_t OffsetByte;
+ uint64_t OffsetBit;
+ if (TotalPaddingBefore <= TotalPaddingAfter)
+ setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
+ OffsetBit);
+ else
+ setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
+ OffsetBit);
+
+ // Rewrite each call to a load from OffsetByte/OffsetBit.
+ for (auto Call : CSByConstantArg.second) {
+ IRBuilder<> B(Call.CS.getInstruction());
+ Value *Addr = B.CreateConstGEP1_64(Call.VTable, OffsetByte);
+ if (BitWidth == 1) {
+ Value *Bits = B.CreateLoad(Addr);
+ Value *Bit = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
+ Value *BitsAndBit = B.CreateAnd(Bits, Bit);
+ auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
+ Call.replaceAndErase(IsBitSet);
+ } else {
+ Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
+ Value *Val = B.CreateLoad(RetType, ValAddr);
+ Call.replaceAndErase(Val);
+ }
+ }
+ }
+ return true;
+}
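+
+// Rewrite sketch (illustrative textual IR) for a 32-bit return value stored
+// at OffsetByte == -5 relative to the vtable pointer:
+//   %addr    = getelementptr i8, i8* %vtable, i64 -5
+//   %valaddr = bitcast i8* %addr to i32*
+//   %val     = load i32, i32* %valaddr
+// and %val replaces the original virtual call.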
+
+void DevirtModule::rebuildGlobal(VTableBits &B) {
+ if (B.Before.Bytes.empty() && B.After.Bytes.empty())
+ return;
+
+ // Align each byte array to pointer width.
+ unsigned PointerSize = M.getDataLayout().getPointerSize();
+ B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize));
+ B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize));
+
+ // Before was stored in reverse order; flip it now.
+ for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
+ std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);
+
+ // Build an anonymous global containing the before bytes, followed by the
+ // original initializer, followed by the after bytes.
+ auto NewInit = ConstantStruct::getAnon(
+ {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
+ B.GV->getInitializer(),
+ ConstantDataArray::get(M.getContext(), B.After.Bytes)});
+ auto NewGV =
+ new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
+ GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
+ NewGV->setSection(B.GV->getSection());
+ NewGV->setComdat(B.GV->getComdat());
+
+ // Copy the original vtable's metadata to the anonymous global, adjusting
+ // offsets as required.
+ NewGV->copyMetadata(B.GV, B.Before.Bytes.size());
+
+ // Build an alias named after the original global, pointing at the second
+ // element (the original initializer).
+ auto Alias = GlobalAlias::create(
+ B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
+ ConstantExpr::getGetElementPtr(
+ NewInit->getType(), NewGV,
+ ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, 1)}),
+ &M);
+ Alias->setVisibility(B.GV->getVisibility());
+ Alias->takeName(B.GV);
+
+ B.GV->replaceAllUsesWith(Alias);
+ B.GV->eraseFromParent();
+}
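+
+// Layout sketch (illustrative): the rebuilt global is an anonymous constant
+//   { [m x i8] <before bytes>, <original initializer>, [n x i8] <after bytes> }
+// and an alias carrying the vtable's original name points at element 1, so
+// existing references keep the original address point.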
+
+void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
+ Function *AssumeFunc) {
+ // Find all virtual calls via a virtual table pointer %p under an assumption
+ // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
+ // points to a member of the type identifier %md. Group calls by (type ID,
+ // offset) pair (effectively the identity of the virtual function) and store
+ // to CallSlots.
+ DenseSet<Value *> SeenPtrs;
+ for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
+ I != E;) {
+ auto CI = dyn_cast<CallInst>(I->getUser());
+ ++I;
+ if (!CI)
+ continue;
+
+ // Search for virtual calls based on %p and add them to DevirtCalls.
+ SmallVector<DevirtCallSite, 1> DevirtCalls;
+ SmallVector<CallInst *, 1> Assumes;
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
+
+ // If we found any, add them to CallSlots. Only do this if we haven't seen
+ // the vtable pointer before, as it may have been CSE'd with pointers from
+ // other call sites, and we don't want to process call sites multiple times.
+ if (!Assumes.empty()) {
+ Metadata *TypeId =
+ cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
+ Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
+ if (SeenPtrs.insert(Ptr).second) {
+ for (DevirtCallSite Call : DevirtCalls) {
+ CallSlots[{TypeId, Call.Offset}].push_back(
+ {CI->getArgOperand(0), Call.CS, nullptr});
+ }
+ }
+ }
+
+ // We no longer need the assumes or the type test.
+ for (auto Assume : Assumes)
+ Assume->eraseFromParent();
+ // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
+ // may use the vtable argument later.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+ }
+}
+
+void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
+ Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
+
+ for (auto I = TypeCheckedLoadFunc->use_begin(),
+ E = TypeCheckedLoadFunc->use_end();
+ I != E;) {
+ auto CI = dyn_cast<CallInst>(I->getUser());
+ ++I;
+ if (!CI)
+ continue;
+
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Offset = CI->getArgOperand(1);
+ Value *TypeIdValue = CI->getArgOperand(2);
+ Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
+
+ SmallVector<DevirtCallSite, 1> DevirtCalls;
+ SmallVector<Instruction *, 1> LoadedPtrs;
+ SmallVector<Instruction *, 1> Preds;
+ bool HasNonCallUses = false;
+ findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
+ HasNonCallUses, CI);
+
+ // Start by generating "pessimistic" code that explicitly loads the function
+ // pointer from the vtable and performs the type check. If possible, we will
+ // eliminate the load and the type check later.
+
+ // If possible, only generate the load at the point where it is used.
+ // This helps avoid unnecessary spills.
+ IRBuilder<> LoadB(
+ (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
+ Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
+ Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
+ Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);
+
+ for (Instruction *LoadedPtr : LoadedPtrs) {
+ LoadedPtr->replaceAllUsesWith(LoadedValue);
+ LoadedPtr->eraseFromParent();
+ }
+
+ // Likewise for the type test.
+ IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
+ CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});
+
+ for (Instruction *Pred : Preds) {
+ Pred->replaceAllUsesWith(TypeTestCall);
+ Pred->eraseFromParent();
+ }
+
+ // We have already erased any extractvalue instructions that refer to the
+ // intrinsic call, but the intrinsic may have other non-extractvalue uses
+ // (although this is unlikely). In that case, explicitly build a pair and
+ // RAUW it.
+ if (!CI->use_empty()) {
+ Value *Pair = UndefValue::get(CI->getType());
+ IRBuilder<> B(CI);
+ Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
+ Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
+ CI->replaceAllUsesWith(Pair);
+ }
+
+ // The number of unsafe uses is initially the number of uses.
+ auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
+ NumUnsafeUses = DevirtCalls.size();
+
+ // If the function pointer has a non-call user, we cannot eliminate the type
+ // check, as one of those users may eventually call the pointer. Increment
+ // the unsafe use count to make sure it cannot reach zero.
+ if (HasNonCallUses)
+ ++NumUnsafeUses;
+ for (DevirtCallSite Call : DevirtCalls) {
+ CallSlots[{TypeId, Call.Offset}].push_back(
+ {Ptr, Call.CS, &NumUnsafeUses});
+ }
+
+ CI->eraseFromParent();
+ }
+}
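+
+// Expansion sketch (illustrative textual IR): a type checked load such as:
+//   %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtable, i32 8,
+//                                                  metadata !"_ZTS1A")
+// is rewritten into the explicit form:
+//   %gep     = getelementptr i8, i8* %vtable, i32 8
+//   %fptrptr = bitcast i8* %gep to i8**
+//   %fptr    = load i8*, i8** %fptrptr
+//   %test    = call i1 @llvm.type.test(i8* %vtable, metadata !"_ZTS1A")
+// with the pair's elements RAUW'd to %fptr and %test respectively.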
+
+bool DevirtModule::run() {
+ Function *TypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+ Function *TypeCheckedLoadFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+ Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
+
+ if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
+ AssumeFunc->use_empty()) &&
+ (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
+ return false;
+
+ if (TypeTestFunc && AssumeFunc)
+ scanTypeTestUsers(TypeTestFunc, AssumeFunc);
+
+ if (TypeCheckedLoadFunc)
+ scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
+
+ // Rebuild type metadata into a map for easy lookup.
+ std::vector<VTableBits> Bits;
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
+ buildTypeIdentifierMap(Bits, TypeIdMap);
+ if (TypeIdMap.empty())
+ return true;
+
+ // For each (type, offset) pair:
+ bool DidVirtualConstProp = false;
+ for (auto &S : CallSlots) {
+ // Search each of the members of the type identifier for the virtual
+ // function implementation at offset S.first.ByteOffset, and add to
+ // TargetsForSlot.
+ std::vector<VirtualCallTarget> TargetsForSlot;
+ if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
+ S.first.ByteOffset))
+ continue;
+
+ if (trySingleImplDevirt(TargetsForSlot, S.second))
+ continue;
+
+ DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second);
+ }
+
+ // If we were able to eliminate all unsafe uses for a type checked load,
+ // eliminate the type test by replacing it with true.
+ if (TypeCheckedLoadFunc) {
+ auto True = ConstantInt::getTrue(M.getContext());
+ for (auto &&U : NumUnsafeUsesForTypeTest) {
+ if (U.second == 0) {
+ U.first->replaceAllUsesWith(True);
+ U.first->eraseFromParent();
+ }
+ }
+ }
+
+ // Rebuild each global we touched as part of virtual constant propagation to
+ // include the before and after bytes.
+ if (DidVirtualConstProp)
+ for (VTableBits &B : Bits)
+ rebuildGlobal(B);
+
+ return true;
+}