author     Dimitry Andric <dim@FreeBSD.org>    2017-12-18 20:10:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-12-18 20:10:56 +0000
commit     044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree       1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Transforms/IPO
parent     eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--  lib/Transforms/IPO/AlwaysInliner.cpp             3
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp        70
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt                1
-rw-r--r--  lib/Transforms/IPO/CalledValuePropagation.cpp  423
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp            22
-rw-r--r--  lib/Transforms/IPO/CrossDSOCFI.cpp              12
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp  66
-rw-r--r--  lib/Transforms/IPO/ElimAvailExtern.cpp          18
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp                 2
-rw-r--r--  lib/Transforms/IPO/ForceFunctionAttrs.cpp        2
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp           115
-rw-r--r--  lib/Transforms/IPO/FunctionImport.cpp          318
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                 7
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp               187
-rw-r--r--  lib/Transforms/IPO/GlobalSplit.cpp              31
-rw-r--r--  lib/Transforms/IPO/IPO.cpp                       5
-rw-r--r--  lib/Transforms/IPO/InferFunctionAttrs.cpp        1
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp             15
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                 229
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp             5
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp          315
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp          134
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp         781
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp      165
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp                   1
-rw-r--r--  lib/Transforms/IPO/SampleProfile.cpp           380
-rw-r--r--  lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp     41
-rw-r--r--  lib/Transforms/IPO/WholeProgramDevirt.cpp      202
28 files changed, 2669 insertions(+), 882 deletions(-)
diff --git a/lib/Transforms/IPO/AlwaysInliner.cpp b/lib/Transforms/IPO/AlwaysInliner.cpp
index b7d96007c24a..a4bbc99b1f90 100644
--- a/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -15,15 +15,12 @@
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/IPO.h"
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 72bae203ee94..b25cbcad3b9d 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -1,4 +1,4 @@
-//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===//
+//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -31,30 +31,59 @@
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <map>
#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "argpromotion"
@@ -65,7 +94,7 @@ STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
/// A vector used to hold the indices of a single GEP instruction
-typedef std::vector<uint64_t> IndicesVector;
+using IndicesVector = std::vector<uint64_t>;
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
@@ -75,13 +104,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
ReplaceCallSite) {
-
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
FunctionType *FTy = F->getFunctionType();
std::vector<Type *> Params;
- typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
+ using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>;
// ScalarizedElements - If we are promoting a pointer that has elements
// accessed out of it, keep track of which elements are accessed so that we
@@ -89,7 +117,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
//
// Arguments that are directly loaded will have a zero element value here, to
// handle cases where there are both a direct load and GEP accesses.
- //
std::map<Argument *, ScalarizeTable> ScalarizedElements;
// OriginalLoads - Keep track of a representative load instruction from the
@@ -335,7 +362,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well.
- //
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
I2 = NF->arg_begin();
I != E; ++I) {
@@ -537,7 +563,7 @@ static void markIndicesSafe(const IndicesVector &ToMark,
/// arguments passed in.
static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
AAResults &AAR, unsigned MaxElements) {
- typedef std::set<IndicesVector> GEPIndicesSet;
+ using GEPIndicesSet = std::set<IndicesVector>;
// Quick exit for unused arguments
if (Arg->use_empty())
@@ -693,7 +719,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
BasicBlock *BB = Load->getParent();
MemoryLocation Loc = MemoryLocation::get(Load);
- if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
+ if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
@@ -714,7 +740,6 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
/// \brief Checks if a type could have padding bytes.
static bool isDenselyPacked(Type *type, const DataLayout &DL) {
-
// There is no size information, so be conservative.
if (!type->isSized())
return false;
@@ -749,7 +774,6 @@ static bool isDenselyPacked(Type *type, const DataLayout &DL) {
/// \brief Checks if the padding bytes of an argument could be accessed.
static bool canPaddingBeAccessed(Argument *arg) {
-
assert(arg->hasByValAttr());
// Track all the pointers to the argument to make sure they are not captured.
@@ -788,7 +812,6 @@ static bool canPaddingBeAccessed(Argument *arg) {
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
-///
static Function *
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
unsigned MaxElements,
@@ -964,9 +987,17 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
}
namespace {
+
/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
-///
struct ArgPromotion : public CallGraphSCCPass {
+ // Pass identification, replacement for typeid
+ static char ID;
+
+ explicit ArgPromotion(unsigned MaxElements = 3)
+ : CallGraphSCCPass(ID), MaxElements(MaxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
@@ -975,21 +1006,20 @@ struct ArgPromotion : public CallGraphSCCPass {
}
bool runOnSCC(CallGraphSCC &SCC) override;
- static char ID; // Pass identification, replacement for typeid
- explicit ArgPromotion(unsigned MaxElements = 3)
- : CallGraphSCCPass(ID), MaxElements(MaxElements) {
- initializeArgPromotionPass(*PassRegistry::getPassRegistry());
- }
private:
using llvm::Pass::doInitialization;
+
bool doInitialization(CallGraph &CG) override;
+
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned MaxElements;
};
-}
+
+} // end anonymous namespace
char ArgPromotion::ID = 0;
+
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false,
false)
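
The 'by reference' to 'by value' promotion this pass performs is easiest to see in a source-level analogue. A minimal sketch, in hypothetical C++ (the pass itself rewrites LLVM IR prototypes and call sites, not source):

    // Before promotion: the callee takes a pointer and loads through it.
    static int incBefore(const int *P) { return *P + 1; }

    // After promotion: the caller performs the load and passes the scalar,
    // so the pointer argument disappears from the prototype.
    static int incAfter(int PVal) { return PVal + 1; }

    int main() {
      int X = 41;
      return incBefore(&X) == incAfter(X) ? 0 : 1; // exits 0
    }

The MaxElements field above (default 3, 0 for unlimited) caps how many distinct loads may be scalarized out of a single pointer argument before promotion is abandoned.
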
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 67f18a307b9b..397561746f86 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMipo
AlwaysInliner.cpp
ArgumentPromotion.cpp
BarrierNoopPass.cpp
+ CalledValuePropagation.cpp
ConstantMerge.cpp
CrossDSOCFI.cpp
DeadArgumentElimination.cpp
diff --git a/lib/Transforms/IPO/CalledValuePropagation.cpp b/lib/Transforms/IPO/CalledValuePropagation.cpp
new file mode 100644
index 000000000000..c5f6336aa2be
--- /dev/null
+++ b/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -0,0 +1,423 @@
+//===- CalledValuePropagation.cpp - Propagate called values -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a transformation that attaches !callees metadata to
+// indirect call sites. For a given call site, the metadata, if present,
+// indicates the set of functions the call site could possibly target at
+// run-time. This metadata is added to indirect call sites when the set of
+// possible targets can be determined by analysis and is known to be small. The
+// analysis driving the transformation is similar to constant propagation and
+// makes use of the generic sparse propagation solver.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/CalledValuePropagation.h"
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "called-value-propagation"
+
+/// The maximum number of functions to track per lattice value. Once the number
+/// of functions a call site can possibly target exceeds this threshold, its
+/// lattice value becomes overdefined. The number of possible lattice values is
+/// bounded by Ch(F, M), where F is the number of functions in the module and M
+/// is MaxFunctionsPerValue. As such, this value should be kept very small. We
+/// likely can't do anything useful for call sites with a large number of
+/// possible targets, anyway.
+static cl::opt<unsigned> MaxFunctionsPerValue(
+ "cvp-max-functions-per-value", cl::Hidden, cl::init(4),
+ cl::desc("The maximum number of functions to track per lattice value"));
+
+namespace {
+/// To enable interprocedural analysis, we assign LLVM values to the following
+/// groups. The register group represents SSA registers, the return group
+/// represents the return values of functions, and the memory group represents
+/// in-memory values. An LLVM Value can technically be in more than one group.
+/// It's necessary to distinguish these groups so we can, for example, track a
+/// global variable separately from the value stored at its location.
+enum class IPOGrouping { Register, Return, Memory };
+
+/// Our LatticeKeys are PointerIntPairs composed of LLVM values and groupings.
+using CVPLatticeKey = PointerIntPair<Value *, 2, IPOGrouping>;
+
+/// The lattice value type used by our custom lattice function. It holds the
+/// lattice state, and a set of functions.
+class CVPLatticeVal {
+public:
+ /// The states of the lattice values. Only the FunctionSet state is
+ /// interesting. It indicates the set of functions to which an LLVM value may
+ /// refer.
+ enum CVPLatticeStateTy { Undefined, FunctionSet, Overdefined, Untracked };
+
+ /// Comparator for sorting the functions set. We want to keep the order
+ /// deterministic for testing, etc.
+ struct Compare {
+ bool operator()(const Function *LHS, const Function *RHS) const {
+ return LHS->getName() < RHS->getName();
+ }
+ };
+
+ CVPLatticeVal() : LatticeState(Undefined) {}
+ CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {}
+ CVPLatticeVal(std::set<Function *, Compare> &&Functions)
+ : LatticeState(FunctionSet), Functions(Functions) {}
+
+ /// Get a reference to the functions held by this lattice value. The number
+ /// of functions will be zero for states other than FunctionSet.
+ const std::set<Function *, Compare> &getFunctions() const {
+ return Functions;
+ }
+
+ /// Returns true if the lattice value is in the FunctionSet state.
+ bool isFunctionSet() const { return LatticeState == FunctionSet; }
+
+ bool operator==(const CVPLatticeVal &RHS) const {
+ return LatticeState == RHS.LatticeState && Functions == RHS.Functions;
+ }
+
+ bool operator!=(const CVPLatticeVal &RHS) const {
+ return LatticeState != RHS.LatticeState || Functions != RHS.Functions;
+ }
+
+private:
+ /// Holds the state this lattice value is in.
+ CVPLatticeStateTy LatticeState;
+
+ /// Holds functions indicating the possible targets of call sites. This set
+ /// is empty for lattice values in the undefined, overdefined, and untracked
+ /// states. The maximum size of the set is controlled by
+ /// MaxFunctionsPerValue. Since most LLVM values are expected to be in
+ /// uninteresting states (i.e., overdefined), CVPLatticeVal objects should be
+ /// small and efficiently copyable.
+ std::set<Function *, Compare> Functions;
+};
+
+/// The custom lattice function used by the generic sparse propagation solver.
+/// It handles merging lattice values and computing new lattice values for
+/// constants, arguments, values returned from trackable functions, and values
+/// located in trackable global variables. It also computes the lattice values
+/// that change as a result of executing instructions.
+class CVPLatticeFunc
+ : public AbstractLatticeFunction<CVPLatticeKey, CVPLatticeVal> {
+public:
+ CVPLatticeFunc()
+ : AbstractLatticeFunction(CVPLatticeVal(CVPLatticeVal::Undefined),
+ CVPLatticeVal(CVPLatticeVal::Overdefined),
+ CVPLatticeVal(CVPLatticeVal::Untracked)) {}
+
+ /// Compute and return a CVPLatticeVal for the given CVPLatticeKey.
+ CVPLatticeVal ComputeLatticeVal(CVPLatticeKey Key) override {
+ switch (Key.getInt()) {
+ case IPOGrouping::Register:
+ if (isa<Instruction>(Key.getPointer())) {
+ return getUndefVal();
+ } else if (auto *A = dyn_cast<Argument>(Key.getPointer())) {
+ if (canTrackArgumentsInterprocedurally(A->getParent()))
+ return getUndefVal();
+ } else if (auto *C = dyn_cast<Constant>(Key.getPointer())) {
+ return computeConstant(C);
+ }
+ return getOverdefinedVal();
+ case IPOGrouping::Memory:
+ case IPOGrouping::Return:
+ if (auto *GV = dyn_cast<GlobalVariable>(Key.getPointer())) {
+ if (canTrackGlobalVariableInterprocedurally(GV))
+ return computeConstant(GV->getInitializer());
+ } else if (auto *F = dyn_cast<Function>(Key.getPointer()))
+ if (canTrackReturnsInterprocedurally(F))
+ return getUndefVal();
+ }
+ return getOverdefinedVal();
+ }
+
+ /// Merge the two given lattice values. The interesting cases are merging two
+ /// FunctionSet values and a FunctionSet value with an Undefined value. For
+ /// these cases, we simply union the function sets. If the size of the union
+ /// is greater than the maximum functions we track, the merged value is
+ /// overdefined.
+ CVPLatticeVal MergeValues(CVPLatticeVal X, CVPLatticeVal Y) override {
+ if (X == getOverdefinedVal() || Y == getOverdefinedVal())
+ return getOverdefinedVal();
+ if (X == getUndefVal() && Y == getUndefVal())
+ return getUndefVal();
+ std::set<Function *, CVPLatticeVal::Compare> Union;
+ std::set_union(X.getFunctions().begin(), X.getFunctions().end(),
+ Y.getFunctions().begin(), Y.getFunctions().end(),
+ std::inserter(Union, Union.begin()),
+ CVPLatticeVal::Compare{});
+ if (Union.size() > MaxFunctionsPerValue)
+ return getOverdefinedVal();
+ return CVPLatticeVal(std::move(Union));
+ }
+
+ /// Compute the lattice values that change as a result of executing the given
+ /// instruction. The changed values are stored in \p ChangedValues. We handle
+ /// just a few kinds of instructions since we're only propagating values that
+ /// can be called.
+ void ComputeInstructionState(
+ Instruction &I, DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
+ SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) override {
+ switch (I.getOpcode()) {
+ case Instruction::Call:
+ return visitCallSite(cast<CallInst>(&I), ChangedValues, SS);
+ case Instruction::Invoke:
+ return visitCallSite(cast<InvokeInst>(&I), ChangedValues, SS);
+ case Instruction::Load:
+ return visitLoad(*cast<LoadInst>(&I), ChangedValues, SS);
+ case Instruction::Ret:
+ return visitReturn(*cast<ReturnInst>(&I), ChangedValues, SS);
+ case Instruction::Select:
+ return visitSelect(*cast<SelectInst>(&I), ChangedValues, SS);
+ case Instruction::Store:
+ return visitStore(*cast<StoreInst>(&I), ChangedValues, SS);
+ default:
+ return visitInst(I, ChangedValues, SS);
+ }
+ }
+
+ /// Print the given CVPLatticeVal to the specified stream.
+ void PrintLatticeVal(CVPLatticeVal LV, raw_ostream &OS) override {
+ if (LV == getUndefVal())
+ OS << "Undefined ";
+ else if (LV == getOverdefinedVal())
+ OS << "Overdefined";
+ else if (LV == getUntrackedVal())
+ OS << "Untracked ";
+ else
+ OS << "FunctionSet";
+ }
+
+ /// Print the given CVPLatticeKey to the specified stream.
+ void PrintLatticeKey(CVPLatticeKey Key, raw_ostream &OS) override {
+ if (Key.getInt() == IPOGrouping::Register)
+ OS << "<reg> ";
+ else if (Key.getInt() == IPOGrouping::Memory)
+ OS << "<mem> ";
+ else if (Key.getInt() == IPOGrouping::Return)
+ OS << "<ret> ";
+ if (isa<Function>(Key.getPointer()))
+ OS << Key.getPointer()->getName();
+ else
+ OS << *Key.getPointer();
+ }
+
+ /// We collect a set of indirect calls when visiting call sites. This method
+ /// returns a reference to that set.
+ SmallPtrSetImpl<Instruction *> &getIndirectCalls() { return IndirectCalls; }
+
+private:
+ /// Holds the indirect calls we encounter during the analysis. We will attach
+ /// metadata to these calls after the analysis indicating the functions the
+ /// calls can possibly target.
+ SmallPtrSet<Instruction *, 32> IndirectCalls;
+
+ /// Compute a new lattice value for the given constant. The constant, after
+ /// stripping any pointer casts, should be a Function. We ignore null
+ /// pointers as an optimization, since calling these values is undefined
+ /// behavior.
+ CVPLatticeVal computeConstant(Constant *C) {
+ if (isa<ConstantPointerNull>(C))
+ return CVPLatticeVal(CVPLatticeVal::FunctionSet);
+ if (auto *F = dyn_cast<Function>(C->stripPointerCasts()))
+ return CVPLatticeVal({F});
+ return getOverdefinedVal();
+ }
+
+ /// Handle return instructions. The function's return state is the merge of
+ /// the returned value state and the function's return state.
+ void visitReturn(ReturnInst &I,
+ DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
+ SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
+ Function *F = I.getParent()->getParent();
+ if (F->getReturnType()->isVoidTy())
+ return;
+ auto RegI = CVPLatticeKey(I.getReturnValue(), IPOGrouping::Register);
+ auto RetF = CVPLatticeKey(F, IPOGrouping::Return);
+ ChangedValues[RetF] =
+ MergeValues(SS.getValueState(RegI), SS.getValueState(RetF));
+ }
+
+ /// Handle call sites. The state of a called function's formal arguments is
+ /// the merge of the argument state with the call site's corresponding actual
+ /// argument state. The call site state is the merge of the call site state
+ /// with the returned value state of the called function.
+ void visitCallSite(CallSite CS,
+ DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
+ SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
+ Function *F = CS.getCalledFunction();
+ Instruction *I = CS.getInstruction();
+ auto RegI = CVPLatticeKey(I, IPOGrouping::Register);
+
+ // If this is an indirect call, save it so we can quickly revisit it when
+ // attaching metadata.
+ if (!F)
+ IndirectCalls.insert(I);
+
+ // If we can't track the function's return values, there's nothing to do.
+ if (!F || !canTrackReturnsInterprocedurally(F)) {
+ ChangedValues[RegI] = getOverdefinedVal();
+ return;
+ }
+
+ // Inform the solver that the called function is executable, and perform
+ // the merges for the arguments and return value.
+ SS.MarkBlockExecutable(&F->front());
+ auto RetF = CVPLatticeKey(F, IPOGrouping::Return);
+ for (Argument &A : F->args()) {
+ auto RegFormal = CVPLatticeKey(&A, IPOGrouping::Register);
+ auto RegActual =
+ CVPLatticeKey(CS.getArgument(A.getArgNo()), IPOGrouping::Register);
+ ChangedValues[RegFormal] =
+ MergeValues(SS.getValueState(RegFormal), SS.getValueState(RegActual));
+ }
+ ChangedValues[RegI] =
+ MergeValues(SS.getValueState(RegI), SS.getValueState(RetF));
+ }
+
+ /// Handle select instructions. The select instruction state is the merge of
+ /// true and false value states.
+ void visitSelect(SelectInst &I,
+ DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
+ SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
+ auto RegI = CVPLatticeKey(&I, IPOGrouping::Register);
+ auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register);
+ auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register);
+ ChangedValues[RegI] =
+ MergeValues(SS.getValueState(RegT), SS.getValueState(RegF));
+ }
+
+ /// Handle load instructions. If the pointer operand of the load is a global
+ /// variable, we attempt to track the value. The loaded value state is the
+ /// merge of the loaded value state with the global variable state.
+ void visitLoad(LoadInst &I,
+ DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
+ SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
+ auto RegI = CVPLatticeKey(&I, IPOGrouping::Register);
+ if (auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand())) {
+ auto MemGV = CVPLatticeKey(GV, IPOGrouping::Memory);
+ ChangedValues[RegI] =
+ MergeValues(SS.getValueState(RegI), SS.getValueState(MemGV));
+ } else {
+ ChangedValues[RegI] = getOverdefinedVal();
+ }
+ }
+
+ /// Handle store instructions. If the pointer operand of the store is a
+ /// global variable, we attempt to track the value. The global variable state
+ /// is the merge of the stored value state with the global variable state.
+ void visitStore(StoreInst &I,
+ DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
+ SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
+ auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand());
+ if (!GV)
+ return;
+ auto RegI = CVPLatticeKey(I.getValueOperand(), IPOGrouping::Register);
+ auto MemGV = CVPLatticeKey(GV, IPOGrouping::Memory);
+ ChangedValues[MemGV] =
+ MergeValues(SS.getValueState(RegI), SS.getValueState(MemGV));
+ }
+
+ /// Handle all other instructions. All other instructions are marked
+ /// overdefined.
+ void visitInst(Instruction &I,
+ DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
+ SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
+ auto RegI = CVPLatticeKey(&I, IPOGrouping::Register);
+ ChangedValues[RegI] = getOverdefinedVal();
+ }
+};
+} // namespace
+
+namespace llvm {
+/// A specialization of LatticeKeyInfo for CVPLatticeKeys. The generic solver
+/// must translate between LatticeKeys and LLVM Values when adding Values to
+/// its work list and inspecting the state of control-flow related values.
+template <> struct LatticeKeyInfo<CVPLatticeKey> {
+ static inline Value *getValueFromLatticeKey(CVPLatticeKey Key) {
+ return Key.getPointer();
+ }
+ static inline CVPLatticeKey getLatticeKeyFromValue(Value *V) {
+ return CVPLatticeKey(V, IPOGrouping::Register);
+ }
+};
+} // namespace llvm
+
+static bool runCVP(Module &M) {
+ // Our custom lattice function and generic sparse propagation solver.
+ CVPLatticeFunc Lattice;
+ SparseSolver<CVPLatticeKey, CVPLatticeVal> Solver(&Lattice);
+
+ // For each function in the module, if we can't track its arguments, let the
+ // generic solver assume it is executable.
+ for (Function &F : M)
+ if (!F.isDeclaration() && !canTrackArgumentsInterprocedurally(&F))
+ Solver.MarkBlockExecutable(&F.front());
+
+ // Solve our custom lattice. In doing so, we will also build a set of
+ // indirect call sites.
+ Solver.Solve();
+
+ // Attach metadata to the indirect call sites that were collected indicating
+ // the set of functions they can possibly target.
+ bool Changed = false;
+ MDBuilder MDB(M.getContext());
+ for (Instruction *C : Lattice.getIndirectCalls()) {
+ CallSite CS(C);
+ auto RegI = CVPLatticeKey(CS.getCalledValue(), IPOGrouping::Register);
+ CVPLatticeVal LV = Solver.getExistingValueState(RegI);
+ if (!LV.isFunctionSet() || LV.getFunctions().empty())
+ continue;
+ MDNode *Callees = MDB.createCallees(SmallVector<Function *, 4>(
+ LV.getFunctions().begin(), LV.getFunctions().end()));
+ C->setMetadata(LLVMContext::MD_callees, Callees);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+PreservedAnalyses CalledValuePropagationPass::run(Module &M,
+ ModuleAnalysisManager &) {
+ runCVP(M);
+ return PreservedAnalyses::all();
+}
+
+namespace {
+class CalledValuePropagationLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ CalledValuePropagationLegacyPass() : ModulePass(ID) {
+ initializeCalledValuePropagationLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ return runCVP(M);
+ }
+};
+} // namespace
+
+char CalledValuePropagationLegacyPass::ID = 0;
+INITIALIZE_PASS(CalledValuePropagationLegacyPass, "called-value-propagation",
+ "Called Value Propagation", false, false)
+
+ModulePass *llvm::createCalledValuePropagationPass() {
+ return new CalledValuePropagationLegacyPass();
+}
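
The !callees metadata this new pass emits can be produced and consumed with the same MDBuilder API runCVP() calls above. A minimal sketch, assuming an existing indirect CallInst and a known target set (annotateAndInspect is a hypothetical helper, not part of this patch):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void annotateAndInspect(CallInst *Call, ArrayRef<Function *> Targets) {
      // Attach !callees, as runCVP() does for each collected indirect call.
      MDBuilder MDB(Call->getModule()->getContext());
      Call->setMetadata(LLVMContext::MD_callees, MDB.createCallees(Targets));

      // A consumer recovers the possible targets from the metadata operands.
      if (MDNode *Callees = Call->getMetadata(LLVMContext::MD_callees))
        for (const MDOperand &Op : Callees->operands())
          if (auto *F = mdconst::dyn_extract_or_null<Function>(Op))
            errs() << "possible target: " << F->getName() << "\n";
    }

Because MaxFunctionsPerValue keeps the FunctionSet state small, the emitted metadata lists at most a handful of targets per call site.
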
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 62b5a9c9ba26..e0b1037053f0 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,16 +19,23 @@
#include "llvm/Transforms/IPO/ConstantMerge.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Transforms/IPO.h"
+#include <algorithm>
+#include <cassert>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "constmerge"
@@ -102,8 +109,7 @@ static bool mergeConstants(Module &M) {
// constants together may allow us to merge other constants together if the
// second level constants have initializers which point to the globals that
// were just merged.
- while (1) {
-
+ while (true) {
// First: Find the canonical constants others will be merged with.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
@@ -225,23 +231,27 @@ PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) {
}
namespace {
+
struct ConstantMergeLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
+
ConstantMergeLegacyPass() : ModulePass(ID) {
initializeConstantMergeLegacyPassPass(*PassRegistry::getPassRegistry());
}
// For this pass, process all of the globals in the module, eliminating
// duplicate constants.
- bool runOnModule(Module &M) {
+ bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
return mergeConstants(M);
}
};
-}
+
+} // end anonymous namespace
char ConstantMergeLegacyPass::ID = 0;
+
INITIALIZE_PASS(ConstantMergeLegacyPass, "constmerge",
"Merge Duplicate Global Constants", false, false)
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index d94aa5da8560..886029ea58d5 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -31,7 +31,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -81,7 +80,7 @@ ConstantInt *CrossDSOCFI::extractNumericTypeId(MDNode *MD) {
void CrossDSOCFI::buildCFICheck(Module &M) {
// FIXME: verify that __cfi_check ends up near the end of the code section,
// but before the jump slots created in LowerTypeTests.
- llvm::DenseSet<uint64_t> TypeIds;
+ SetVector<uint64_t> TypeIds;
SmallVector<MDNode *, 2> Types;
for (GlobalObject &GO : M.global_objects()) {
Types.clear();
@@ -115,6 +114,11 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
// linker knows about the symbol; this pass replaces the function body.
F->deleteBody();
F->setAlignment(4096);
+
+ Triple T(M.getTargetTriple());
+ if (T.isARM() || T.isThumb())
+ F->addFnAttr("target-features", "+thumb-mode");
+
auto args = F->arg_begin();
Value &CallSiteTypeId = *(args++);
CallSiteTypeId.setName("CallSiteTypeId");
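
The added hunk queries the module's target triple before forcing +thumb-mode on the rebuilt __cfi_check body. The same Triple predicates in isolation, as a standalone sketch (the triple string is illustrative):

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      llvm::Triple T("thumbv7-unknown-linux-gnueabihf");
      // isARM()/isThumb() cover the little- and big-endian variants; either
      // one is enough for the pass to pin the thumb-mode target feature.
      assert(T.isARM() || T.isThumb());
      return 0;
    }
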
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 8e26849ea9e3..5446541550e5 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -1,4 +1,4 @@
-//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===//
+//===- DeadArgumentElimination.cpp - Eliminate dead arguments -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,24 +20,36 @@
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <set>
-#include <tuple>
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "deadargelim"
@@ -46,9 +58,10 @@ STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
STATISTIC(NumArgumentsReplacedWithUndef,
"Number of unread args replaced with undef");
+
namespace {
+
/// DAE - The dead argument elimination pass.
- ///
class DAE : public ModulePass {
protected:
// DAH uses this to specify a different ID.
@@ -56,6 +69,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
+
DAE() : ModulePass(ID) {
initializeDAEPass(*PassRegistry::getPassRegistry());
}
@@ -71,33 +85,38 @@ namespace {
virtual bool ShouldHackArguments() const { return false; }
};
-}
+} // end anonymous namespace
char DAE::ID = 0;
+
INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
namespace {
+
/// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
/// deletes arguments to functions which are external. This is only for use
/// by bugpoint.
struct DAH : public DAE {
static char ID;
+
DAH() : DAE(ID) {}
bool ShouldHackArguments() const override { return true; }
};
-}
+
+} // end anonymous namespace
char DAH::ID = 0;
+
INITIALIZE_PASS(DAH, "deadarghaX0r",
"Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
false, false)
/// createDeadArgEliminationPass - This pass removes arguments from functions
/// which are not used by the body of the function.
-///
ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
+
ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
/// DeleteDeadVarargs - If this is an function that takes a ... list, and if
@@ -140,7 +159,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// the old function, but doesn't have isVarArg set.
FunctionType *FTy = Fn.getFunctionType();
- std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
+ std::vector<Type *> Params(FTy->param_begin(), FTy->param_end());
FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
Params, false);
unsigned NumArgs = Params.size();
@@ -155,7 +174,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// Loop over all of the callers of the function, transforming the call sites
// to pass in a smaller number of arguments into the new function.
//
- std::vector<Value*> Args;
+ std::vector<Value *> Args;
for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
CallSite CS(*I++);
if (!CS)
@@ -214,7 +233,6 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well. While we're at
// it, remove the dead arguments from the DeadArguments list.
- //
for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
I2 = NF->arg_begin(); I != E; ++I, ++I2) {
// Move the name and users over to the new version.
@@ -343,7 +361,6 @@ DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use,
return MaybeLive;
}
-
/// SurveyUse - This looks at a single use of an argument or return value
/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
/// if it causes the used value to become MaybeLive.
@@ -460,7 +477,6 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V,
//
// We consider arguments of non-internal functions to be intrinsically alive as
// well as arguments to functions which have their "address taken".
-//
void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Functions with inalloca parameters are expecting args in a particular
// register and memory layout.
@@ -478,11 +494,14 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
}
unsigned RetCount = NumRetVals(&F);
+
// Assume all return values are dead
- typedef SmallVector<Liveness, 5> RetVals;
+ using RetVals = SmallVector<Liveness, 5>;
+
RetVals RetValLiveness(RetCount, MaybeLive);
- typedef SmallVector<UseVector, 5> RetUses;
+ using RetUses = SmallVector<UseVector, 5>;
+
// These vectors map each return value to the uses that make it MaybeLive, so
// we can add those to the Uses map if the return value really turns out to be
// MaybeLive. Initialized to a list of RetCount empty lists.
@@ -601,15 +620,15 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L,
const UseVector &MaybeLiveUses) {
switch (L) {
- case Live: MarkLive(RA); break;
+ case Live:
+ MarkLive(RA);
+ break;
case MaybeLive:
- {
// Note any uses of this value, so this return value can be
// marked live whenever one of the uses becomes live.
for (const auto &MaybeLiveUse : MaybeLiveUses)
Uses.insert(std::make_pair(MaybeLiveUse, RA));
break;
- }
}
}
@@ -762,7 +781,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// One return type? Just a simple value then, but only if we didn't use to
// return a struct with that simple value before.
NRetTy = RetTypes.front();
- else if (RetTypes.size() == 0)
+ else if (RetTypes.empty())
// No return types? Make it void, but only if we didn't use to return {}.
NRetTy = Type::getVoidTy(F->getContext());
}
@@ -808,7 +827,6 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Loop over all of the callers of the function, transforming the call sites
// to pass in a smaller number of arguments into the new function.
- //
std::vector<Value*> Args;
while (!F->use_empty()) {
CallSite CS(F->user_back());
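
Dead argument elimination's net effect also has a simple source-level analogue, sketched here in hypothetical C++ (the pass works on IR functions whose address is not taken):

    // Before: the second parameter of an internal function is never read.
    static int scaleBefore(int Used, int /*Unused*/) { return Used * 2; }

    // After: the dead parameter is removed and callers stop passing it.
    static int scaleAfter(int Used) { return Used * 2; }

    int main() { return scaleBefore(21, 99) == scaleAfter(21) ? 0 : 1; }
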
diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp
index ecff88c88dcb..d5fef59286dd 100644
--- a/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -1,5 +1,4 @@
-//===-- ElimAvailExtern.cpp - DCE unreachable internal functions
-//----------------===//
+//===- ElimAvailExtern.cpp - DCE unreachable internal functions -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,11 +14,15 @@
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
+
using namespace llvm;
#define DEBUG_TYPE "elim-avail-extern"
@@ -69,8 +72,10 @@ EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
}
namespace {
+
struct EliminateAvailableExternallyLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
+
EliminateAvailableExternallyLegacyPass() : ModulePass(ID) {
initializeEliminateAvailableExternallyLegacyPassPass(
*PassRegistry::getPassRegistry());
@@ -78,16 +83,17 @@ struct EliminateAvailableExternallyLegacyPass : public ModulePass {
// run - Do the EliminateAvailableExternally pass on the specified module,
// optionally updating the specified callgraph to reflect the changes.
- //
- bool runOnModule(Module &M) {
+ bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
return eliminateAvailableExternally(M);
}
};
-}
+
+} // end anonymous namespace
char EliminateAvailableExternallyLegacyPass::ID = 0;
+
INITIALIZE_PASS(EliminateAvailableExternallyLegacyPass, "elim-avail-extern",
"Eliminate Available Externally Globals", false, false)
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index d1147f7d844b..042cacb70ad0 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -12,8 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 968712138208..325a5d77aadb 100644
--- a/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -52,11 +52,13 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
.Case("returns_twice", Attribute::ReturnsTwice)
.Case("safestack", Attribute::SafeStack)
.Case("sanitize_address", Attribute::SanitizeAddress)
+ .Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
.Case("sanitize_memory", Attribute::SanitizeMemory)
.Case("sanitize_thread", Attribute::SanitizeThread)
.Case("ssp", Attribute::StackProtect)
.Case("sspreq", Attribute::StackProtectReq)
.Case("sspstrong", Attribute::StackProtectStrong)
+ .Case("strictfp", Attribute::StrictFP)
.Case("uwtable", Attribute::UWTable)
.Default(Attribute::None);
}
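
parseAttrKind() is a plain StringSwitch table, and the added sanitize_hwaddress and strictfp cases slot into it like any other entry. The same pattern in isolation (Kind and parseKind are illustrative stand-ins):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    enum class Kind { None, StrictFP, UWTable };

    static Kind parseKind(llvm::StringRef S) {
      // Each .Case maps a textual name to an enumerator; .Default supplies
      // the fallback, mirroring Attribute::None above.
      return llvm::StringSwitch<Kind>(S)
          .Case("strictfp", Kind::StrictFP)
          .Case("uwtable", Kind::UWTable)
          .Default(Kind::None);
    }
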
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 813a4b6e2831..5352e32479bb 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -6,34 +6,61 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-///
+//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
-///
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include <cassert>
+#include <iterator>
+#include <map>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "functionattrs"
@@ -57,8 +84,10 @@ static cl::opt<bool> EnableNonnullArgPropagation(
"caller functions."));
namespace {
-typedef SmallSetVector<Function *, 8> SCCNodeSet;
-}
+
+using SCCNodeSet = SmallSetVector<Function *, 8>;
+
+} // end anonymous namespace
/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
@@ -101,17 +130,18 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
SCCNodes.count(CS.getCalledFunction()))
continue;
FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);
+ ModRefInfo MRI = createModRefInfo(MRB);
// If the call doesn't access memory, we're done.
- if (!(MRB & MRI_ModRef))
+ if (isNoModRef(MRI))
continue;
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
// The call could access any memory. If that includes writes, give up.
- if (MRB & MRI_Mod)
+ if (isModSet(MRI))
return MAK_MayWrite;
// If it reads, note it.
- if (MRB & MRI_Ref)
+ if (isRefSet(MRI))
ReadsMemory = true;
continue;
}
@@ -133,10 +163,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
- if (MRB & MRI_Mod)
+ if (isModSet(MRI))
// Writes non-local memory. Give up.
return MAK_MayWrite;
- if (MRB & MRI_Ref)
+ if (isRefSet(MRI))
// Ok, it reads non-local memory.
ReadsMemory = true;
}
@@ -237,6 +267,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
}
namespace {
+
/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
@@ -248,7 +279,7 @@ struct ArgumentGraphNode {
class ArgumentGraph {
// We store pointers to ArgumentGraphNode objects, so it's important that
// that they not move around upon insert.
- typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy;
+ using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;
ArgumentMapTy ArgumentMap;
@@ -263,7 +294,7 @@ class ArgumentGraph {
public:
ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
- typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator;
+ using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;
iterator begin() { return SyntheticRoot.Uses.begin(); }
iterator end() { return SyntheticRoot.Uses.end(); }
@@ -281,8 +312,7 @@ public:
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
- ArgumentUsesTracker(const SCCNodeSet &SCCNodes)
- : Captured(false), SCCNodes(SCCNodes) {}
+ ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}
void tooManyUses() override { Captured = true; }
@@ -331,37 +361,45 @@ struct ArgumentUsesTracker : public CaptureTracker {
return false;
}
- bool Captured; // True only if certainly captured (used outside our SCC).
- SmallVector<Argument *, 4> Uses; // Uses within our SCC.
+ // True only if certainly captured (used outside our SCC).
+ bool Captured = false;
+
+ // Uses within our SCC.
+ SmallVector<Argument *, 4> Uses;
const SCCNodeSet &SCCNodes;
};
-}
+
+} // end anonymous namespace
namespace llvm {
+
template <> struct GraphTraits<ArgumentGraphNode *> {
- typedef ArgumentGraphNode *NodeRef;
- typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
+ using NodeRef = ArgumentGraphNode *;
+ using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;
static NodeRef getEntryNode(NodeRef A) { return A; }
static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};
+
template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }
+
static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
return AG->begin();
}
+
static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};
-}
+
+} // end namespace llvm
/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
const SmallPtrSet<Argument *, 8> &SCCNodes) {
-
SmallVector<Use *, 32> Worklist;
SmallSet<Use *, 32> Visited;
@@ -502,8 +540,8 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
continue;
// There is nothing to do if an argument is already marked as 'returned'.
- if (any_of(F->args(),
- [](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
+ if (llvm::any_of(F->args(),
+ [](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
continue;
auto FindRetArg = [&]() -> Value * {
@@ -884,11 +922,13 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
FlowsToReturn.insert(Ret->getReturnValue());
+ auto &DL = F->getParent()->getDataLayout();
+
for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
Value *RetVal = FlowsToReturn[i];
// If this value is locally known to be non-null, we're good
- if (isKnownNonNull(RetVal))
+ if (isKnownNonZero(RetVal, DL))
continue;
// Otherwise, we need to look upwards since we can't make any local
@@ -1135,8 +1175,11 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
}
namespace {
+
struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
- static char ID; // Pass identification, replacement for typeid
+ // Pass identification, replacement for typeid
+ static char ID;
+
PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
initializePostOrderFunctionAttrsLegacyPassPass(
*PassRegistry::getPassRegistry());
@@ -1151,7 +1194,8 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
CallGraphSCCPass::getAnalysisUsage(AU);
}
};
-}
+
+} // end anonymous namespace
char PostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "functionattrs",
@@ -1214,8 +1258,11 @@ bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
}
namespace {
+
struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
+ // Pass identification, replacement for typeid
+ static char ID;
+
ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
initializeReversePostOrderFunctionAttrsLegacyPassPass(
*PassRegistry::getPassRegistry());
@@ -1229,9 +1276,11 @@ struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
AU.addPreserved<CallGraphWrapperPass>();
}
};
-}
+
+} // end anonymous namespace
char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs",
"Deduce function attributes in RPO", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
@@ -1291,7 +1340,7 @@ static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
}
bool Changed = false;
- for (auto *F : reverse(Worklist))
+ for (auto *F : llvm::reverse(Worklist))
Changed |= addNoRecurseAttrsTopDown(*F);
return Changed;
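
Several hunks above replace raw bit tests on the mod/ref result (e.g. MRB & MRI_Mod) with the named predicate helpers used elsewhere in this patch. A sketch of those predicates in isolation (classify is a hypothetical helper):

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    static const char *classify(ModRefInfo MRI) {
      if (isNoModRef(MRI))
        return "no access";            // neither Mod nor Ref is set
      if (isModSet(MRI) && isRefSet(MRI))
        return "may read and write";
      return isModSet(MRI) ? "may write" : "may read";
    }
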
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 233a36d2bc54..b6d6201cd23b 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -12,30 +12,54 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/FunctionImport.h"
-
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AutoUpgrade.h"
-#include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Linker/IRMover.h"
+#include "llvm/Object/ModuleSymbolTable.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
-
-#define DEBUG_TYPE "function-import"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <cassert>
+#include <memory>
+#include <set>
+#include <string>
+#include <system_error>
+#include <tuple>
+#include <utility>
using namespace llvm;
+#define DEBUG_TYPE "function-import"
+
STATISTIC(NumImportedFunctions, "Number of functions imported");
STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
@@ -61,7 +85,7 @@ static cl::opt<float> ImportHotInstrFactor(
"before processing newly imported functions"));
static cl::opt<float> ImportHotMultiplier(
- "import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"),
+ "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
static cl::opt<float> ImportCriticalMultiplier(
@@ -91,6 +115,18 @@ static cl::opt<bool> EnableImportMetadata(
),
cl::Hidden, cl::desc("Enable import metadata like 'thinlto_src_module'"));
+/// Summary file to use for function importing when using -function-import from
+/// the command line.
+static cl::opt<std::string>
+ SummaryFile("summary-file",
+ cl::desc("The summary file to use for function importing."));
+
+/// Used when testing importing from distributed indexes via opt
+/// -function-import.
+static cl::opt<bool>
+ ImportAllIndex("import-all-index",
+ cl::desc("Import all external functions in index."));
+
// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
LLVMContext &Context) {
@@ -109,8 +145,6 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
return Result;
}
-namespace {
-
/// Given a list of possible callee implementation for a call site, select one
/// that fits the \p Threshold.
///
@@ -129,21 +163,26 @@ selectCallee(const ModuleSummaryIndex &Index,
CalleeSummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
auto *GVSummary = SummaryPtr.get();
+ // For SamplePGO, in computeImportForFunction the OriginalId
+ // may have been used to locate the callee summary list (See
+ // comment there).
+ // The mapping from OriginalId to GUID may return a GUID
+ // that corresponds to a static variable. Filter it out here.
+ // This can happen when
+ // 1) There is a call to a library function which is not defined
+ // in the index.
+ // 2) There is a static variable with the OriginalGUID identical
+ // to the GUID of the library function in 1);
+ // When this happens, the logic for SamplePGO kicks in and
+ // the static variable in 2) will be found, which needs to be
+ // filtered out.
+ if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind)
+ return false;
if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
// There is no point in importing these, we can't inline them
return false;
- if (auto *AS = dyn_cast<AliasSummary>(GVSummary)) {
- GVSummary = &AS->getAliasee();
- // Alias can't point to "available_externally". However when we import
- // linkOnceODR the linkage does not change. So we import the alias
- // and aliasee only in this case.
- // FIXME: we should import alias as available_externally *function*,
- // the destination module does need to know it is an alias.
- if (!GlobalValue::isLinkOnceODRLinkage(GVSummary->linkage()))
- return false;
- }
- auto *Summary = cast<FunctionSummary>(GVSummary);
+ auto *Summary = cast<FunctionSummary>(GVSummary->getBaseObject());
// If this is a local function, make sure we import the copy
// in the caller's module. The only time a local function can
@@ -174,9 +213,28 @@ selectCallee(const ModuleSummaryIndex &Index,
return cast<GlobalValueSummary>(It->get());
}
+namespace {
+
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */,
GlobalValue::GUID>;
+} // anonymous namespace
+
+static ValueInfo
+updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
+ if (!VI.getSummaryList().empty())
+ return VI;
+ // For SamplePGO, the indirect call targets for local functions will
+ // have its original name annotated in profile. We try to find the
+ // corresponding PGOFuncName as the GUID.
+ // FIXME: Consider updating the edges in the graph after building
+ // it, rather than needing to perform this mapping on each walk.
+ auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
+ if (GUID == 0)
+ return nullptr;
+ return Index.getValueInfo(GUID);
+}
+
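
The SamplePGO fallback above exists because a local function is keyed in the
combined index by the GUID of its module-qualified name, while a sample
profile records only the bare original name. A minimal sketch of the two
GUIDs involved (the qualified-name form shown is illustrative):

  // GUID used as the index key for a function "foo" local to file.cpp:
  uint64_t IndexGUID  = GlobalValue::getGUID("file.cpp:foo");
  // GUID computed from the bare name recorded in the sample profile:
  uint64_t OriginalID = GlobalValue::getGUID("foo");
  // Index.getGUIDFromOriginalID(OriginalID) recovers IndexGUID, or returns
  // 0 when no such mapping exists, which is the bail-out path above.
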
/// Compute the list of functions to import for a given caller. Mark these
/// imported functions and the symbols they reference in their source module as
/// exported from their source module.
@@ -191,17 +249,9 @@ static void computeImportForFunction(
DEBUG(dbgs() << " edge -> " << VI.getGUID() << " Threshold:" << Threshold
<< "\n");
- if (VI.getSummaryList().empty()) {
- // For SamplePGO, the indirect call targets for local functions will
- // have its original name annotated in profile. We try to find the
- // corresponding PGOFuncName as the GUID.
- auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
- if (GUID == 0)
- continue;
- VI = Index.getValueInfo(GUID);
- if (!VI)
- continue;
- }
+ VI = updateValueInfoForIndirectCalls(Index, VI);
+ if (!VI)
+ continue;
if (DefinedGVSummaries.count(VI.getGUID())) {
DEBUG(dbgs() << "ignored! Target already in destination module.\n");
@@ -227,16 +277,9 @@ static void computeImportForFunction(
DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n");
continue;
}
- // "Resolve" the summary, traversing alias,
- const FunctionSummary *ResolvedCalleeSummary;
- if (isa<AliasSummary>(CalleeSummary)) {
- ResolvedCalleeSummary = cast<FunctionSummary>(
- &cast<AliasSummary>(CalleeSummary)->getAliasee());
- assert(
- GlobalValue::isLinkOnceODRLinkage(ResolvedCalleeSummary->linkage()) &&
- "Unexpected alias to a non-linkonceODR in import list");
- } else
- ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
+
+ // "Resolve" the summary
+ const auto *ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary->getBaseObject());
assert(ResolvedCalleeSummary->instCount() <= NewThreshold &&
"selectCallee() didn't honor the threshold");
@@ -312,14 +355,12 @@ static void ComputeImportForModule(
DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
continue;
}
- auto *Summary = GVSummary.second;
- if (auto *AS = dyn_cast<AliasSummary>(Summary))
- Summary = &AS->getAliasee();
- auto *FuncSummary = dyn_cast<FunctionSummary>(Summary);
+ auto *FuncSummary =
+ dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
if (!FuncSummary)
// Skip import for global variables
continue;
- DEBUG(dbgs() << "Initalize import for " << GVSummary.first << "\n");
+ DEBUG(dbgs() << "Initialize import for " << GVSummary.first << "\n");
computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
DefinedGVSummaries, Worklist, ImportList,
ExportLists);
@@ -344,8 +385,6 @@ static void ComputeImportForModule(
}
}
-} // anonymous namespace
-
/// Compute all the imports and exports for every module using the Index.
void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
@@ -395,11 +434,23 @@ void llvm::ComputeCrossModuleImport(
#endif
}
+#ifndef NDEBUG
+static void dumpImportListForModule(StringRef ModulePath,
+ FunctionImporter::ImportMapTy &ImportList) {
+ DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
+ << ImportList.size() << " modules.\n");
+ for (auto &Src : ImportList) {
+ auto SrcModName = Src.first();
+ DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from "
+ << SrcModName << "\n");
+ }
+}
+#endif
+
/// Compute all the imports for the given module in the Index.
void llvm::ComputeCrossModuleImportForModule(
StringRef ModulePath, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList) {
-
// Collect the list of functions this module defines.
// GUID -> Summary
GVSummaryMapTy FunctionSummaryMap;
@@ -410,13 +461,34 @@ void llvm::ComputeCrossModuleImportForModule(
ComputeImportForModule(FunctionSummaryMap, Index, ImportList);
#ifndef NDEBUG
- DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
- << ImportList.size() << " modules.\n");
- for (auto &Src : ImportList) {
- auto SrcModName = Src.first();
- DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from "
- << SrcModName << "\n");
+ dumpImportListForModule(ModulePath, ImportList);
+#endif
+}
+
+// Mark all external summaries in Index for import into the given module.
+// Used for distributed builds using a distributed index.
+void llvm::ComputeCrossModuleImportForModuleFromIndex(
+ StringRef ModulePath, const ModuleSummaryIndex &Index,
+ FunctionImporter::ImportMapTy &ImportList) {
+ for (auto &GlobalList : Index) {
+ // Ignore entries for undefined references.
+ if (GlobalList.second.SummaryList.empty())
+ continue;
+
+ auto GUID = GlobalList.first;
+ assert(GlobalList.second.SummaryList.size() == 1 &&
+ "Expected individual combined index to have one summary per GUID");
+ auto &Summary = GlobalList.second.SummaryList[0];
+ // Skip the summaries for the importing module. These are included to
+ // e.g. record required linkage changes.
+ if (Summary->modulePath() == ModulePath)
+ continue;
+    // It doesn't matter what value we plug into the map; it just needs an
+    // entry to provoke importing by thinBackend.
+ ImportList[Summary->modulePath()][GUID] = 1;
}
+#ifndef NDEBUG
+ dumpImportListForModule(ModulePath, ImportList);
#endif
}
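
For orientation: the import map filled above is keyed first by source module
path and then by GUID, and for this path the stored value is irrelevant. The
resulting entries look roughly like this (container shape assumed):

  // ImportMapTy behaves like StringMap<DenseMap<GlobalValue::GUID, unsigned>>
  ImportList["libfoo.bc"][CalleeGUID] = 1; // placeholder value; the entry's
                                           // presence is what provokes the
                                           // import in thinBackend
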
@@ -453,6 +525,17 @@ void llvm::computeDeadSymbols(
// Make value live and add it to the worklist if it was not live before.
// FIXME: we should only make the prevailing copy live here
auto visit = [&](ValueInfo VI) {
+ // FIXME: If we knew which edges were created for indirect call profiles,
+ // we could skip them here. Any that are live should be reached via
+ // other edges, e.g. reference edges. Otherwise, using a profile collected
+ // on a slightly different binary might provoke preserving, importing
+ // and ultimately promoting calls to functions not linked into this
+ // binary, which increases the binary size unnecessarily. Note that
+ // if this code changes, the importer needs to change so that edges
+ // to functions marked dead are skipped.
+ VI = updateValueInfoForIndirectCalls(Index, VI);
+ if (!VI)
+ return;
for (auto &S : VI.getSummaryList())
if (S->isLive())
return;
@@ -465,17 +548,12 @@ void llvm::computeDeadSymbols(
while (!Worklist.empty()) {
auto VI = Worklist.pop_back_val();
for (auto &Summary : VI.getSummaryList()) {
- for (auto Ref : Summary->refs())
+ GlobalValueSummary *Base = Summary->getBaseObject();
+ for (auto Ref : Base->refs())
visit(Ref);
- if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
+ if (auto *FS = dyn_cast<FunctionSummary>(Base))
for (auto Call : FS->calls())
visit(Call.first);
- if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
- auto AliaseeGUID = AS->getAliasee().getOriginalName();
- ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID);
- if (AliaseeVI)
- visit(AliaseeVI);
- }
}
}
Index.setWithGlobalValueDeadStripping();
@@ -600,23 +678,9 @@ void llvm::thinLTOResolveWeakForLinkerModule(
/// Run internalization on \p TheModule based on summary analysis.
void llvm::thinLTOInternalizeModule(Module &TheModule,
const GVSummaryMapTy &DefinedGlobals) {
- // Parse inline ASM and collect the list of symbols that are not defined in
- // the current module.
- StringSet<> AsmUndefinedRefs;
- ModuleSymbolTable::CollectAsmSymbols(
- TheModule,
- [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
- if (Flags & object::BasicSymbolRef::SF_Undefined)
- AsmUndefinedRefs.insert(Name);
- });
-
// Declare a callback for the internalize pass that will ask for every
// candidate GlobalValue if it can be internalized or not.
auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
- // Can't be internalized if referenced in inline asm.
- if (AsmUndefinedRefs.count(GV.getName()))
- return true;
-
// Lookup the linkage recorded in the summaries during global analysis.
auto GS = DefinedGlobals.find(GV.getGUID());
if (GS == DefinedGlobals.end()) {
@@ -647,12 +711,25 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// FIXME: See if we can just internalize directly here via linkage changes
// based on the index, rather than invoking internalizeModule.
- llvm::internalizeModule(TheModule, MustPreserveGV);
+ internalizeModule(TheModule, MustPreserveGV);
+}
+
+/// Make alias a clone of its aliasee.
+static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
+ Function *Fn = cast<Function>(GA->getBaseObject());
+
+ ValueToValueMapTy VMap;
+ Function *NewFn = CloneFunction(Fn, VMap);
+  // The clone should use the original alias's linkage and name, and we ensure
+  // that all uses of the alias use the new clone instead (cast if necessary).
+ NewFn->setLinkage(GA->getLinkage());
+ GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, GA->getType()));
+ NewFn->takeName(GA);
+ return NewFn;
}
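
In IR terms, the effect of replaceAliasWithAliasee is roughly the following
(illustrative only, shown as comments):

  // Before, in the source module:
  //   @A = linkonce_odr alias void (), void ()* @F
  // After: @F is cloned, the clone takes over @A's name and linkage, and
  // every use of @A is redirected to the clone (through a bitcast when the
  // types differ):
  //   define linkonce_odr void @A() { <cloned body of @F> }

This lets the importer materialize a plain function definition rather than
importing the alias/aliasee pair together, as the deleted code below did.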
// Automatically import functions in Module \p DestModule based on the summary
// index.
-//
Expected<bool> FunctionImporter::importFunctions(
Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
DEBUG(dbgs() << "Starting import for Module "
@@ -699,10 +776,9 @@ Expected<bool> FunctionImporter::importFunctions(
// Add 'thinlto_src_module' metadata for statistics and debugging.
F.setMetadata(
"thinlto_src_module",
- llvm::MDNode::get(
- DestModule.getContext(),
- {llvm::MDString::get(DestModule.getContext(),
- SrcModule->getSourceFileName())}));
+ MDNode::get(DestModule.getContext(),
+ {MDString::get(DestModule.getContext(),
+ SrcModule->getSourceFileName())}));
}
GlobalsToImport.insert(&F);
}
@@ -722,13 +798,6 @@ Expected<bool> FunctionImporter::importFunctions(
}
}
for (GlobalAlias &GA : SrcModule->aliases()) {
- // FIXME: This should eventually be controlled entirely by the summary.
- if (FunctionImportGlobalProcessing::doImportAsDefinition(
- &GA, &GlobalsToImport)) {
- GlobalsToImport.insert(&GA);
- continue;
- }
-
if (!GA.hasName())
continue;
auto GUID = GA.getGUID();
@@ -737,25 +806,25 @@ Expected<bool> FunctionImporter::importFunctions(
<< " " << GA.getName() << " from "
<< SrcModule->getSourceFileName() << "\n");
if (Import) {
- // Alias can't point to "available_externally". However when we import
- // linkOnceODR the linkage does not change. So we import the alias
- // and aliasee only in this case. This has been handled by
- // computeImportForFunction()
- GlobalObject *GO = GA.getBaseObject();
- assert(GO->hasLinkOnceODRLinkage() &&
- "Unexpected alias to a non-linkonceODR in import list");
-#ifndef NDEBUG
- if (!GlobalsToImport.count(GO))
- DEBUG(dbgs() << " alias triggers importing aliasee " << GO->getGUID()
- << " " << GO->getName() << " from "
- << SrcModule->getSourceFileName() << "\n");
-#endif
- if (Error Err = GO->materialize())
- return std::move(Err);
- GlobalsToImport.insert(GO);
if (Error Err = GA.materialize())
return std::move(Err);
- GlobalsToImport.insert(&GA);
+ // Import alias as a copy of its aliasee.
+ GlobalObject *Base = GA.getBaseObject();
+ if (Error Err = Base->materialize())
+ return std::move(Err);
+ auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
+ DEBUG(dbgs() << "Is importing aliasee fn " << Base->getGUID()
+ << " " << Base->getName() << " from "
+ << SrcModule->getSourceFileName() << "\n");
+ if (EnableImportMetadata) {
+ // Add 'thinlto_src_module' metadata for statistics and debugging.
+ Fn->setMetadata(
+ "thinlto_src_module",
+ MDNode::get(DestModule.getContext(),
+ {MDString::get(DestModule.getContext(),
+ SrcModule->getSourceFileName())}));
+ }
+ GlobalsToImport.insert(Fn);
}
}
@@ -789,12 +858,6 @@ Expected<bool> FunctionImporter::importFunctions(
return ImportedCount;
}
-/// Summary file to use for function importing when using -function-import from
-/// the command line.
-static cl::opt<std::string>
- SummaryFile("summary-file",
- cl::desc("The summary file to use for function importing."));
-
static bool doImportingForModule(Module &M) {
if (SummaryFile.empty())
report_fatal_error("error: -function-import requires -summary-file\n");
@@ -809,8 +872,15 @@ static bool doImportingForModule(Module &M) {
// First step is collecting the import list.
FunctionImporter::ImportMapTy ImportList;
- ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index,
- ImportList);
+ // If requested, simply import all functions in the index. This is used
+ // when testing distributed backend handling via the opt tool, when
+ // we have distributed indexes containing exactly the summaries to import.
+ if (ImportAllIndex)
+ ComputeCrossModuleImportForModuleFromIndex(M.getModuleIdentifier(), *Index,
+ ImportList);
+ else
+ ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index,
+ ImportList);
// Conservatively mark all internal values as promoted. This interface is
// only used when doing importing via the function importing pass. The pass
@@ -848,17 +918,18 @@ static bool doImportingForModule(Module &M) {
}
namespace {
+
/// Pass that performs cross-module function import provided a summary file.
class FunctionImportLegacyPass : public ModulePass {
public:
/// Pass identification, replacement for typeid
static char ID;
+ explicit FunctionImportLegacyPass() : ModulePass(ID) {}
+
/// Specify pass name for debug output
StringRef getPassName() const override { return "Function Importing"; }
- explicit FunctionImportLegacyPass() : ModulePass(ID) {}
-
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
@@ -866,7 +937,8 @@ public:
return doImportingForModule(M);
}
};
-} // anonymous namespace
+
+} // end anonymous namespace
PreservedAnalyses FunctionImportPass::run(Module &M,
ModuleAnalysisManager &AM) {
@@ -881,7 +953,9 @@ INITIALIZE_PASS(FunctionImportLegacyPass, "function-import",
"Summary Based Function Import", false, false)
namespace llvm {
+
Pass *createFunctionImportPass() {
return new FunctionImportLegacyPass();
}
-}
+
+} // end namespace llvm
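
With the options defined in this file, the legacy pass can be exercised from
opt. The invocations would look roughly like this (flag spellings taken from
the cl::opt definitions above; file names are placeholders):

  opt -function-import -summary-file=index.thinlto.bc in.bc -S -o out.ll
  opt -function-import -import-all-index -summary-file=dist.index.bc in.bc -S -o out.ll
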
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index c91e8b454927..ada9eb80e680 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -18,7 +18,6 @@
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
@@ -115,7 +114,7 @@ void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) {
ComputeDependencies(User, Deps);
Deps.erase(&GV); // Remove self-reference.
for (GlobalValue *GVU : Deps) {
- GVDependencies.insert(std::make_pair(GVU, &GV));
+ GVDependencies[GVU].insert(&GV);
}
}
@@ -199,8 +198,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
AliveGlobals.end()};
while (!NewLiveGVs.empty()) {
GlobalValue *LGV = NewLiveGVs.pop_back_val();
- for (auto &&GVD : make_range(GVDependencies.equal_range(LGV)))
- MarkLive(*GVD.second, &NewLiveGVs);
+ for (auto *GVD : GVDependencies[LGV])
+ MarkLive(*GVD, &NewLiveGVs);
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
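
The change above swaps a multimap of (dependee, dependent) pairs for a map
of sets, so duplicate edges collapse and each dependent is enqueued at most
once per dependee. A sketch of the intended structure (container types
assumed):

  DenseMap<GlobalValue *, SmallPtrSet<GlobalValue *, 4>> GVDependencies;
  GVDependencies[GVU].insert(&GV);   // at most one edge per (GVU, GV) pair
  for (GlobalValue *GVD : GVDependencies[LGV])
    MarkLive(*GVD, &NewLiveGVs);     // each dependent marked live once
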
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 93eab680ca6b..4bb2984e3b47 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -20,22 +20,41 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -45,7 +64,11 @@
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "globalopt"
@@ -139,7 +162,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
}
V = I->getOperand(0);
- } while (1);
+ } while (true);
}
/// This GV is a pointer root. Loop over all users of the global and clean up
@@ -220,7 +243,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
break;
I->eraseFromParent();
I = J;
- } while (1);
+ } while (true);
I->eraseFromParent();
}
}
@@ -348,7 +371,6 @@ static bool isSafeSROAElementUse(Value *V) {
return true;
}
-
/// U is a direct user of the specified global value. Look at it and its uses
/// and decide whether it is safe to SROA this global.
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
@@ -402,11 +424,8 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
}
}
- for (User *UU : U->users())
- if (!isSafeSROAElementUse(UU))
- return false;
-
- return true;
+ return llvm::all_of(U->users(),
+ [](User *UU) { return isSafeSROAElementUse(UU); });
}
/// Look at all uses of the global and decide whether it is safe for us to
@@ -419,6 +438,27 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
return true;
}
+/// Copy over the debug info for a variable to its SRA replacements.
+static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
+ uint64_t FragmentOffsetInBits,
+ uint64_t FragmentSizeInBits,
+ unsigned NumElements) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVs;
+ GV->getDebugInfo(GVs);
+ for (auto *GVE : GVs) {
+ DIVariable *Var = GVE->getVariable();
+ DIExpression *Expr = GVE->getExpression();
+ if (NumElements > 1) {
+ if (auto E = DIExpression::createFragmentExpression(
+ Expr, FragmentOffsetInBits, FragmentSizeInBits))
+ Expr = *E;
+ else
+ return;
+ }
+ auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
+ NGV->addDebugInfo(NGVE);
+ }
+}
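
The helper above relies on DIExpression fragments to describe which bits of
the original variable each replacement global covers. A minimal sketch of
the call it makes, with constants chosen for a struct of two i32 fields
(values illustrative):

  // Describe the second field: bits [32, 64) of the original variable.
  if (auto Frag = DIExpression::createFragmentExpression(
          Expr, /*OffsetInBits=*/32, /*SizeInBits=*/32))
    Expr = *Frag; // returns None when Expr already carries an incompatible
                  // fragment, in which case the debug info is dropped
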
/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the
@@ -434,7 +474,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
Constant *Init = GV->getInitializer();
Type *Ty = Init->getType();
- std::vector<GlobalVariable*> NewGlobals;
+ std::vector<GlobalVariable *> NewGlobals;
Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
// Get the alignment of the global, either explicit or target-specific.
@@ -443,9 +483,11 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
StartAlignment = DL.getABITypeAlignment(GV->getType());
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- NewGlobals.reserve(STy->getNumElements());
+ uint64_t FragmentOffset = 0;
+ unsigned NumElements = STy->getNumElements();
+ NewGlobals.reserve(NumElements);
const StructLayout &Layout = *DL.getStructLayout(STy);
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
@@ -465,15 +507,22 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
NGV->setAlignment(NewAlign);
+
+ // Copy over the debug info for the variable.
+ FragmentOffset = alignTo(FragmentOffset, NewAlign);
+ uint64_t Size = DL.getTypeSizeInBits(NGV->getValueType());
+ transferSRADebugInfo(GV, NGV, FragmentOffset, Size, NumElements);
+ FragmentOffset += Size;
}
} else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
unsigned NumElements = STy->getNumElements();
if (NumElements > 16 && GV->hasNUsesOrMore(16))
return nullptr; // It's not worth it.
NewGlobals.reserve(NumElements);
-
- uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType());
- unsigned EltAlign = DL.getABITypeAlignment(STy->getElementType());
+ auto ElTy = STy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(ElTy);
+ unsigned EltAlign = DL.getABITypeAlignment(ElTy);
+ uint64_t FragmentSizeInBits = DL.getTypeSizeInBits(ElTy);
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
@@ -494,6 +543,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
if (NewAlign > EltAlign)
NGV->setAlignment(NewAlign);
+ transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits,
+ NumElements);
}
}
@@ -689,7 +740,6 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
return Changed;
}
-
/// The specified global has only one non-null value stored into it. If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
@@ -1045,7 +1095,6 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
return true;
}
-
/// If all users of values loaded from GV are simple enough to perform HeapSRA,
/// return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
@@ -1095,9 +1144,9 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
}
static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
- DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
- std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+ DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
+ std::vector<Value *> &FieldVals = InsertedScalarizedValues[V];
if (FieldNo >= FieldVals.size())
FieldVals.resize(FieldNo+1);
@@ -1139,8 +1188,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
/// Given a load instruction and a value derived from the load, rewrite the
/// derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
- DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
+ std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
// If this is a comparison against null, handle it.
if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
@@ -1187,7 +1236,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// processed.
PHINode *PN = cast<PHINode>(LoadUser);
if (!InsertedScalarizedValues.insert(std::make_pair(PN,
- std::vector<Value*>())).second)
+ std::vector<Value *>())).second)
return;
// If this is the first time we've seen this PHI, recursively process all
@@ -1202,8 +1251,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead.
/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
- DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+ DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
+    std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
@@ -1232,8 +1281,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Okay, at this point, there are no users of the malloc. Insert N
// new mallocs at the same place as CI, and N globals.
- std::vector<Value*> FieldGlobals;
- std::vector<Value*> FieldMallocs;
+ std::vector<Value *> FieldGlobals;
+ std::vector<Value *> FieldMallocs;
SmallVector<OperandBundleDef, 1> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
@@ -1330,10 +1379,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
/// As we process loads, if we can't immediately update all uses of the load,
/// keep track of what scalarized loads are inserted for a given load.
- DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+ DenseMap<Value *, std::vector<Value *>> InsertedScalarizedValues;
InsertedScalarizedValues[GV] = FieldGlobals;
- std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
+ std::vector<std::pair<PHINode *, unsigned>> PHIsToRewrite;
// Okay, the malloc site is completely handled. All of the uses of GV are now
// loads, and all uses of those loads are simple. Rewrite them to use loads
@@ -1379,7 +1428,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
}
// Drop all inter-phi links and any loads that made it this far.
- for (DenseMap<Value*, std::vector<Value*> >::iterator
+ for (DenseMap<Value *, std::vector<Value *>>::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I->first))
@@ -1389,7 +1438,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
}
// Delete all the phis and loads now that inter-references are dead.
- for (DenseMap<Value*, std::vector<Value*> >::iterator
+ for (DenseMap<Value *, std::vector<Value *>>::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I->first))
@@ -1576,12 +1625,57 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
"No reason to shrink to bool!");
+ SmallVector<DIGlobalVariableExpression *, 1> GVs;
+ GV->getDebugInfo(GVs);
+
// If initialized to zero and storing one into the global, we can use a cast
// instead of a select to synthesize the desired value.
bool IsOneZero = false;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
+ bool EmitOneOrZero = true;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
IsOneZero = InitVal->isNullValue() && CI->isOne();
+    if (ConstantInt *CIInit = dyn_cast<ConstantInt>(GV->getInitializer())) {
+ uint64_t ValInit = CIInit->getZExtValue();
+ uint64_t ValOther = CI->getZExtValue();
+ uint64_t ValMinus = ValOther - ValInit;
+
+      for (auto *GVe : GVs) {
+ DIGlobalVariable *DGV = GVe->getVariable();
+ DIExpression *E = GVe->getExpression();
+
+        // It is expected that the address of the optimized global variable is
+        // on top of the stack. After the optimization, the value of that
+        // variable will be either 0 for the initial value or 1 for the other
+        // value. The following expression should return a constant integer
+        // value depending on the value at the global object's address:
+ // val * (ValOther - ValInit) + ValInit:
+ // DW_OP_deref DW_OP_constu <ValMinus>
+ // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
+ E = DIExpression::get(NewGV->getContext(),
+ {dwarf::DW_OP_deref,
+ dwarf::DW_OP_constu,
+ ValMinus,
+ dwarf::DW_OP_mul,
+ dwarf::DW_OP_constu,
+ ValInit,
+ dwarf::DW_OP_plus,
+ dwarf::DW_OP_stack_value});
+ DIGlobalVariableExpression *DGVE =
+ DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
+ NewGV->addDebugInfo(DGVE);
+ }
+ EmitOneOrZero = false;
+ }
+ }
+
+ if (EmitOneOrZero) {
+    // FIXME: This only emits the variable's address for the debugger; the
+    // value written at that address will only ever be 0 or 1.
+    for (auto *GV : GVs)
+ NewGV->addDebugInfo(GV);
+ }
+
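
A worked instance of the expression built above: suppose the global only
ever holds 2 (its initial value) or 7 (the stored value), so ValInit = 2,
ValOther = 7, and ValMinus = 5. The shrunken global holds 0 or 1, and the
expression DW_OP_deref, DW_OP_constu 5, DW_OP_mul, DW_OP_constu 2,
DW_OP_plus, DW_OP_stack_value evaluates to 0 * 5 + 2 = 2 or 1 * 5 + 2 = 7,
so the debugger keeps showing the original values.
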
while (!GV->use_empty()) {
Instruction *UI = cast<Instruction>(GV->user_back());
if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
@@ -2202,7 +2296,7 @@ static void setUsedInitializer(GlobalVariable &V,
// Type of pointer to the array of pointers.
PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
- SmallVector<llvm::Constant *, 8> UsedArray;
+ SmallVector<Constant *, 8> UsedArray;
for (GlobalValue *GV : Init) {
Constant *Cast
= ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
@@ -2215,14 +2309,15 @@ static void setUsedInitializer(GlobalVariable &V,
Module *M = V.getParent();
V.removeFromParent();
GlobalVariable *NV =
- new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
- llvm::ConstantArray::get(ATy, UsedArray), "");
+ new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
+ ConstantArray::get(ATy, UsedArray), "");
NV->takeName(&V);
NV->setSection("llvm.metadata");
delete &V;
}
namespace {
+
/// An easy to access representation of llvm.used and llvm.compiler.used.
class LLVMUsed {
SmallPtrSet<GlobalValue *, 8> Used;
@@ -2235,25 +2330,34 @@ public:
UsedV = collectUsedGlobalVariables(M, Used, false);
CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
}
- typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
- typedef iterator_range<iterator> used_iterator_range;
+
+ using iterator = SmallPtrSet<GlobalValue *, 8>::iterator;
+ using used_iterator_range = iterator_range<iterator>;
+
iterator usedBegin() { return Used.begin(); }
iterator usedEnd() { return Used.end(); }
+
used_iterator_range used() {
return used_iterator_range(usedBegin(), usedEnd());
}
+
iterator compilerUsedBegin() { return CompilerUsed.begin(); }
iterator compilerUsedEnd() { return CompilerUsed.end(); }
+
used_iterator_range compilerUsed() {
return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
}
+
bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
+
bool compilerUsedCount(GlobalValue *GV) const {
return CompilerUsed.count(GV);
}
+
bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
+
bool compilerUsedInsert(GlobalValue *GV) {
return CompilerUsed.insert(GV).second;
}
@@ -2265,7 +2369,8 @@ public:
setUsedInitializer(*CompilerUsedV, CompilerUsed);
}
};
-}
+
+} // end anonymous namespace
static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
if (GA.use_empty()) // No use at all.
@@ -2580,8 +2685,10 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
}
namespace {
+
struct GlobalOptLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
+
GlobalOptLegacyPass() : ModulePass(ID) {
initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -2603,9 +2710,11 @@ struct GlobalOptLegacyPass : public ModulePass {
AU.addRequired<DominatorTreeWrapperPass>();
}
};
-}
+
+} // end anonymous namespace
char GlobalOptLegacyPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp
index e47d881d1127..792f4b3052a3 100644
--- a/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/lib/Transforms/IPO/GlobalSplit.cpp
@@ -15,22 +15,30 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/GlobalSplit.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Transforms/IPO.h"
-
-#include <set>
+#include <cstdint>
+#include <vector>
using namespace llvm;
-namespace {
-
-bool splitGlobal(GlobalVariable &GV) {
+static bool splitGlobal(GlobalVariable &GV) {
// If the address of the global is taken outside of the module, we cannot
// apply this transformation.
if (!GV.hasLocalLinkage())
@@ -130,7 +138,7 @@ bool splitGlobal(GlobalVariable &GV) {
return true;
}
-bool splitGlobals(Module &M) {
+static bool splitGlobals(Module &M) {
// First, see if the module uses either of the llvm.type.test or
// llvm.type.checked.load intrinsics, which indicates that splitting globals
// may be beneficial.
@@ -151,12 +159,16 @@ bool splitGlobals(Module &M) {
return Changed;
}
+namespace {
+
struct GlobalSplit : public ModulePass {
static char ID;
+
GlobalSplit() : ModulePass(ID) {
initializeGlobalSplitPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M) {
+
+ bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
@@ -164,11 +176,12 @@ struct GlobalSplit : public ModulePass {
}
};
-}
+} // end anonymous namespace
-INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false)
char GlobalSplit::ID = 0;
+INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false)
+
ModulePass *llvm::createGlobalSplitPass() {
return new GlobalSplit;
}
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 5bb305ca84d0..d5d35ee89e0e 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeArgPromotionPass(Registry);
+ initializeCalledValuePropagationLegacyPassPass(Registry);
initializeConstantMergeLegacyPassPass(Registry);
initializeCrossDSOCFIPass(Registry);
initializeDAEPass(Registry);
@@ -67,6 +68,10 @@ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createArgumentPromotionPass());
}
+void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCalledValuePropagationPass());
+}
+
void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createConstantMergePass());
}
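
The new C API entry point mirrors the surrounding ones; client usage would
look roughly like this (a sketch against the C API, assuming Mod is an
existing LLVMModuleRef):

  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddCalledValuePropagationPass(PM); // binding added in this patch
  LLVMRunPassManager(PM, Mod);
  LLVMDisposePassManager(PM);
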
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 15d7515cc842..470f97b8ba61 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 50e7cc89a3b3..b259a0abd63c 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -21,7 +20,6 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/IPO.h"
@@ -57,12 +55,23 @@ public:
InlineCost getInlineCost(CallSite CS) override {
Function *Callee = CS.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
+
+ bool RemarksEnabled = false;
+ const auto &BBs = CS.getCaller()->getBasicBlockList();
+ if (!BBs.empty()) {
+ auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front());
+ if (DI.isEnabled())
+ RemarksEnabled = true;
+ }
+ OptimizationRemarkEmitter ORE(CS.getCaller());
+
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
- /*GetBFI=*/None, PSI);
+ /*GetBFI=*/None, PSI,
+ RemarksEnabled ? &ORE : nullptr);
}
bool runOnSCC(CallGraphSCC &SCC) override;
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 317770d133b3..4449c87ddefa 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -14,29 +14,60 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "inline"
@@ -63,13 +94,16 @@ static cl::opt<bool>
cl::init(false), cl::Hidden);
namespace {
+
enum class InlinerFunctionImportStatsOpts {
No = 0,
Basic = 1,
Verbose = 2,
};
-cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
+} // end anonymous namespace
+
+static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
"inliner-function-import-stats",
cl::init(InlinerFunctionImportStatsOpts::No),
cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
@@ -77,10 +111,8 @@ cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
"printing of statistics for each inlined function")),
cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
-} // namespace
-LegacyInlinerBase::LegacyInlinerBase(char &ID)
- : CallGraphSCCPass(ID), InsertLifetime(true) {}
+LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
: CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
@@ -96,7 +128,7 @@ void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
CallGraphSCCPass::getAnalysisUsage(AU);
}
-typedef DenseMap<ArrayType *, std::vector<AllocaInst *>> InlinedArrayAllocasTy;
+using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;
/// Look at all of the allocas that we inlined through this call site. If we
/// have already inlined other allocas through other calls into this function,
@@ -161,7 +193,6 @@ static void mergeInlinedArrayAllocas(
// function. Also, AllocasForType can be empty of course!
bool MergedAwayAlloca = false;
for (AllocaInst *AvailableAlloca : AllocasForType) {
-
unsigned Align1 = AI->getAlignment(),
Align2 = AvailableAlloca->getAlignment();
@@ -267,7 +298,6 @@ static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
int &TotalSecondaryCost,
function_ref<InlineCost(CallSite CS)> GetInlineCost) {
-
// For now we only handle local or inline functions.
if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
return false;
@@ -335,11 +365,14 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
return false;
}
-/// Return true if the inliner should attempt to inline at the given CallSite.
-static bool shouldInline(CallSite CS,
- function_ref<InlineCost(CallSite CS)> GetInlineCost,
- OptimizationRemarkEmitter &ORE) {
+/// Return the cost only if the inliner should attempt to inline at the given
+/// CallSite. If we return the cost, the caller will emit an optimization
+/// remark later using that cost, so we do not emit one from this function.
+static Optional<InlineCost>
+shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
+ OptimizationRemarkEmitter &ORE) {
using namespace ore;
+
InlineCost IC = GetInlineCost(CS);
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
@@ -348,32 +381,33 @@ static bool shouldInline(CallSite CS,
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
- << NV("Callee", Callee)
- << " should always be inlined (cost=always)");
- return true;
+ return IC;
}
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
<< NV("Callee", Callee) << " not inlined into "
<< NV("Caller", Caller)
- << " because it should never be inlined (cost=never)");
- return false;
+ << " because it should never be inlined (cost=never)";
+ });
+ return None;
}
if (!IC) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
- << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", thres=" << IC.getThreshold()
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
<< NV("Callee", Callee) << " not inlined into "
<< NV("Caller", Caller) << " because too costly to inline (cost="
- << NV("Cost", IC.getCost()) << ", threshold="
- << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
- return false;
+ << NV("Cost", IC.getCost())
+ << ", threshold=" << NV("Threshold", IC.getThreshold()) << ")";
+ });
+ return None;
}
int TotalSecondaryCost = 0;
@@ -381,23 +415,23 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
<< " Cost = " << IC.getCost()
<< ", outer Cost = " << TotalSecondaryCost << '\n');
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
Call)
<< "Not inlining. Cost of inlining " << NV("Callee", Callee)
<< " increases the cost of inlining " << NV("Caller", Caller)
- << " in other contexts");
- return false;
+ << " in other contexts";
+ });
+
+    // IC itself converts to true here, so return None explicitly to signal
+    // that this call site should not be inlined; the returned value is not
+    // inspected to build an error message.
+ return None;
}
DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
- << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", thres=" << IC.getThreshold()
<< ", Call: " << *CS.getInstruction() << '\n');
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBeInlined", Call)
- << NV("Callee", Callee) << " can be inlined into "
- << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
- << " (threshold="
- << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
- return true;
+ return IC;
}
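
The remark changes in this function also switch ORE.emit over to its
callback form, which defers building the remark (and evaluating all of the
streamed operands) until the emitter has verified that remarks are enabled.
The idiom, mirroring the missed-remark case above:

  ORE.emit([&]() {
    return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
           << ore::NV("Callee", Callee) << " not inlined into "
           << ore::NV("Caller", Caller)
           << " because it should never be inlined (cost=never)";
  });
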
/// Return true if the specified inline history ID
@@ -475,11 +509,14 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (Function *Callee = CS.getCalledFunction())
if (Callee->isDeclaration()) {
using namespace ore;
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
+
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
<< NV("Caller", CS.getCaller())
<< " because its definition is unavailable"
- << setIsVerbose());
+ << setIsVerbose();
+ });
continue;
}
@@ -545,9 +582,10 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// just become a regular analysis dependency.
OptimizationRemarkEmitter ORE(Caller);
+ Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
// If the policy determines that we should inline this function,
// delete the call instead.
- if (!shouldInline(CS, GetInlineCost, ORE))
+ if (!OIC)
continue;
// If this call site is dead and it is to a readonly function, we should
@@ -562,26 +600,40 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
++NumCallsDeleted;
} else {
// Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = Instr->getDebugLoc();
+ DebugLoc DLoc = CS->getDebugLoc();
BasicBlock *Block = CS.getParent();
// Attempt to inline the function.
using namespace ore;
+
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime, AARGetter,
ImportedFunctionsStats)) {
- ORE.emit(
- OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller));
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
+ Block)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", Caller);
+ });
continue;
}
++NumInlined;
- // Report the inline decision.
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "Inlined", DLoc, Block)
- << NV("Callee", Callee) << " inlined into "
- << NV("Caller", Caller));
+ ORE.emit([&]() {
+ bool AlwaysInline = OIC->isAlways();
+ StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
+ OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
+ R << NV("Callee", Callee) << " inlined into ";
+ R << NV("Caller", Caller);
+ if (AlwaysInline)
+ R << " with cost=always";
+ else {
+ R << " with cost=" << NV("Cost", OIC->getCost());
+ R << " (threshold=" << NV("Threshold", OIC->getThreshold());
+ R << ")";
+ }
+ return R;
+ });
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -601,7 +653,6 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
// TODO: Can remove if in SCC now.
!SCCFunctions.count(Callee) &&
-
// The function may be apparently dead, but if there are indirect
// callgraph references to the node, we cannot delete it yet, this
// could invalidate the CGSCC iterator.
@@ -840,6 +891,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
.getManager();
+
+ // Get the remarks emission analysis for the caller.
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
@@ -852,12 +907,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Function &Callee = *CS.getCalledFunction();
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
- PSI);
+ PSI, &ORE);
};
- // Get the remarks emission analysis for the caller.
- auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
  // Now process as many calls as we have within this caller in the sequence.
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
@@ -872,8 +924,22 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
continue;
+ // Check if this inlining may repeat breaking an SCC apart that has
+ // already been split once before. In that case, inlining here may
+ // trigger infinite inlining, much like is prevented within the inliner
+ // itself by the InlineHistory above, but spread across CGSCC iterations
+ // and thus hidden from the full inline history.
+ if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
+ UR.InlinedInternalEdges.count({&N, C})) {
+ DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
+ "previously split out of this SCC by inlining: "
+ << F.getName() << " -> " << Callee.getName() << "\n");
+ continue;
+ }
+
+ Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
// Check whether we want to inline this callsite.
- if (!shouldInline(CS, GetInlineCost, ORE))
+ if (!OIC)
continue;
// Setup the data structure used to plumb customization into the
@@ -883,11 +949,39 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
&FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
- if (!InlineFunction(CS, IFI))
+ // Get DebugLoc to report. CS will be invalid after Inliner.
+ DebugLoc DLoc = CS->getDebugLoc();
+ BasicBlock *Block = CS.getParent();
+
+ using namespace ore;
+
+ if (!InlineFunction(CS, IFI)) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ << NV("Callee", &Callee) << " will not be inlined into "
+ << NV("Caller", &F);
+ });
continue;
+ }
DidInline = true;
InlinedCallees.insert(&Callee);
+ ORE.emit([&]() {
+ bool AlwaysInline = OIC->isAlways();
+ StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
+ OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block);
+ R << NV("Callee", &Callee) << " inlined into ";
+ R << NV("Caller", &F);
+ if (AlwaysInline)
+ R << " with cost=always";
+ else {
+ R << " with cost=" << NV("Cost", OIC->getCost());
+ R << " (threshold=" << NV("Threshold", OIC->getThreshold());
+ R << ")";
+ }
+ return R;
+ });
+
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
int NewHistoryID = InlineHistory.size();
@@ -949,17 +1043,38 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
for (LazyCallGraph::Edge &E : *CalleeN)
RC->insertTrivialRefEdge(N, E.getNode());
}
- InlinedCallees.clear();
// At this point, since we have made changes we have at least removed
// a call instruction. However, in the process we do some incremental
// simplification of the surrounding code. This simplification can
// essentially do all of the same things as a function pass and we can
// re-use the exact same logic for updating the call graph to reflect the
- // change..
+ // change.
+ LazyCallGraph::SCC *OldC = C;
C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
RC = &C->getOuterRefSCC();
+
+ // If this causes an SCC to split apart into multiple smaller SCCs, there
+ // is a subtle risk we need to prepare for. Other transformations may
+ // expose an "infinite inlining" opportunity later, and because of the SCC
+ // mutation, we will revisit this function and potentially re-inline. If we
+  // do, and that re-inlining also has the potential to mutate the SCC
+  // structure, the infinite inlining problem can manifest through infinite
+  // SCC splits and merges. To avoid this, we capture the originating caller
+  // node and the SCC containing the call edge. This is a slight
+  // over-approximation of the possible inlining decisions that must be
+  // avoided, but is relatively efficient to store.
+ // FIXME: This seems like a very heavyweight way of retaining the inline
+ // history, we should look for a more efficient way of tracking it.
+ if (C != OldC && llvm::any_of(InlinedCallees, [&](Function *Callee) {
+ return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
+ })) {
+ DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
+ "retaining this to avoid infinite inlining.\n");
+ UR.InlinedInternalEdges.insert({&N, OldC});
+ }
+ InlinedCallees.clear();
}
// Now that we've finished inlining all of the calls across this SCC, delete
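
A concrete shape of the hazard handled above, for illustration: functions A
and B form an SCC via A -> B -> A. Inlining the internal edge splits B into
its own SCC; later simplification exposes a fresh A -> B call, the SCC is
revisited, the edge is inlined again, and the split/merge cycle repeats
indefinitely. Recording the {caller node, old SCC} pair lets the next visit
recognize and skip exactly this replay.
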
@@ -976,8 +1091,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
.getManager();
- FAM.clear(*DeadF);
- AM.clear(DeadC);
+ FAM.clear(*DeadF, DeadF->getName());
+ AM.clear(DeadC, DeadC.getName());
auto &DeadRC = DeadC.getOuterRefSCC();
CG.removeDeadFunction(*DeadF);
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index c74b0a35e296..36b6bdba2cd0 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -80,7 +80,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
//
Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
-bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
+bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipLoop(L))
return false;
@@ -143,7 +143,8 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
- LI.markAsRemoved(L);
+ LPM.markLoopAsDeleted(*L);
+ LI.erase(L);
}
++NumExtracted;
}
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 693df5e7ba92..8db7e1e142d2 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1,4 +1,4 @@
-//===-- LowerTypeTests.cpp - type metadata lowering pass ------------------===//
+//===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,32 +13,70 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TrailingObjects.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <set>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace lowertypetests;
@@ -197,6 +235,7 @@ struct ByteArrayInfo {
uint64_t BitSize;
GlobalVariable *ByteArray;
GlobalVariable *MaskGlobal;
+ uint8_t *MaskPtr = nullptr;
};
/// A POD-like structure that we use to store a global reference together with
@@ -205,16 +244,19 @@ struct ByteArrayInfo {
/// operation involving a map lookup; this data structure helps to reduce the
/// number of times we need to do this lookup.
class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
+ friend TrailingObjects;
+
GlobalObject *GO;
size_t NTypes;
+
// For functions: true if this is a definition (either in the merged module or
// in one of the thinlto modules).
bool IsDefinition;
+
// For functions: true if this function is either defined or used in a thinlto
// module and its jumptable entry needs to be exported to thinlto backends.
bool IsExported;
- friend TrailingObjects;
size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }
public:
@@ -231,15 +273,19 @@ public:
GTM->getTrailingObjects<MDNode *>());
return GTM;
}
+
GlobalObject *getGlobal() const {
return GO;
}
+
bool isDefinition() const {
return IsDefinition;
}
+
bool isExported() const {
return IsExported;
}
+
ArrayRef<MDNode *> types() const {
return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes);
}
@@ -258,6 +304,7 @@ class LowerTypeTestsModule {
IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0);
IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
@@ -307,7 +354,8 @@ class LowerTypeTestsModule {
Function *WeakInitializerFn = nullptr;
- void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
+ bool shouldExportConstantsAsAbsoluteSymbols();
+ uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
TypeIdLowering importTypeId(StringRef TypeId);
void importTypeTest(CallInst *CI);
void importFunction(Function *F, bool isDefinition);
@@ -329,6 +377,7 @@ class LowerTypeTestsModule {
unsigned getJumpTableEntrySize();
Type *getJumpTableEntryType();
void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
+ Triple::ArchType JumpTableArch,
SmallVectorImpl<Value *> &AsmArgs, Function *Dest);
void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
@@ -350,6 +399,7 @@ class LowerTypeTestsModule {
public:
LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary);
+
bool lower();
// Lower the module using the action and summary passed as command line
@@ -385,11 +435,12 @@ struct LowerTypeTests : public ModulePass {
}
};
-} // anonymous namespace
+} // end anonymous namespace
+
+char LowerTypeTests::ID = 0;
INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
false)
-char LowerTypeTests::ID = 0;
ModulePass *
llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
@@ -473,6 +524,8 @@ void LowerTypeTestsModule::allocateByteArrays() {
BAI->MaskGlobal->replaceAllUsesWith(
ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy));
BAI->MaskGlobal->eraseFromParent();
+ if (BAI->MaskPtr)
+ *BAI->MaskPtr = Mask;
}
Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
@@ -634,6 +687,10 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
Br->getMetadata(LLVMContext::MD_prof));
ReplaceInstWithInst(InitialBB->getTerminator(), NewBr);
+ // Update phis in Else resulting from InitialBB being split
+ for (auto &Phi : Else->phis())
+ Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB);
+
IRBuilder<> ThenB(CI);
return createBitSetTest(ThenB, TIL, BitOffset);
}
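The new phi loop exists because splitting InitialBB adds a predecessor that Else's phis do not yet list. A restatement with the reasoning spelled out, assuming the same Then/Else/InitialBB names:

    // After the split, InitialBB branches to Else directly, but Else's phis
    // still enumerate only the old predecessors. Reusing the value that
    // already flows in from Then keeps every predecessor accounted for.
    for (PHINode &Phi : Else->phis())
      Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB);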
@@ -720,13 +777,21 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
}
}
+bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() {
+ return (Arch == Triple::x86 || Arch == Triple::x86_64) &&
+ ObjectFormat == Triple::ELF;
+}
+
/// Export the given type identifier so that ThinLTO backends may import it.
/// Type identifiers are exported by adding coarse-grained information about how
/// to test the type identifier to the summary, and creating symbols in the
/// object file (aliases and absolute symbols) containing fine-grained
/// information about the type identifier.
-void LowerTypeTestsModule::exportTypeId(StringRef TypeId,
- const TypeIdLowering &TIL) {
+///
+/// Returns a pointer to the location in which to store the bitmask, if
+/// applicable.
+uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId,
+ const TypeIdLowering &TIL) {
TypeTestResolution &TTRes =
ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
TTRes.TheKind = TIL.TheKind;
@@ -738,14 +803,21 @@ void LowerTypeTestsModule::exportTypeId(StringRef TypeId,
GA->setVisibility(GlobalValue::HiddenVisibility);
};
+ auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) {
+ if (shouldExportConstantsAsAbsoluteSymbols())
+ ExportGlobal(Name, ConstantExpr::getIntToPtr(C, Int8PtrTy));
+ else
+ Storage = cast<ConstantInt>(C)->getZExtValue();
+ };
+
if (TIL.TheKind != TypeTestResolution::Unsat)
ExportGlobal("global_addr", TIL.OffsetedGlobal);
if (TIL.TheKind == TypeTestResolution::ByteArray ||
TIL.TheKind == TypeTestResolution::Inline ||
TIL.TheKind == TypeTestResolution::AllOnes) {
- ExportGlobal("align", ConstantExpr::getIntToPtr(TIL.AlignLog2, Int8PtrTy));
- ExportGlobal("size_m1", ConstantExpr::getIntToPtr(TIL.SizeM1, Int8PtrTy));
+ ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2);
+ ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1);
uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1;
if (TIL.TheKind == TypeTestResolution::Inline)
@@ -756,12 +828,16 @@ void LowerTypeTestsModule::exportTypeId(StringRef TypeId,
if (TIL.TheKind == TypeTestResolution::ByteArray) {
ExportGlobal("byte_array", TIL.TheByteArray);
- ExportGlobal("bit_mask", TIL.BitMask);
+ if (shouldExportConstantsAsAbsoluteSymbols())
+ ExportGlobal("bit_mask", TIL.BitMask);
+ else
+ return &TTRes.BitMask;
}
if (TIL.TheKind == TypeTestResolution::Inline)
- ExportGlobal("inline_bits",
- ConstantExpr::getIntToPtr(TIL.InlineBits, Int8PtrTy));
+ ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits);
+
+ return nullptr;
}
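One subtlety behind the new return value: unlike align and size_m1, the byte-array bit mask is not known when exportTypeId runs, since masks are only assigned once allocateByteArrays has laid out every byte array. The function therefore hands back a slot in the summary for a deferred write. In miniature, with TypeIdStr standing in for the MDString's string:

    // Sketch of the deferred-write pattern (names as in this patch):
    uint8_t *MaskSlot = exportTypeId(TypeIdStr, TIL);  // &TTRes.BitMask or null
    if (BAI)
      BAI->MaskPtr = MaskSlot;  // remember where the summary wants the mask
    // ... later, in allocateByteArrays(), once the mask is finally chosen:
    if (BAI->MaskPtr)
      *BAI->MaskPtr = Mask;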
LowerTypeTestsModule::TypeIdLowering
@@ -774,16 +850,34 @@ LowerTypeTestsModule::importTypeId(StringRef TypeId) {
TypeIdLowering TIL;
TIL.TheKind = TTRes.TheKind;
- auto ImportGlobal = [&](StringRef Name, unsigned AbsWidth) {
- Constant *C =
- M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), Int8Ty);
- auto *GV = dyn_cast<GlobalVariable>(C);
- // We only need to set metadata if the global is newly created, in which
- // case it would not have hidden visibility.
- if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
+ auto ImportGlobal = [&](StringRef Name) {
+    // Give the global a zero-length array type so that it cannot be assumed
+    // not to alias any other global.
+ Constant *C = M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(),
+ Int8Arr0Ty);
+ if (auto *GV = dyn_cast<GlobalVariable>(C))
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ C = ConstantExpr::getBitCast(C, Int8PtrTy);
+ return C;
+ };
+
+ auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth,
+ Type *Ty) {
+ if (!shouldExportConstantsAsAbsoluteSymbols()) {
+ Constant *C =
+ ConstantInt::get(isa<IntegerType>(Ty) ? Ty : Int64Ty, Const);
+ if (!isa<IntegerType>(Ty))
+ C = ConstantExpr::getIntToPtr(C, Ty);
+ return C;
+ }
+
+ Constant *C = ImportGlobal(Name);
+ auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
+ if (isa<IntegerType>(Ty))
+ C = ConstantExpr::getPtrToInt(C, Ty);
+ if (GV->getMetadata(LLVMContext::MD_absolute_symbol))
return C;
- GV->setVisibility(GlobalValue::HiddenVisibility);
auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
@@ -792,30 +886,30 @@ LowerTypeTestsModule::importTypeId(StringRef TypeId) {
};
if (AbsWidth == IntPtrTy->getBitWidth())
SetAbsRange(~0ull, ~0ull); // Full set.
- else if (AbsWidth)
+ else
SetAbsRange(0, 1ull << AbsWidth);
return C;
};
if (TIL.TheKind != TypeTestResolution::Unsat)
- TIL.OffsetedGlobal = ImportGlobal("global_addr", 0);
+ TIL.OffsetedGlobal = ImportGlobal("global_addr");
if (TIL.TheKind == TypeTestResolution::ByteArray ||
TIL.TheKind == TypeTestResolution::Inline ||
TIL.TheKind == TypeTestResolution::AllOnes) {
- TIL.AlignLog2 = ConstantExpr::getPtrToInt(ImportGlobal("align", 8), Int8Ty);
- TIL.SizeM1 = ConstantExpr::getPtrToInt(
- ImportGlobal("size_m1", TTRes.SizeM1BitWidth), IntPtrTy);
+ TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, Int8Ty);
+ TIL.SizeM1 =
+ ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy);
}
if (TIL.TheKind == TypeTestResolution::ByteArray) {
- TIL.TheByteArray = ImportGlobal("byte_array", 0);
- TIL.BitMask = ImportGlobal("bit_mask", 8);
+ TIL.TheByteArray = ImportGlobal("byte_array");
+ TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, Int8PtrTy);
}
if (TIL.TheKind == TypeTestResolution::Inline)
- TIL.InlineBits = ConstantExpr::getPtrToInt(
- ImportGlobal("inline_bits", 1 << TTRes.SizeM1BitWidth),
+ TIL.InlineBits = ImportConstant(
+ "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth,
TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);
return TIL;
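When constants do travel as absolute symbols (x86 ELF), the importer brands each one with !absolute_symbol metadata so the optimizer knows what range the symbol's address can take. A sketch of that step, mirroring the SetAbsRange lambda above, with GV, IntPtrTy and M assumed from context:

    // Mark GV's address as lying in [Min, Max); {~0ull, ~0ull} denotes the
    // full set, used when AbsWidth fills the whole pointer width.
    auto SetAbsRange = [&](GlobalVariable *GV, uint64_t Min, uint64_t Max) {
      auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
      auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
      GV->setMetadata(LLVMContext::MD_absolute_symbol,
                      MDNode::get(M.getContext(), {MinC, MaxC}));
    };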
@@ -894,6 +988,7 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
BSI.print(dbgs());
});
+ ByteArrayInfo *BAI = nullptr;
TypeIdLowering TIL;
TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
@@ -915,15 +1010,18 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
} else {
TIL.TheKind = TypeTestResolution::ByteArray;
++NumByteArraysCreated;
- ByteArrayInfo *BAI = createByteArray(BSI);
+ BAI = createByteArray(BSI);
TIL.TheByteArray = BAI->ByteArray;
TIL.BitMask = BAI->MaskGlobal;
}
TypeIdUserInfo &TIUI = TypeIdUsers[TypeId];
- if (TIUI.IsExported)
- exportTypeId(cast<MDString>(TypeId)->getString(), TIL);
+ if (TIUI.IsExported) {
+ uint8_t *MaskPtr = exportTypeId(cast<MDString>(TypeId)->getString(), TIL);
+ if (BAI)
+ BAI->MaskPtr = MaskPtr;
+ }
// Lower each call to llvm.type.test for this type identifier.
for (CallInst *CI : TIUI.CallSites) {
@@ -979,15 +1077,16 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
// constraints and arguments to AsmOS, ConstraintOS and AsmArgs.
void LowerTypeTestsModule::createJumpTableEntry(
raw_ostream &AsmOS, raw_ostream &ConstraintOS,
- SmallVectorImpl<Value *> &AsmArgs, Function *Dest) {
+ Triple::ArchType JumpTableArch, SmallVectorImpl<Value *> &AsmArgs,
+ Function *Dest) {
unsigned ArgIndex = AsmArgs.size();
- if (Arch == Triple::x86 || Arch == Triple::x86_64) {
+ if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) {
AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n";
AsmOS << "int3\nint3\nint3\n";
- } else if (Arch == Triple::arm || Arch == Triple::aarch64) {
+ } else if (JumpTableArch == Triple::arm || JumpTableArch == Triple::aarch64) {
AsmOS << "b $" << ArgIndex << "\n";
- } else if (Arch == Triple::thumb) {
+ } else if (JumpTableArch == Triple::thumb) {
AsmOS << "b.w $" << ArgIndex << "\n";
} else {
report_fatal_error("Unsupported architecture for jump tables");
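For reference, the entry bodies this function emits look as follows; x86 pads with int3 so that every entry has the same fixed size, which is what lets a member's slot be computed as the table base plus index times entry size:

    // x86/x86_64:  jmp ${N:c}@plt   followed by int3; int3; int3 padding
    // arm/aarch64: b   $N           a single branch instruction
    // thumb:       b.w $N           a 32-bit Thumb2 branch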
@@ -1074,6 +1173,46 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
PlaceholderFn->eraseFromParent();
}
+static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
+ Attribute TFAttr = F->getFnAttribute("target-features");
+ if (!TFAttr.hasAttribute(Attribute::None)) {
+ SmallVector<StringRef, 6> Features;
+ TFAttr.getValueAsString().split(Features, ',');
+ for (StringRef Feature : Features) {
+ if (Feature == "-thumb-mode")
+ return false;
+ else if (Feature == "+thumb-mode")
+ return true;
+ }
+ }
+
+ return ModuleArch == Triple::thumb;
+}
+
+// Each jump table must be either ARM or Thumb as a whole for the bit-test math
+// to work. Pick one that matches the majority of members to minimize interop
+// veneers inserted by the linker.
+static Triple::ArchType
+selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
+ Triple::ArchType ModuleArch) {
+ if (ModuleArch != Triple::arm && ModuleArch != Triple::thumb)
+ return ModuleArch;
+
+ unsigned ArmCount = 0, ThumbCount = 0;
+ for (const auto GTM : Functions) {
+ if (!GTM->isDefinition()) {
+ // PLT stubs are always ARM.
+ ++ArmCount;
+ continue;
+ }
+
+ Function *F = cast<Function>(GTM->getGlobal());
+ ++(isThumbFunction(F, ModuleArch) ? ThumbCount : ArmCount);
+ }
+
+ return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
+}
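isThumbFunction keys off the per-function target features, falling back to the module triple only when neither +thumb-mode nor -thumb-mode appears. A condensed standalone version of that scan:

    static bool isThumb(StringRef TargetFeatures, bool ModuleIsThumb) {
      SmallVector<StringRef, 6> Features;
      TargetFeatures.split(Features, ',');
      for (StringRef F : Features) {
        if (F == "-thumb-mode")
          return false;  // explicitly ARM
        if (F == "+thumb-mode")
          return true;   // explicitly Thumb
      }
      return ModuleIsThumb;  // no explicit feature: inherit the module arch
    }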
+
void LowerTypeTestsModule::createJumpTable(
Function *F, ArrayRef<GlobalTypeMember *> Functions) {
std::string AsmStr, ConstraintStr;
@@ -1081,8 +1220,10 @@ void LowerTypeTestsModule::createJumpTable(
SmallVector<Value *, 16> AsmArgs;
AsmArgs.reserve(Functions.size() * 2);
+ Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions, Arch);
+
for (unsigned I = 0; I != Functions.size(); ++I)
- createJumpTableEntry(AsmOS, ConstraintOS, AsmArgs,
+ createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
cast<Function>(Functions[I]->getGlobal()));
// Try to emit the jump table at the end of the text segment.
@@ -1098,11 +1239,15 @@ void LowerTypeTestsModule::createJumpTable(
// Luckily, this function does not get any prologue even without the
// attribute.
if (OS != Triple::Win32)
- F->addFnAttr(llvm::Attribute::Naked);
- // Thumb jump table assembly needs Thumb2. The following attribute is added by
- // Clang for -march=armv7.
- if (Arch == Triple::thumb)
+ F->addFnAttr(Attribute::Naked);
+ if (JumpTableArch == Triple::arm)
+ F->addFnAttr("target-features", "-thumb-mode");
+ if (JumpTableArch == Triple::thumb) {
+ F->addFnAttr("target-features", "+thumb-mode");
+ // Thumb jump table assembly needs Thumb2. The following attribute is added
+ // by Clang for -march=armv7.
F->addFnAttr("target-cpu", "cortex-a8");
+ }
BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
IRBuilder<> IRB(BB);
@@ -1256,7 +1401,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
FAlias->takeName(F);
if (FAlias->hasName())
F->setName(FAlias->getName() + ".cfi");
- F->replaceAllUsesWith(FAlias);
+ F->replaceUsesExceptBlockAddr(FAlias);
}
if (!F->isDeclarationForLinker())
F->setLinkage(GlobalValue::InternalLinkage);
@@ -1303,7 +1448,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM(
void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
- llvm::DenseMap<Metadata *, uint64_t> TypeIdIndices;
+ DenseMap<Metadata *, uint64_t> TypeIdIndices;
for (unsigned I = 0; I != TypeIds.size(); ++I)
TypeIdIndices[TypeIds[I]] = I;
@@ -1335,38 +1480,25 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
for (auto &&MemSet : TypeMembers)
GLB.addFragment(MemSet);
- // Build the bitsets from this disjoint set.
- if (Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal())) {
- // Build a vector of global variables with the computed layout.
- std::vector<GlobalTypeMember *> OrderedGVs(Globals.size());
- auto OGI = OrderedGVs.begin();
- for (auto &&F : GLB.Fragments) {
- for (auto &&Offset : F) {
- auto GV = dyn_cast<GlobalVariable>(Globals[Offset]->getGlobal());
- if (!GV)
- report_fatal_error("Type identifier may not contain both global "
- "variables and functions");
- *OGI++ = Globals[Offset];
- }
- }
-
- buildBitSetsFromGlobalVariables(TypeIds, OrderedGVs);
- } else {
- // Build a vector of functions with the computed layout.
- std::vector<GlobalTypeMember *> OrderedFns(Globals.size());
- auto OFI = OrderedFns.begin();
- for (auto &&F : GLB.Fragments) {
- for (auto &&Offset : F) {
- auto Fn = dyn_cast<Function>(Globals[Offset]->getGlobal());
- if (!Fn)
- report_fatal_error("Type identifier may not contain both global "
- "variables and functions");
- *OFI++ = Globals[Offset];
- }
+ // Build a vector of globals with the computed layout.
+ bool IsGlobalSet =
+ Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal());
+ std::vector<GlobalTypeMember *> OrderedGTMs(Globals.size());
+ auto OGTMI = OrderedGTMs.begin();
+ for (auto &&F : GLB.Fragments) {
+ for (auto &&Offset : F) {
+ if (IsGlobalSet != isa<GlobalVariable>(Globals[Offset]->getGlobal()))
+ report_fatal_error("Type identifier may not contain both global "
+ "variables and functions");
+ *OGTMI++ = Globals[Offset];
}
-
- buildBitSetsFromFunctions(TypeIds, OrderedFns);
}
+
+ // Build the bitsets from this disjoint set.
+ if (IsGlobalSet)
+ buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs);
+ else
+ buildBitSetsFromFunctions(TypeIds, OrderedGTMs);
}
/// Lower all type tests in this module.
@@ -1457,8 +1589,8 @@ bool LowerTypeTestsModule::lower() {
// Equivalence class set containing type identifiers and the globals that
// reference them. This is used to partition the set of type identifiers in
// the module into disjoint sets.
- typedef EquivalenceClasses<PointerUnion<GlobalTypeMember *, Metadata *>>
- GlobalClassesTy;
+ using GlobalClassesTy =
+ EquivalenceClasses<PointerUnion<GlobalTypeMember *, Metadata *>>;
GlobalClassesTy GlobalClasses;
// Verify the type metadata and build a few data structures to let us
@@ -1473,7 +1605,7 @@ bool LowerTypeTestsModule::lower() {
unsigned Index;
std::vector<GlobalTypeMember *> RefGlobals;
};
- llvm::DenseMap<Metadata *, TIInfo> TypeIdInfo;
+ DenseMap<Metadata *, TIInfo> TypeIdInfo;
unsigned I = 0;
SmallVector<MDNode *, 2> Types;
@@ -1512,14 +1644,27 @@ bool LowerTypeTestsModule::lower() {
FunctionType::get(Type::getVoidTy(M.getContext()), false),
GlobalVariable::ExternalLinkage, FunctionName, &M);
- if (Linkage == CFL_Definition)
- F->eraseMetadata(LLVMContext::MD_type);
+ // If the function is available_externally, remove its definition so
+ // that it is handled the same way as a declaration. Later we will try
+ // to create an alias using this function's linkage, which will fail if
+ // the linkage is available_externally. This will also result in us
+ // following the code path below to replace the type metadata.
+ if (F->hasAvailableExternallyLinkage()) {
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ F->deleteBody();
+ F->setComdat(nullptr);
+ F->clearMetadata();
+ }
+ // If the function in the full LTO module is a declaration, replace its
+ // type metadata with the type metadata we found in cfi.functions. That
+ // metadata is presumed to be more accurate than the metadata attached
+ // to the declaration.
if (F->isDeclaration()) {
if (Linkage == CFL_WeakDeclaration)
F->setLinkage(GlobalValue::ExternalWeakLinkage);
- SmallVector<MDNode *, 2> Types;
+ F->eraseMetadata(LLVMContext::MD_type);
for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
F->addMetadata(LLVMContext::MD_type,
*cast<MDNode>(FuncMD->getOperand(I).get()));
@@ -1548,7 +1693,7 @@ bool LowerTypeTestsModule::lower() {
GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types);
for (MDNode *Type : Types) {
verifyTypeMDNode(&GO, Type);
- auto &Info = TypeIdInfo[cast<MDNode>(Type)->getOperand(1)];
+ auto &Info = TypeIdInfo[Type->getOperand(1)];
Info.Index = ++I;
Info.RefGlobals.push_back(GTM);
}
@@ -1596,12 +1741,12 @@ bool LowerTypeTestsModule::lower() {
for (auto &P : *ExportSummary) {
for (auto &S : P.second.SummaryList) {
- auto *FS = dyn_cast<FunctionSummary>(S.get());
- if (!FS || !ExportSummary->isGlobalValueLive(FS))
+ if (!ExportSummary->isGlobalValueLive(S.get()))
continue;
- for (GlobalValue::GUID G : FS->type_tests())
- for (Metadata *MD : MetadataByGUID[G])
- AddTypeIdUse(MD).IsExported = true;
+ if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()))
+ for (GlobalValue::GUID G : FS->type_tests())
+ for (Metadata *MD : MetadataByGUID[G])
+ AddTypeIdUse(MD).IsExported = true;
}
}
}
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 0e478ba607be..76b90391fbb1 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -89,28 +89,45 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/FunctionComparator.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <set>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -119,7 +136,6 @@ using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
STATISTIC(NumThunksWritten, "Number of thunks generated");
-STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
static cl::opt<unsigned> NumFunctionsForSanityCheck(
@@ -154,10 +170,12 @@ namespace {
class FunctionNode {
mutable AssertingVH<Function> F;
FunctionComparator::FunctionHash Hash;
+
public:
// Note the hash is recalculated potentially multiple times, but it is cheap.
FunctionNode(Function *F)
: F(F), Hash(FunctionComparator::functionHash(*F)) {}
+
Function *getFunc() const { return F; }
FunctionComparator::FunctionHash getHash() const { return Hash; }
@@ -174,13 +192,12 @@ public:
/// by considering all pointer types to be equivalent. Once identified,
/// MergeFunctions will fold them by replacing a call to one to a call to a
/// bitcast of the other.
-///
class MergeFunctions : public ModulePass {
public:
static char ID;
+
MergeFunctions()
- : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(),
- HasGlobalAliases(false) {
+ : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)) {
initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
}
@@ -191,8 +208,10 @@ private:
// not need to become larger with another pointer.
class FunctionNodeCmp {
GlobalNumberState* GlobalNumbers;
+
public:
FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
+
bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
// Order first by hashes, then full function comparison.
if (LHS.getHash() != RHS.getHash())
@@ -201,7 +220,7 @@ private:
return FCmp.compare() == -1;
}
};
- typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType;
+ using FnTreeType = std::set<FunctionNode, FunctionNodeCmp>;
GlobalNumberState GlobalNumbers;
@@ -209,9 +228,9 @@ private:
/// analyzed again.
std::vector<WeakTrackingVH> Deferred;
+#ifndef NDEBUG
  /// Checks the rules of the order relation introduced among the set of
  /// functions. Returns true if the sanity check passes, and false otherwise.
-#ifndef NDEBUG
bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist);
#endif
@@ -236,9 +255,6 @@ private:
/// again.
void mergeTwoFunctions(Function *F, Function *G);
- /// Replace G with a thunk or an alias to F. Deletes G.
- void writeThunkOrAlias(Function *F, Function *G);
-
/// Fill PDIUnrelatedWL with instructions from the entry block that are
/// unrelated to parameter related debug info.
void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock,
@@ -256,29 +272,25 @@ private:
/// delete G.
void writeThunk(Function *F, Function *G);
- /// Replace G with an alias to F. Deletes G.
- void writeAlias(Function *F, Function *G);
-
/// Replace function F with function G in the function tree.
void replaceFunctionInTree(const FunctionNode &FN, Function *G);
/// The set of all distinct functions. Use the insert() and remove() methods
/// to modify it. The map allows efficient lookup and deferring of Functions.
FnTreeType FnTree;
+
// Map functions to the iterators of the FunctionNode which contains them
// in the FnTree. This must be updated carefully whenever the FnTree is
// modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
// dangling iterators into FnTree. The invariant that preserves this is that
// there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
ValueMap<Function*, FnTreeType::iterator> FNodesInTree;
-
- /// Whether or not the target supports global aliases.
- bool HasGlobalAliases;
};
} // end anonymous namespace
char MergeFunctions::ID = 0;
+
INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
ModulePass *llvm::createMergeFunctionsPass() {
@@ -454,19 +466,6 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
}
}
-// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
-void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
- if (HasGlobalAliases && G->hasGlobalUnnamedAddr()) {
- if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
- G->hasWeakLinkage()) {
- writeAlias(F, G);
- return;
- }
- }
-
- writeThunk(F, G);
-}
-
// Helper for writeThunk,
// Selects proper bitcast operation,
// but a bit simpler then CastInst::getCastOpcode.
@@ -499,7 +498,6 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
// parameter debug info, from the entry block.
void MergeFunctions::eraseInstsUnrelatedToPDI(
std::vector<Instruction *> &PDIUnrelatedWL) {
-
DEBUG(dbgs() << " Erasing instructions (in reverse order of appearance in "
"entry block) unrelated to parameter debug info from entry "
"block: {\n");
@@ -517,7 +515,6 @@ void MergeFunctions::eraseInstsUnrelatedToPDI(
// Reduce G to its entry block.
void MergeFunctions::eraseTail(Function *G) {
-
std::vector<BasicBlock *> WorklistBB;
for (Function::iterator BBI = std::next(G->begin()), BBE = G->end();
BBI != BBE; ++BBI) {
@@ -542,7 +539,6 @@ void MergeFunctions::eraseTail(Function *G) {
// PDIUnrelatedWL with such instructions.
void MergeFunctions::filterInstsUnrelatedToPDI(
BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) {
-
std::set<Instruction *> PDIRelated;
for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end();
BI != BIE; ++BI) {
@@ -652,9 +648,18 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
// call sites to point to F even when within the same translation unit.
void MergeFunctions::writeThunk(Function *F, Function *G) {
if (!G->isInterposable() && !MergeFunctionsPDI) {
- // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
- // above).
- replaceDirectCallers(G, F);
+ if (G->hasGlobalUnnamedAddr()) {
+ // G might have been a key in our GlobalNumberState, and it's illegal
+ // to replace a key in ValueMap<GlobalValue *> with a non-global.
+ GlobalNumbers.erase(G);
+ // If G's address is not significant, replace it entirely.
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ G->replaceAllUsesWith(BitcastF);
+ } else {
+ // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
+ // above).
+ replaceDirectCallers(G, F);
+ }
}
// If G was internal then we may have replaced all uses of G with F. If so,
@@ -665,6 +670,16 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
return;
}
+ // Don't merge tiny functions using a thunk, since it can just end up
+ // making the function larger.
+ if (F->size() == 1) {
+ if (F->front().size() <= 2) {
+ DEBUG(dbgs() << "writeThunk: " << F->getName()
+ << " is too small to bother creating a thunk for\n");
+ return;
+ }
+ }
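The size cutoff above prices the thunk itself: a thunk is essentially a call plus a return (plus argument casts), so replacing a body of one or two instructions with a thunk cannot shrink the module. A back-of-the-envelope form of the check:

    // F has a single block of at most two instructions (one real instruction
    // plus the terminator): a call+ret thunk would be at least as large.
    bool tooSmallToThunk(Function *F) {
      return F->size() == 1 && F->front().size() <= 2;
    }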
+
BasicBlock *GEntryBlock = nullptr;
std::vector<Instruction *> PDIUnrelatedWL;
BasicBlock *BB = nullptr;
@@ -691,7 +706,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
SmallVector<Value *, 16> Args;
unsigned i = 0;
FunctionType *FFTy = F->getFunctionType();
- for (Argument & AI : H->args()) {
+ for (Argument &AI : H->args()) {
Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
++i;
}
@@ -734,20 +749,6 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
++NumThunksWritten;
}
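The net effect of writeThunk on a merged pair, sketched as the IR it leaves behind (hypothetical signatures; the bitcasts come from createCast and only appear when parameter or return types differ):

    // define i32 @G(i8* %p) {            ; G reduced to a forwarding thunk
    //   %0 = bitcast i8* %p to i32*      ; cast each argument toward F's type
    //   %1 = tail call i32 @F(i32* %0)   ; forward to the surviving copy
    //   ret i32 %1
    // }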
-// Replace G with an alias to F and delete G.
-void MergeFunctions::writeAlias(Function *F, Function *G) {
- auto *GA = GlobalAlias::create(G->getLinkage(), "", F);
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
- GA->takeName(G);
- GA->setVisibility(G->getVisibility());
- removeUsers(G);
- G->replaceAllUsesWith(GA);
- G->eraseFromParent();
-
- DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
- ++NumAliasesWritten;
-}
-
// Merge two equivalent functions. Upon completion, Function G is deleted.
void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
if (F->isInterposable()) {
@@ -763,19 +764,14 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
- if (HasGlobalAliases) {
- writeAlias(F, G);
- writeAlias(F, H);
- } else {
- writeThunk(F, G);
- writeThunk(F, H);
- }
+ writeThunk(F, G);
+ writeThunk(F, H);
F->setAlignment(MaxAlignment);
F->setLinkage(GlobalValue::PrivateLinkage);
++NumDoubleWeak;
} else {
- writeThunkOrAlias(F, G);
+ writeThunk(F, G);
}
++NumFunctionsMerged;
@@ -816,18 +812,6 @@ bool MergeFunctions::insert(Function *NewFunction) {
const FunctionNode &OldF = *Result.first;
- // Don't merge tiny functions, since it can just end up making the function
- // larger.
- // FIXME: Should still merge them if they are unnamed_addr and produce an
- // alias.
- if (NewFunction->size() == 1) {
- if (NewFunction->front().size() <= 2) {
- DEBUG(dbgs() << NewFunction->getName()
- << " is to small to bother merging\n");
- return false;
- }
- }
-
// Impose a total order (by name) on the replacement of functions. This is
// important when operating on more than one module independently to prevent
// cycles of thunks calling each other when the modules are linked together.
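The name-based order matters when two modules are optimized independently: without a module-independent tie-break, module A could fold G into F while module B folds F into G, and after linking each survivor's thunk would call the other. A hypothetical helper showing the deterministic choice:

    // The lexicographically smaller name wins in every module, so two modules
    // that each see {F, G} can never make opposite choices and create a cycle
    // of thunks calling each other.
    Function *pickSurvivor(Function *A, Function *B) {
      return A->getName() < B->getName() ? A : B;
    }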
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 8840435af642..683655f1f68b 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -13,45 +13,126 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/PartialInlining.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/User.h"
#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <tuple>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "partial-inlining"
STATISTIC(NumPartialInlined,
"Number of callsites functions partially inlined into.");
+STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
+                                        "cold outlined regions were partially "
+                                        "inlined into their caller(s).");
+STATISTIC(NumColdRegionsFound,
+ "Number of cold single entry/exit regions found.");
+STATISTIC(NumColdRegionsOutlined,
+ "Number of cold single entry/exit regions outlined.");
// Command line option to disable partial-inlining. The default is false:
static cl::opt<bool>
DisablePartialInlining("disable-partial-inlining", cl::init(false),
- cl::Hidden, cl::desc("Disable partial ininling"));
+ cl::Hidden, cl::desc("Disable partial inlining"));
+// Command line option to disable multi-region partial-inlining. The default is
+// false:
+static cl::opt<bool> DisableMultiRegionPartialInline(
+ "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
+ cl::desc("Disable multi-region partial inlining"));
+
+// Command line option to force outlining in regions with live exit variables.
+// The default is false:
+static cl::opt<bool>
+ ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
+ cl::desc("Force outline regions with live exits"));
+
+// Command line option to enable marking outline functions with Cold Calling
+// Convention. The default is false:
+static cl::opt<bool>
+ MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
+ cl::desc("Mark outline function calls with ColdCC"));
+
+#ifndef NDEBUG
+// Command line option to debug partial-inlining. The default is none:
+static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
+ cl::init(false), cl::Hidden,
+ cl::desc("Trace partial inlining."));
+#endif
+
// This is an option used by testing:
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
cl::init(false), cl::ZeroOrMore,
cl::ReallyHidden,
cl::desc("Skip Cost Analysis"));
+// Used to determine if a cold region is worth outlining based on
+// its inlining cost compared to that of the original function. The default is
+// 10%, i.e. the region is outlined only if it reduces the inlining cost of
+// the original function by at least 10%.
+static cl::opt<float> MinRegionSizeRatio(
+ "min-region-size-ratio", cl::init(0.1), cl::Hidden,
+ cl::desc("Minimum ratio comparing relative sizes of each "
+ "outline candidate and original function"));
+// Used to tune the minimum number of executions needed in the predecessor
+// block of the cold edge before its branch probabilities are trusted, i.e. a
+// confidence threshold.
+static cl::opt<unsigned>
+ MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
+ cl::desc("Minimum block executions to consider "
+ "its BranchProbabilityInfo valid"));
+// Used to determine when an edge is considered cold. The default is 10%, i.e.
+// if the branch probability is 10% or less, the edge is deemed cold.
+static cl::opt<float> ColdBranchRatio(
+ "cold-branch-ratio", cl::init(0.1), cl::Hidden,
+ cl::desc("Minimum BranchProbability to consider a region cold."));
static cl::opt<unsigned> MaxNumInlineBlocks(
"max-num-inline-blocks", cl::init(5), cl::Hidden,
- cl::desc("Max Number of Blocks To be Partially Inlined"));
+ cl::desc("Max number of blocks to be partially inlined"));
// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
@@ -75,9 +156,8 @@ static cl::opt<unsigned> ExtraOutliningPenalty(
namespace {
struct FunctionOutliningInfo {
- FunctionOutliningInfo()
- : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
- ReturnBlockPreds() {}
+ FunctionOutliningInfo() = default;
+
// Returns the number of blocks to be inlined including all blocks
// in Entries and one return block.
unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
@@ -85,30 +165,69 @@ struct FunctionOutliningInfo {
// A set of blocks including the function entry that guard
// the region to be outlined.
SmallVector<BasicBlock *, 4> Entries;
+
// The return block that is not included in the outlined region.
- BasicBlock *ReturnBlock;
+ BasicBlock *ReturnBlock = nullptr;
+
// The dominating block of the region to be outlined.
- BasicBlock *NonReturnBlock;
+ BasicBlock *NonReturnBlock = nullptr;
+
+  // The set of blocks in Entries that are predecessors to ReturnBlock.
SmallVector<BasicBlock *, 4> ReturnBlockPreds;
};
+struct FunctionOutliningMultiRegionInfo {
+  FunctionOutliningMultiRegionInfo() = default;
+
+ // Container for outline regions
+ struct OutlineRegionInfo {
+ OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
+ BasicBlock *EntryBlock, BasicBlock *ExitBlock,
+ BasicBlock *ReturnBlock)
+ : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
+ ReturnBlock(ReturnBlock) {}
+ SmallVector<BasicBlock *, 8> Region;
+ BasicBlock *EntryBlock;
+ BasicBlock *ExitBlock;
+ BasicBlock *ReturnBlock;
+ };
+
+ SmallVector<OutlineRegionInfo, 4> ORI;
+};
+
struct PartialInlinerImpl {
+
PartialInlinerImpl(
std::function<AssumptionCache &(Function &)> *GetAC,
std::function<TargetTransformInfo &(Function &)> *GTTI,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
- ProfileSummaryInfo *ProfSI)
- : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
+ ProfileSummaryInfo *ProfSI,
+ std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
+ : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI),
+ GetORE(GORE) {}
+
bool run(Module &M);
- Function *unswitchFunction(Function *F);
+ // Main part of the transformation that calls helper functions to find
+ // outlining candidates, clone & outline the function, and attempt to
+ // partially inline the resulting function. Returns true if
+  // inlining was successful, false otherwise. Also returns the outlined
+  // function (only if we partially inlined early returns), as it may be
+  // possible to further "peel" early return statements that were left in the
+  // outlined function due to code size.
+ std::pair<bool, Function *> unswitchFunction(Function *F);
  // This class speculatively clones the function to be partially inlined.
// At the end of partial inlining, the remaining callsites to the cloned
// function that are not partially inlined will be fixed up to reference
// the original function, and the cloned function will be erased.
struct FunctionCloner {
- FunctionCloner(Function *F, FunctionOutliningInfo *OI);
+ // Two constructors, one for single region outlining, the other for
+ // multi-region outlining.
+ FunctionCloner(Function *F, FunctionOutliningInfo *OI,
+ OptimizationRemarkEmitter &ORE);
+ FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
+ OptimizationRemarkEmitter &ORE);
~FunctionCloner();
// Prepare for function outlining: making sure there is only
@@ -116,20 +235,34 @@ struct PartialInlinerImpl {
// the return block.
void NormalizeReturnBlock();
- // Do function outlining:
- Function *doFunctionOutlining();
+ // Do function outlining for cold regions.
+ bool doMultiRegionFunctionOutlining();
+ // Do function outlining for region after early return block(s).
+ // NOTE: For vararg functions that do the vararg handling in the outlined
+ // function, we temporarily generate IR that does not properly
+ // forward varargs to the outlined function. Calling InlineFunction
+ // will update calls to the outlined functions to properly forward
+ // the varargs.
+ Function *doSingleRegionFunctionOutlining();
Function *OrigFunc = nullptr;
Function *ClonedFunc = nullptr;
- Function *OutlinedFunc = nullptr;
- BasicBlock *OutliningCallBB = nullptr;
+
+  using FuncBodyCallerPair = std::pair<Function *, BasicBlock *>;
+ // Keep track of Outlined Functions and the basic block they're called from.
+ SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
+
// ClonedFunc is inlined in one of its callers after function
// outlining.
bool IsFunctionInlined = false;
// The cost of the region to be outlined.
int OutlinedRegionCost = 0;
+ // ClonedOI is specific to outlining non-early return blocks.
std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
+ // ClonedOMRI is specific to outlining cold regions.
+ std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
+ OptimizationRemarkEmitter &ORE;
};
private:
@@ -138,6 +271,7 @@ private:
std::function<TargetTransformInfo &(Function &)> *GetTTI;
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
ProfileSummaryInfo *PSI;
+ std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
// Return the frequency of the OutlininingBB relative to F's entry point.
// The result is no larger than 1 and is represented using BP.
@@ -148,8 +282,7 @@ private:
// Return true if the callee of CS should be partially inlined with
// profit.
bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
- BlockFrequency WeightedOutliningRcost,
- OptimizationRemarkEmitter &ORE);
+ BlockFrequency WeightedOutliningRcost);
// Try to inline DuplicateFunction (cloned from F with call to
// the OutlinedFunction into its callers. Return true
@@ -196,17 +329,20 @@ private:
// - The second value is the estimated size of the new call sequence in
// basic block Cloner.OutliningCallBB;
std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
+
// Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
// approximate both the size and runtime cost (Note that in the current
// inline cost analysis, there is no clear distinction there either).
static int computeBBInlineCost(BasicBlock *BB);
std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
-
+ std::unique_ptr<FunctionOutliningMultiRegionInfo>
+ computeOutliningColdRegionsInfo(Function *F);
};
struct PartialInlinerLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
+
PartialInlinerLegacyPass() : ModulePass(ID) {
initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
}
@@ -216,6 +352,7 @@ struct PartialInlinerLegacyPass : public ModulePass {
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
+
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
@@ -225,6 +362,7 @@ struct PartialInlinerLegacyPass : public ModulePass {
&getAnalysis<TargetTransformInfoWrapperPass>();
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ std::unique_ptr<OptimizationRemarkEmitter> UPORE;
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&ACT](Function &F) -> AssumptionCache & {
@@ -236,9 +374,185 @@ struct PartialInlinerLegacyPass : public ModulePass {
return TTIWP->getTTI(F);
};
- return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
+ std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
+ [&UPORE](Function &F) -> OptimizationRemarkEmitter & {
+ UPORE.reset(new OptimizationRemarkEmitter(&F));
+ return *UPORE.get();
+ };
+
+ return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI,
+ &GetORE)
+ .run(M);
}
};
+
+} // end anonymous namespace
+
+std::unique_ptr<FunctionOutliningMultiRegionInfo>
+PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) {
+ BasicBlock *EntryBlock = &F->front();
+
+ DominatorTree DT(*F);
+ LoopInfo LI(DT);
+ BranchProbabilityInfo BPI(*F, LI);
+ std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
+ BlockFrequencyInfo *BFI;
+ if (!GetBFI) {
+ ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
+ BFI = ScopedBFI.get();
+ } else
+ BFI = &(*GetBFI)(*F);
+
+ auto &ORE = (*GetORE)(*F);
+
+ // Return if we don't have profiling information.
+ if (!PSI->hasInstrumentationProfile())
+ return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
+
+ std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
+ llvm::make_unique<FunctionOutliningMultiRegionInfo>();
+
+ auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
+ BasicBlock *Dom = BlockList.front();
+ return BlockList.size() > 1 &&
+ std::distance(pred_begin(Dom), pred_end(Dom)) == 1;
+ };
+
+ auto IsSingleExit =
+ [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
+ BasicBlock *ExitBlock = nullptr;
+ for (auto *Block : BlockList) {
+ for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
+ if (!is_contained(BlockList, *SI)) {
+ if (ExitBlock) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
+ &SI->front())
+ << "Region dominated by "
+ << ore::NV("Block", BlockList.front()->getName())
+ << " has more than one region exit edge.";
+ });
+ return nullptr;
+ } else
+ ExitBlock = Block;
+ }
+ }
+ }
+ return ExitBlock;
+ };
+
+ auto BBProfileCount = [BFI](BasicBlock *BB) {
+ return BFI->getBlockProfileCount(BB)
+ ? BFI->getBlockProfileCount(BB).getValue()
+ : 0;
+ };
+
+  // Use the same computeBBInlineCost function to compute the cost savings of
+  // outlining the candidate region.
+ int OverallFunctionCost = 0;
+ for (auto &BB : *F)
+ OverallFunctionCost += computeBBInlineCost(&BB);
+
+#ifndef NDEBUG
+ if (TracePartialInlining)
+ dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
+#endif
+ int MinOutlineRegionCost =
+ static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
+ BranchProbability MinBranchProbability(
+ static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
+ MinBlockCounterExecution);
+ bool ColdCandidateFound = false;
+ BasicBlock *CurrEntry = EntryBlock;
+ std::vector<BasicBlock *> DFS;
+ DenseMap<BasicBlock *, bool> VisitedMap;
+ DFS.push_back(CurrEntry);
+ VisitedMap[CurrEntry] = true;
+ // Use Depth First Search on the basic blocks to find CFG edges that are
+ // considered cold.
+  // A candidate cold region must also have its inline cost compared to the
+  // overall inline cost of the original function. The region is outlined only
+  // if it reduces the inline cost of the function by 'MinOutlineRegionCost' or
+  // more.
+ while (!DFS.empty()) {
+ auto *thisBB = DFS.back();
+ DFS.pop_back();
+ // Only consider regions with predecessor blocks that are considered
+ // not-cold (default: part of the top 99.99% of all block counters)
+ // AND greater than our minimum block execution count (default: 100).
+ if (PSI->isColdBB(thisBB, BFI) ||
+ BBProfileCount(thisBB) < MinBlockCounterExecution)
+ continue;
+ for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
+ if (VisitedMap[*SI])
+ continue;
+ VisitedMap[*SI] = true;
+ DFS.push_back(*SI);
+ // If branch isn't cold, we skip to the next one.
+ BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
+ if (SuccProb > MinBranchProbability)
+ continue;
+#ifndef NDEBUG
+ if (TracePartialInlining) {
+ dbgs() << "Found cold edge: " << thisBB->getName() << "->"
+ << (*SI)->getName() << "\nBranch Probability = " << SuccProb
+ << "\n";
+ }
+#endif
+ SmallVector<BasicBlock *, 8> DominateVector;
+ DT.getDescendants(*SI, DominateVector);
+ // We can only outline single entry regions (for now).
+ if (!IsSingleEntry(DominateVector))
+ continue;
+ BasicBlock *ExitBlock = nullptr;
+ // We can only outline single exit regions (for now).
+ if (!(ExitBlock = IsSingleExit(DominateVector)))
+ continue;
+ int OutlineRegionCost = 0;
+ for (auto *BB : DominateVector)
+ OutlineRegionCost += computeBBInlineCost(BB);
+
+#ifndef NDEBUG
+ if (TracePartialInlining)
+ dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
+#endif
+
+ if (OutlineRegionCost < MinOutlineRegionCost) {
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
+ &SI->front())
+ << ore::NV("Callee", F) << " inline cost-savings smaller than "
+ << ore::NV("Cost", MinOutlineRegionCost);
+ });
+ continue;
+ }
+      // For now, ignore blocks that belong to a single-entry/single-exit
+      // (SISE) region that is already a candidate for outlining. In the
+      // future, we may want to look at inner regions because the outer region
+      // may have live-exit variables.
+ for (auto *BB : DominateVector)
+ VisitedMap[BB] = true;
+ // ReturnBlock here means the block after the outline call
+ BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
+ // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
+ FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
+ DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
+ RegInfo.Region = DominateVector;
+ OutliningInfo->ORI.push_back(RegInfo);
+#ifndef NDEBUG
+ if (TracePartialInlining) {
+ dbgs() << "Found Cold Candidate starting at block: "
+ << DominateVector.front()->getName() << "\n";
+ }
+#endif
+ ColdCandidateFound = true;
+ NumColdRegionsFound++;
+ }
+ }
+ if (ColdCandidateFound)
+ return OutliningInfo;
+ else
+ return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
}
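Condensed, these are the two gates an edge (Pred -> Succ) must pass before its dominated region is even considered, with the option defaults spelled out:

    // Sketch only; PSI, BFI, BPI, BBProfileCount and the cl::opt values are
    // as in the function above.
    bool isColdCandidateEdge(BasicBlock *Pred, BasicBlock *Succ) {
      // Pred must be hot enough for its branch weights to mean anything:
      // not cold per the profile summary, and executed at least 100 times.
      if (PSI->isColdBB(Pred, BFI) ||
          BBProfileCount(Pred) < MinBlockCounterExecution)
        return false;
      // The edge itself must be taken 10% of the time or less.
      return BPI.getEdgeProbability(Pred, Succ) <= MinBranchProbability;
    }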
std::unique_ptr<FunctionOutliningInfo>
@@ -319,7 +633,6 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) {
OutliningInfo->Entries.push_back(CurrEntry);
CurrEntry = OtherSucc;
-
} while (true);
if (!CandidateFound)
@@ -413,15 +726,19 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
BranchProbability
PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
-
+ BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
auto EntryFreq =
Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
auto OutliningCallFreq =
- Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
-
- auto OutlineRegionRelFreq =
- BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
- EntryFreq.getFrequency());
+ Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
+ // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
+ // we outlined any regions, so we may encounter situations where the
+ // OutliningCallFreq is *slightly* bigger than the EntryFreq.
+ if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
+ OutliningCallFreq = EntryFreq;
+ }
+ auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
+ OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
return OutlineRegionRelFreq;
@@ -448,10 +765,10 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
}
bool PartialInlinerImpl::shouldPartialInline(
- CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
- OptimizationRemarkEmitter &ORE) {
-
+ CallSite CS, FunctionCloner &Cloner,
+ BlockFrequency WeightedOutliningRcost) {
using namespace ore;
+
if (SkipCostAnalysis)
return true;
@@ -461,30 +778,37 @@ bool PartialInlinerImpl::shouldPartialInline(
Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee);
+ auto &ORE = (*GetORE)(*Caller);
InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
- *GetAssumptionCache, GetBFI, PSI);
+ *GetAssumptionCache, GetBFI, PSI, &ORE);
if (IC.isAlways()) {
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
<< NV("Callee", Cloner.OrigFunc)
- << " should always be fully inlined, not partially");
+ << " should always be fully inlined, not partially";
+ });
return false;
}
if (IC.isNever()) {
- ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
<< NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller)
- << " because it should never be inlined (cost=never)");
+ << " because it should never be inlined (cost=never)";
+ });
return false;
}
if (!IC) {
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
<< NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
- << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
+ << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
+ });
return false;
}
const DataLayout &DL = Caller->getParent()->getDataLayout();
@@ -495,23 +819,28 @@ bool PartialInlinerImpl::shouldPartialInline(
// Weighted saving is smaller than weighted cost, return false
if (NormWeightedSavings < WeightedOutliningRcost) {
- ORE.emit(
- OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
- << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
- << NV("Caller", Caller) << " runtime overhead (overhead="
- << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
- << ", savings="
- << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
- << " of making the outlined call is too high");
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
+ Call)
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
+ << NV("Caller", Caller) << " runtime overhead (overhead="
+ << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
+ << ", savings="
+ << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
+ << ")"
+ << " of making the outlined call is too high";
+ });
return false;
}
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
<< NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
<< " (threshold="
- << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
+ << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
+ });
return true;
}
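The mechanical change running through these hunks is that ORE.emit now takes a callback, so the remark object and its name-value pairs are only materialized when some remark consumer is actually listening. The pattern in isolation, with a hypothetical remark name:

    // Deferred remark construction: the lambda runs only if remarks are
    // enabled (e.g. via -Rpass* or an optimization record file).
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "MyRemark", Call)
             << ore::NV("Callee", Callee) << " example message";
    });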
@@ -566,23 +895,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
std::tuple<int, int>
PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
+ int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
+ for (auto FuncBBPair : Cloner.OutlinedFunctions) {
+ Function *OutlinedFunc = FuncBBPair.first;
+ BasicBlock* OutliningCallBB = FuncBBPair.second;
+ // Now compute the cost of the call sequence to the outlined function
+ // 'OutlinedFunction' in BB 'OutliningCallBB':
+ OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
- // Now compute the cost of the call sequence to the outlined function
- // 'OutlinedFunction' in BB 'OutliningCallBB':
- int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);
-
- // Now compute the cost of the extracted/outlined function itself:
- int OutlinedFunctionCost = 0;
- for (BasicBlock &BB : *Cloner.OutlinedFunc) {
- OutlinedFunctionCost += computeBBInlineCost(&BB);
+ // Now compute the cost of the extracted/outlined function itself:
+ for (BasicBlock &BB : *OutlinedFunc)
+ OutlinedFunctionCost += computeBBInlineCost(&BB);
}
-
assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
+
// The code extractor introduces a new root and exit stub blocks with
// additional unconditional branches. Those branches will be eliminated
// later with bb layout. The cost should be adjusted accordingly:
- OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
+ OutlinedFunctionCost -=
+ 2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
int OutliningRuntimeOverhead =
OutliningFuncCallCost +
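The loop above replaces the old single-function bookkeeping: the call-sequence cost and the extracted-body cost are now accumulated across every entry in Cloner.OutlinedFunctions, and the stub-branch credit is scaled by the number of outlined regions. A worked instance of that arithmetic, with hypothetical region costs (InlineConstants::InstrCost is 5 in this version of LLVM):

    #include "llvm/Analysis/InlineCost.h"

    // Two outlined regions with made-up costs: call sequences of 6 and 4,
    // extracted bodies of 40 and 25.
    int exampleOutliningCosts() {
      int OutliningFuncCallCost = 6 + 4;
      int OutlinedFunctionCost = 40 + 25;
      // Each extraction adds a root and an exit stub with one unconditional
      // branch apiece; BB layout removes them later, so credit 2 * InstrCost
      // per outlined region:
      OutlinedFunctionCost -= 2 * llvm::InlineConstants::InstrCost * 2; // 65 - 20 = 45
      // Both terms then feed the OutliningRuntimeOverhead computation above.
      return OutliningFuncCallCost + OutlinedFunctionCost;
    }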
@@ -636,9 +968,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
}
}
-PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
- FunctionOutliningInfo *OI)
- : OrigFunc(F) {
+PartialInlinerImpl::FunctionCloner::FunctionCloner(
+ Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
+ : OrigFunc(F), ORE(ORE) {
ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
@@ -659,8 +991,39 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
F->replaceAllUsesWith(ClonedFunc);
}
-void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
+PartialInlinerImpl::FunctionCloner::FunctionCloner(
+ Function *F, FunctionOutliningMultiRegionInfo *OI,
+ OptimizationRemarkEmitter &ORE)
+ : OrigFunc(F), ORE(ORE) {
+ ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
+
+ // Clone the function, so that we can hack away on it.
+ ValueToValueMapTy VMap;
+ ClonedFunc = CloneFunction(F, VMap);
+
+ // Go through all Outline Candidate Regions and update all BasicBlock
+ // information.
+ for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
+ OI->ORI) {
+ SmallVector<BasicBlock *, 8> Region;
+ for (BasicBlock *BB : RegionInfo.Region) {
+ Region.push_back(cast<BasicBlock>(VMap[BB]));
+ }
+ BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
+ BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
+ BasicBlock *NewReturnBlock = nullptr;
+ if (RegionInfo.ReturnBlock)
+ NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
+ FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
+ Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
+ ClonedOMRI->ORI.push_back(MappedRegionInfo);
+ }
+ // Go ahead and update all uses to the duplicate, so that we can just
+ // use the inliner functionality when we're done hacking.
+ F->replaceAllUsesWith(ClonedFunc);
+}
+void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
auto getFirstPHI = [](BasicBlock *BB) {
BasicBlock::iterator I = BB->begin();
PHINode *FirstPhi = nullptr;
@@ -676,6 +1039,11 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
return FirstPhi;
};
+ // Shouldn't need to normalize PHIs if we're not outlining non-early return
+ // blocks.
+ if (!ClonedOI)
+ return;
+
// Special hackery is needed with PHI nodes that have inputs from more than
// one extracted block. For simplicity, just split the PHIs into a two-level
// sequence of PHIs, some of which will go in the extracted region, and some
@@ -726,16 +1094,90 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
DeadPhis.push_back(OldPhi);
}
++I;
- }
- for (auto *DP : DeadPhis)
- DP->eraseFromParent();
+ }
+ for (auto *DP : DeadPhis)
+ DP->eraseFromParent();
+
+ for (auto E : ClonedOI->ReturnBlockPreds) {
+ E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
+ }
+}
+
+bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
+
+ auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
+ int Cost = 0;
+ for (BasicBlock* BB : Region)
+ Cost += computeBBInlineCost(BB);
+ return Cost;
+ };
+
+ assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
+
+ if (ClonedOMRI->ORI.empty())
+ return false;
+
+ // The CodeExtractor needs a dominator tree.
+ DominatorTree DT;
+ DT.recalculate(*ClonedFunc);
+
+ // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
+ LoopInfo LI(DT);
+ BranchProbabilityInfo BPI(*ClonedFunc, LI);
+ ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
- for (auto E : ClonedOI->ReturnBlockPreds) {
- E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
+ SetVector<Value *> Inputs, Outputs, Sinks;
+ for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
+ ClonedOMRI->ORI) {
+ int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
+
+ CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
+ ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
+
+ CE.findInputsOutputs(Inputs, Outputs, Sinks);
+
+#ifndef NDEBUG
+ if (TracePartialInlining) {
+ dbgs() << "inputs: " << Inputs.size() << "\n";
+ dbgs() << "outputs: " << Outputs.size() << "\n";
+ for (Value *value : Inputs)
+ dbgs() << "value used in func: " << *value << "\n";
+ for (Value *output : Outputs)
+ dbgs() << "instr used in func: " << *output << "\n";
}
+#endif
+ // Do not extract regions that have live exit variables.
+ if (Outputs.size() > 0 && !ForceLiveExit)
+ continue;
+
+ Function *OutlinedFunc = CE.extractCodeRegion();
+
+ if (OutlinedFunc) {
+ CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
+ BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
+ assert(OutliningCallBB->getParent() == ClonedFunc);
+ OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
+ NumColdRegionsOutlined++;
+ OutlinedRegionCost += CurrentOutlinedRegionCost;
+
+ if (MarkOutlinedColdCC) {
+ OutlinedFunc->setCallingConv(CallingConv::Cold);
+ OCS.setCallingConv(CallingConv::Cold);
+ }
+ } else
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
+ &RegionInfo.Region.front()->front())
+ << "Failed to extract region at block "
+ << ore::NV("Block", RegionInfo.Region.front());
+ });
+ }
+
+ return !OutlinedFunctions.empty();
}
-Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
+Function *
+PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// Returns true if the block is to be partially inlined into the caller
// (i.e. not to be extracted to the out of line function)
auto ToBeInlined = [&, this](BasicBlock *BB) {
@@ -744,6 +1186,16 @@ Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
ClonedOI->Entries.end());
};
+ assert(ClonedOI && "Expecting OutlineInfo for single region outline");
+ // The CodeExtractor needs a dominator tree.
+ DominatorTree DT;
+ DT.recalculate(*ClonedFunc);
+
+ // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
+ LoopInfo LI(DT);
+ BranchProbabilityInfo BPI(*ClonedFunc, LI);
+ ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
+
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock *> ToExtract;
ToExtract.push_back(ClonedOI->NonReturnBlock);
@@ -759,26 +1211,27 @@ Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
OutlinedRegionCost += computeBBInlineCost(&BB);
}
- // The CodeExtractor needs a dominator tree.
- DominatorTree DT;
- DT.recalculate(*ClonedFunc);
-
- // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
- LoopInfo LI(DT);
- BranchProbabilityInfo BPI(*ClonedFunc, LI);
- ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
-
// Extract the body of the if.
- OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
- ClonedFuncBFI.get(), &BPI)
- .extractCodeRegion();
+ Function *OutlinedFunc =
+ CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
+ ClonedFuncBFI.get(), &BPI,
+ /* AllowVarargs */ true)
+ .extractCodeRegion();
if (OutlinedFunc) {
- OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
- .getInstruction()
- ->getParent();
+ BasicBlock *OutliningCallBB =
+ PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
+ .getInstruction()
+ ->getParent();
assert(OutliningCallBB->getParent() == ClonedFunc);
- }
+ OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
+ } else
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
+ &ToExtract.front()->front())
+ << "Failed to extract region at block "
+ << ore::NV("Block", ToExtract.front());
+ });
return OutlinedFunc;
}
@@ -789,76 +1242,133 @@ PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
ClonedFunc->replaceAllUsesWith(OrigFunc);
ClonedFunc->eraseFromParent();
if (!IsFunctionInlined) {
- // Remove the function that is speculatively created if there is no
+ // Remove each function that was speculatively created if there is no
// reference.
- if (OutlinedFunc)
- OutlinedFunc->eraseFromParent();
+ for (auto FuncBBPair : OutlinedFunctions) {
+ Function *Func = FuncBBPair.first;
+ Func->eraseFromParent();
+ }
}
}
-Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (F->hasAddressTaken())
- return nullptr;
+ return {false, nullptr};
// Let inliner handle it
if (F->hasFnAttribute(Attribute::AlwaysInline))
- return nullptr;
+ return {false, nullptr};
if (F->hasFnAttribute(Attribute::NoInline))
- return nullptr;
+ return {false, nullptr};
if (PSI->isFunctionEntryCold(F))
- return nullptr;
+ return {false, nullptr};
if (F->user_begin() == F->user_end())
- return nullptr;
+ return {false, nullptr};
- std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
+ auto &ORE = (*GetORE)(*F);
+
+ // Only try to outline cold regions if we have a profile summary, which
+ // implies we have profiling information.
+ if (PSI->hasProfileSummary() && F->getEntryCount().hasValue() &&
+ !DisableMultiRegionPartialInline) {
+ std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
+ computeOutliningColdRegionsInfo(F);
+ if (OMRI) {
+ FunctionCloner Cloner(F, OMRI.get(), ORE);
+
+#ifndef NDEBUG
+ if (TracePartialInlining) {
+ dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
+ dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
+ << "\n";
+ }
+#endif
+ bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
+
+ if (DidOutline) {
+#ifndef NDEBUG
+ if (TracePartialInlining) {
+ dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
+ Cloner.ClonedFunc->print(dbgs());
+ dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
+ }
+#endif
+ if (tryPartialInline(Cloner))
+ return {true, nullptr};
+ }
+ }
+ }
+
+ // Fall-thru to regular partial inlining if we:
+ // i) can't find any cold regions to outline, or
+ // ii) can't inline the outlined function anywhere.
+ std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
if (!OI)
- return nullptr;
+ return {false, nullptr};
- FunctionCloner Cloner(F, OI.get());
+ FunctionCloner Cloner(F, OI.get(), ORE);
Cloner.NormalizeReturnBlock();
- Function *OutlinedFunction = Cloner.doFunctionOutlining();
+
+ Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
+
+ if (!OutlinedFunction)
+ return {false, nullptr};
bool AnyInline = tryPartialInline(Cloner);
if (AnyInline)
- return OutlinedFunction;
+ return {true, OutlinedFunction};
- return nullptr;
+ return {false, nullptr};
}
bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
- int NonWeightedRcost;
- int SizeCost;
-
- if (Cloner.OutlinedFunc == nullptr)
+ if (Cloner.OutlinedFunctions.empty())
return false;
+ int SizeCost = 0;
+ BlockFrequency WeightedRcost;
+ int NonWeightedRcost;
std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
- auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
- auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
+ // Only calculate RelativeToEntryFreq when we are doing single region
+ // outlining.
+ BranchProbability RelativeToEntryFreq;
+ if (Cloner.ClonedOI) {
+ RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
+ } else
+ // RelativeToEntryFreq doesn't make sense when we have more than one
+ // outlined call because each call will have a different relative frequency
+ // to the entry block. We can consider using the average, but the
+ // usefulness of that information is questionable. For now, assume we never
+ // execute the calls to outlined functions.
+ RelativeToEntryFreq = BranchProbability(0, 1);
+
+ WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
- // The call sequence to the outlined function is larger than the original
- // outlined region size, it does not increase the chances of inlining
- // the function with outlining (The inliner usies the size increase to
+ // If the call sequence(s) to the outlined function(s) are larger than the
+ // sum of the original outlined region size(s), outlining does not increase
+ // the chances of inlining the function (the inliner uses the size increase to
// model the cost of inlining a callee).
if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
- OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
+ auto &ORE = (*GetORE)(*Cloner.OrigFunc);
DebugLoc DLoc;
BasicBlock *Block;
std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
DLoc, Block)
<< ore::NV("Function", Cloner.OrigFunc)
<< " not partially inlined into callers (Original Size = "
<< ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
<< ", Size of call sequence to outlined function = "
- << ore::NV("NewSize", SizeCost) << ")");
+ << ore::NV("NewSize", SizeCost) << ")";
+ });
return false;
}
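The weighting above scales the flat runtime cost by how often the outlining call block executes relative to function entry; with multiple outlined regions the patch pessimistically assumes the outlined calls never run. A minimal sketch of that computation, using LLVM's Support types and made-up frequencies:

    #include "llvm/Support/BlockFrequency.h"
    #include "llvm/Support/BranchProbability.h"
    using namespace llvm;

    BlockFrequency weightedCost(int NonWeightedRcost, bool SingleRegion) {
      // Single-region case: suppose the outlining call block runs on 1 of
      // every 8 entries to the function (a hypothetical ratio).
      BranchProbability RelFreq =
          SingleRegion ? BranchProbability(1, 8)
                       : BranchProbability(0, 1); // multi-region: assume never
      return BlockFrequency(NonWeightedRcost) * RelFreq;
    }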
@@ -882,18 +1392,26 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
if (IsLimitReached())
continue;
- OptimizationRemarkEmitter ORE(CS.getCaller());
- if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
+ if (!shouldPartialInline(CS, Cloner, WeightedRcost))
continue;
- ORE.emit(
- OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
- << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
- << ore::NV("Caller", CS.getCaller()));
+ auto &ORE = (*GetORE)(*CS.getCaller());
+ // Construct remark before doing the inlining, as after successful inlining
+ // the callsite is removed.
+ OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
+ OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
+ << ore::NV("Caller", CS.getCaller());
InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
- InlineFunction(CS, IFI);
+ // We can only forward varargs when we outlined a single region, else we
+ // bail on vararg functions.
+ if (!InlineFunction(CS, IFI, nullptr, true,
+ (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
+ : nullptr)))
+ continue;
+
+ ORE.emit(OR);
// Now update the entry count:
if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
@@ -904,13 +1422,23 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
AnyInline = true;
NumPartialInlining++;
// Update the stats
- NumPartialInlined++;
+ if (Cloner.ClonedOI)
+ NumPartialInlined++;
+ else
+ NumColdOutlinePartialInlined++;
+
}
if (AnyInline) {
Cloner.IsFunctionInlined = true;
if (CalleeEntryCount)
Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
+ auto &ORE = (*GetORE)(*Cloner.OrigFunc);
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
+ << "Partially inlined into at least one caller";
+ });
+
}
return AnyInline;
@@ -944,8 +1472,10 @@ bool PartialInlinerImpl::run(Module &M) {
if (Recursive)
continue;
- if (Function *NewFunc = unswitchFunction(CurrFunc)) {
- Worklist.push_back(NewFunc);
+ std::pair<bool, Function *> Result = unswitchFunction(CurrFunc);
+ if (Result.second)
+ Worklist.push_back(Result.second);
+ if (Result.first) {
Changed = true;
}
}
@@ -954,6 +1484,7 @@ bool PartialInlinerImpl::run(Module &M) {
}
char PartialInlinerLegacyPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
"Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
@@ -985,9 +1516,15 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
return FAM.getResult<TargetIRAnalysis>(F);
};
+ std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
+ [&FAM](Function &F) -> OptimizationRemarkEmitter & {
+ return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ };
+
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
+ if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE)
+ .run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 0b319f6a488b..3855e6245d8e 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -26,11 +26,9 @@
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
@@ -94,15 +92,6 @@ static cl::opt<bool> EnableLoopInterchange(
"enable-loopinterchange", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopInterchange Pass"));
-static cl::opt<bool> EnableNonLTOGlobalsModRef(
- "enable-non-lto-gmr", cl::init(true), cl::Hidden,
- cl::desc(
- "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline."));
-
-static cl::opt<bool> EnableLoopLoadElim(
- "enable-loop-load-elim", cl::init(true), cl::Hidden,
- cl::desc("Enable the LoopLoadElimination Pass"));
-
static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
@@ -160,7 +149,6 @@ PassManagerBuilder::PassManagerBuilder() {
SizeLevel = 0;
LibraryInfo = nullptr;
Inliner = nullptr;
- DisableUnitAtATime = false;
DisableUnrollLoops = false;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
@@ -251,6 +239,7 @@ void PassManagerBuilder::addInstructionCombiningPass(
void PassManagerBuilder::populateFunctionPassManager(
legacy::FunctionPassManager &FPM) {
addExtensionsToPM(EP_EarlyAsPossible, FPM);
+ FPM.add(createEntryExitInstrumenterPass());
// Add LibraryInfo if we have some.
if (LibraryInfo)
@@ -428,6 +417,14 @@ void PassManagerBuilder::populateModulePassManager(
else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
+ if (PerformThinLTO) {
+ // Drop available_externally and unreferenced globals. This is necessary
+ // with ThinLTO in order to avoid leaving undefined references to dead
+ // globals in the object file.
+ MPM.add(createEliminateAvailableExternallyPass());
+ MPM.add(createGlobalDCEPass());
+ }
+
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
// Rename anon globals to be able to export them in the summary.
@@ -464,23 +461,25 @@ void PassManagerBuilder::populateModulePassManager(
if (PrepareForThinLTOUsingPGOSampleProfile)
DisableUnrollLoops = true;
- if (!DisableUnitAtATime) {
- // Infer attributes about declarations if possible.
- MPM.add(createInferFunctionAttrsLegacyPass());
+ // Infer attributes about declarations if possible.
+ MPM.add(createInferFunctionAttrsLegacyPass());
- addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
+ addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
- MPM.add(createIPSCCPPass()); // IP SCCP
- MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
- // Promote any localized global vars.
- MPM.add(createPromoteMemoryToRegisterPass());
+ if (OptLevel > 2)
+ MPM.add(createCallSiteSplittingPass());
- MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
+ MPM.add(createIPSCCPPass()); // IP SCCP
+ MPM.add(createCalledValuePropagationPass());
+ MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+ // Promote any localized global vars.
+ MPM.add(createPromoteMemoryToRegisterPass());
- addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE
- addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
- }
+ MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
+
+ addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE
+ addExtensionsToPM(EP_Peephole, MPM);
+ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
// For SamplePGO in ThinLTO compile phase, we do not want to do indirect
// call promotion as it will change the CFG too much to make the 2nd
@@ -490,21 +489,21 @@ void PassManagerBuilder::populateModulePassManager(
if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
addPGOInstrPasses(MPM);
- if (EnableNonLTOGlobalsModRef)
- // We add a module alias analysis pass here. In part due to bugs in the
- // analysis infrastructure this "works" in that the analysis stays alive
- // for the entire SCC pass run below.
- MPM.add(createGlobalsAAWrapperPass());
+ // We add a module alias analysis pass here. In part due to bugs in the
+ // analysis infrastructure this "works" in that the analysis stays alive
+ // for the entire SCC pass run below.
+ MPM.add(createGlobalsAAWrapperPass());
// Start of CallGraph SCC passes.
- if (!DisableUnitAtATime)
- MPM.add(createPruneEHPass()); // Remove dead EH info
+ MPM.add(createPruneEHPass()); // Remove dead EH info
+ bool RunInliner = false;
if (Inliner) {
MPM.add(Inliner);
Inliner = nullptr;
+ RunInliner = true;
}
- if (!DisableUnitAtATime)
- MPM.add(createPostOrderFunctionAttrsLegacyPass());
+
+ MPM.add(createPostOrderFunctionAttrsLegacyPass());
if (OptLevel > 2)
MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -515,11 +514,11 @@ void PassManagerBuilder::populateModulePassManager(
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+
if (RunPartialInlining)
MPM.add(createPartialInliningPass());
- if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO &&
- !PrepareForThinLTO)
+ if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO)
// Remove avail extern fns and globals definitions if we aren't
// compiling an object file for later LTO. For LTO we want to preserve
// these so they are eligible for inlining at link-time. Note if they
@@ -531,15 +530,26 @@ void PassManagerBuilder::populateModulePassManager(
// and saves running remaining passes on the eliminated functions.
MPM.add(createEliminateAvailableExternallyPass());
- if (!DisableUnitAtATime)
- MPM.add(createReversePostOrderFunctionAttrsPass());
+ MPM.add(createReversePostOrderFunctionAttrsPass());
+
+ // The inliner performs some kind of dead code elimination as it goes,
+ // but there are cases that are not really caught by it. We might
+ // at some point consider teaching the inliner about them, but it
+ // is OK for now to run GlobalOpt + GlobalDCE in tandem as their
+ // benefits generally outweigh the cost, making the whole pipeline
+ // faster.
+ if (RunInliner) {
+ MPM.add(createGlobalOptimizerPass());
+ MPM.add(createGlobalDCEPass());
+ }
// If we are planning to perform ThinLTO later, let's not bloat the code with
// unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
// during ThinLTO and perform the rest of the optimizations afterward.
if (PrepareForThinLTO) {
- // Reduce the size of the IR as much as possible.
- MPM.add(createGlobalOptimizerPass());
+ // Ensure we perform any last passes, but do so before renaming anonymous
+ // globals in case the passes add any.
+ addExtensionsToPM(EP_OptimizerLast, MPM);
// Rename anon globals to be able to export them in the summary.
MPM.add(createNameAnonGlobalPass());
return;
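For context, the RunInliner flag tracked above only becomes true when a client installs an inliner before populating the pipeline. A sketch of how a typical -O2-style client drives this builder under the legacy pass manager (the exact settings are illustrative, not prescribed by the patch):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    void buildO2Pipeline(legacy::PassManager &MPM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.SizeLevel = 0;
      // Installing an inliner is what makes RunInliner true above, which in
      // turn schedules the extra GlobalOpt + GlobalDCE cleanup.
      PMB.Inliner = createFunctionInliningPass(PMB.OptLevel, PMB.SizeLevel,
                                               /*DisableInlineHotCallSite=*/false);
      PMB.populateModulePassManager(MPM);
    }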
@@ -560,23 +570,22 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLICMPass()); // Hoist loop invariants
}
- if (EnableNonLTOGlobalsModRef)
- // We add a fresh GlobalsModRef run at this point. This is particularly
- // useful as the above will have inlined, DCE'ed, and function-attr
- // propagated everything. We should at this point have a reasonably minimal
- // and richly annotated call graph. By computing aliasing and mod/ref
- // information for all local globals here, the late loop passes and notably
- // the vectorizer will be able to use them to help recognize vectorizable
- // memory operations.
- //
- // Note that this relies on a bug in the pass manager which preserves
- // a module analysis into a function pass pipeline (and throughout it) so
- // long as the first function pass doesn't invalidate the module analysis.
- // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
- // this to work. Fortunately, it is trivial to preserve AliasAnalysis
- // (doing nothing preserves it as it is required to be conservatively
- // correct in the face of IR changes).
- MPM.add(createGlobalsAAWrapperPass());
+ // We add a fresh GlobalsModRef run at this point. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ //
+ // Note that this relies on a bug in the pass manager which preserves
+ // a module analysis into a function pass pipeline (and throughout it) so
+ // long as the first function pass doesn't invalidate the module analysis.
+ // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
+ // this to work. Fortunately, it is trivial to preserve AliasAnalysis
+ // (doing nothing preserves it as it is required to be conservatively
+ // correct in the face of IR changes).
+ MPM.add(createGlobalsAAWrapperPass());
MPM.add(createFloat2IntPass());
@@ -597,8 +606,7 @@ void PassManagerBuilder::populateModulePassManager(
// Eliminate loads by forwarding stores from the previous iteration to loads
// of the current iteration.
- if (EnableLoopLoadElim)
- MPM.add(createLoopLoadEliminationPass());
+ MPM.add(createLoopLoadEliminationPass());
// FIXME: Because of #pragma vectorize enable, the passes below are always
// inserted in the pipeline, even when the vectorizer doesn't run (ex. when
@@ -622,6 +630,13 @@ void PassManagerBuilder::populateModulePassManager(
addInstructionCombiningPass(MPM);
}
+ // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+ // GVN, loop transforms, and others have already run, so it's now better to
+ // convert to more optimized IR using more aggressive simplify CFG options.
+ // The extra sinking transform can create larger basic blocks, so do this
+ // before SLP vectorization.
+ MPM.add(createCFGSimplificationPass(1, true, true, false, true));
+
if (RunSLPAfterLoopVectorization && SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (OptLevel > 1 && ExtraVectorizerPasses) {
@@ -630,7 +645,6 @@ void PassManagerBuilder::populateModulePassManager(
}
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createLateCFGSimplificationPass()); // Switches to lookup tables
addInstructionCombiningPass(MPM);
if (!DisableUnrollLoops) {
@@ -650,16 +664,14 @@ void PassManagerBuilder::populateModulePassManager(
// about pointer alignments.
MPM.add(createAlignmentFromAssumptionsPass());
- if (!DisableUnitAtATime) {
- // FIXME: We shouldn't bother with this anymore.
- MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
+ // FIXME: We shouldn't bother with this anymore.
+ MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
- // GlobalOpt already deletes dead functions and globals, at -O2 try a
- // late pass of GlobalDCE. It is capable of deleting dead cycles.
- if (OptLevel > 1) {
- MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
- MPM.add(createConstantMergePass()); // Merge dup global constants
- }
+ // GlobalOpt already deletes dead functions and globals, at -O2 try a
+ // late pass of GlobalDCE. It is capable of deleting dead cycles.
+ if (OptLevel > 1) {
+ MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
+ MPM.add(createConstantMergePass()); // Merge dup global constants
}
if (MergeFunctions)
@@ -673,6 +685,11 @@ void PassManagerBuilder::populateModulePassManager(
// Get rid of LCSSA nodes.
MPM.add(createInstructionSimplifierPass());
+ // This hoists/decomposes div/rem ops. It should run after other sink/hoist
+ // passes to avoid re-sinking, but before SimplifyCFG because it can allow
+ // flattening of blocks.
+ MPM.add(createDivRemPairsPass());
+
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
MPM.add(createCFGSimplificationPass());
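The new DivRemPairs pass scheduled above targets source patterns like the following sketch: when both the quotient and the remainder of the same operands are live, keeping the two operations adjacent lets a target whose divide instruction produces both results (e.g. x86) issue a single division; on other targets the remainder can be decomposed into a - (a / b) * b.

    // Illustrative only: a quotient/remainder pair DivRemPairs can service
    // with one hardware division.
    int divmod(int a, int b, int *rem) {
      *rem = a % b;
      return a / b;
    }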
@@ -695,6 +712,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createInferFunctionAttrsLegacyPass());
if (OptLevel > 1) {
+ // Split call-site with more constrained arguments.
+ PM.add(createCallSiteSplittingPass());
+
// Indirect call promotion. This should promote all the targets that are
// left by the earlier promotion pass that promotes intra-module targets.
// This two-step promotion is to save the compile time. For LTO, it should
@@ -706,6 +726,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
PM.add(createIPSCCPPass());
+
+ // Attach metadata to indirect call sites indicating the set of functions
+ // they may target at run-time. This should follow IPSCCP.
+ PM.add(createCalledValuePropagationPass());
}
// Infer attributes about definitions. The readnone attribute in particular is
@@ -936,8 +960,7 @@ LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
void
LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
LLVMBool Value) {
- PassManagerBuilder *Builder = unwrap(PMB);
- Builder->DisableUnitAtATime = Value;
+ // NOTE: The DisableUnitAtATime switch has been removed.
}
void
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 3fd59847a005..46b088189040 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 6baada2c1ae1..f0e781b9d923 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -23,39 +23,65 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/SampleProfile.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include <cctype>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace sampleprof;
@@ -67,34 +93,39 @@ using namespace sampleprof;
static cl::opt<std::string> SampleProfileFile(
"sample-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+
static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
"sample-profile-max-propagate-iterations", cl::init(100),
cl::desc("Maximum number of iterations to go through when propagating "
"sample block/edge weights through the CFG."));
+
static cl::opt<unsigned> SampleProfileRecordCoverage(
"sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
cl::desc("Emit a warning if less than N% of records in the input profile "
"are matched to the IR."));
+
static cl::opt<unsigned> SampleProfileSampleCoverage(
"sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
cl::desc("Emit a warning if less than N% of samples in the input profile "
"are matched to the IR."));
+
static cl::opt<double> SampleProfileHotThreshold(
"sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
cl::desc("Inlined functions that account for more than N% of all samples "
"collected in the parent function, will be inlined again."));
namespace {
-typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
-typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
-typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
-typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
-typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
- BlockEdgeMap;
+
+using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
+using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
+using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
+using EdgeWeightMap = DenseMap<Edge, uint64_t>;
+using BlockEdgeMap =
+ DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;
class SampleCoverageTracker {
public:
- SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+ SampleCoverageTracker() = default;
bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
uint32_t Discriminator, uint64_t Samples);
@@ -103,15 +134,16 @@ public:
unsigned countBodyRecords(const FunctionSamples *FS) const;
uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
uint64_t countBodySamples(const FunctionSamples *FS) const;
+
void clear() {
SampleCoverage.clear();
TotalUsedSamples = 0;
}
private:
- typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
- typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
- FunctionSamplesCoverageMap;
+ using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
+ using FunctionSamplesCoverageMap =
+ DenseMap<const FunctionSamples *, BodySampleCoverageMap>;
/// Coverage map for sampling records.
///
@@ -135,7 +167,7 @@ private:
/// and all the inlined callsites. Strictly, we should have a map of counters
/// keyed by FunctionSamples pointers, but these stats are cleared after
/// every function, so we just need to keep a single counter.
- uint64_t TotalUsedSamples;
+ uint64_t TotalUsedSamples = 0;
};
/// \brief Sample profile pass.
@@ -145,29 +177,31 @@ private:
/// profile information found in that file.
class SampleProfileLoader {
public:
- SampleProfileLoader(StringRef Name = SampleProfileFile)
- : DT(nullptr), PDT(nullptr), LI(nullptr), ACT(nullptr), Reader(),
- Samples(nullptr), Filename(Name), ProfileIsValid(false),
- TotalCollectedSamples(0) {}
+ SampleProfileLoader(
+ StringRef Name, bool IsThinLTOPreLink,
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache,
+ std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
+ : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo),
+ Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
- bool runOnModule(Module &M);
- void setACT(AssumptionCacheTracker *A) { ACT = A; }
+ bool runOnModule(Module &M, ModuleAnalysisManager *AM);
void dump() { Reader->dump(); }
protected:
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
std::vector<const FunctionSamples *>
- findIndirectCallFunctionSamples(const Instruction &I) const;
+ findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
+ bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,
- DenseSet<GlobalValue::GUID> &ImportGUIDs);
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -222,7 +256,8 @@ protected:
std::unique_ptr<PostDomTreeBase<BasicBlock>> PDT;
std::unique_ptr<LoopInfo> LI;
- AssumptionCacheTracker *ACT;
+ std::function<AssumptionCache &(Function &)> GetAC;
+ std::function<TargetTransformInfo &(Function &)> GetTTI;
/// \brief Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -236,19 +271,28 @@ protected:
std::unique_ptr<SampleProfileReader> Reader;
/// \brief Samples collected for the body of this function.
- FunctionSamples *Samples;
+ FunctionSamples *Samples = nullptr;
/// \brief Name of the profile file to load.
std::string Filename;
/// \brief Flag indicating whether the profile input loaded successfully.
- bool ProfileIsValid;
+ bool ProfileIsValid = false;
+
+ /// \brief Flag indicating if the pass is invoked in ThinLTO compile phase.
+ ///
+ /// In this phase, in annotation, we should not promote indirect calls.
+ /// Instead, we will mark the GUIDs that need to be annotated on the function.
+ bool IsThinLTOPreLink;
/// \brief Total number of samples collected in this profile.
///
/// This is the sum of all the samples collected in all the functions executed
/// at runtime.
- uint64_t TotalCollectedSamples;
+ uint64_t TotalCollectedSamples = 0;
+
+ /// \brief Optimization Remark Emitter used to emit diagnostic remarks.
+ OptimizationRemarkEmitter *ORE = nullptr;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -256,8 +300,15 @@ public:
// Class identification, replacement for typeinfo
static char ID;
- SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile)
- : ModulePass(ID), SampleLoader(Name) {
+ SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
+ bool IsThinLTOPreLink = false)
+ : ModulePass(ID), SampleLoader(Name, IsThinLTOPreLink,
+ [&](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ },
+ [&](Function &F) -> TargetTransformInfo & {
+ return TTIWP->getTTI(F);
+ }) {
initializeSampleProfileLoaderLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -267,17 +318,23 @@ public:
bool doInitialization(Module &M) override {
return SampleLoader.doInitialization(M);
}
+
StringRef getPassName() const override { return "Sample profile pass"; }
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
SampleProfileLoader SampleLoader;
+ AssumptionCacheTracker *ACT = nullptr;
+ TargetTransformInfoWrapperPass *TTIWP = nullptr;
};
+} // end anonymous namespace
+
/// Return true if the given callsite is hot wrt to its caller.
///
/// Functions that were inlined in the original binary will be represented
@@ -292,8 +349,8 @@ private:
///
/// If that fraction is larger than the default given by
/// SampleProfileHotThreshold, the callsite will be inlined again.
-bool callsiteIsHot(const FunctionSamples *CallerFS,
- const FunctionSamples *CallsiteFS) {
+static bool callsiteIsHot(const FunctionSamples *CallerFS,
+ const FunctionSamples *CallsiteFS) {
if (!CallsiteFS)
return false; // The callsite was not inlined in the original binary.
@@ -309,7 +366,6 @@ bool callsiteIsHot(const FunctionSamples *CallerFS,
(double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
return PercentSamples >= SampleProfileHotThreshold;
}
-}
/// Mark as used the sample record for the given function samples at
/// (LineOffset, Discriminator).
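A worked instance of the hotness test described above, using hypothetical sample counts and the default 0.1% SampleProfileHotThreshold:

    // Mirrors the PercentSamples computation in callsiteIsHot().
    static bool isHotExample(uint64_t CallsiteSamples, uint64_t ParentSamples) {
      double PercentSamples =
          (double)CallsiteSamples / (double)ParentSamples * 100.0;
      return PercentSamples >= 0.1; // default SampleProfileHotThreshold
    }
    // isHotExample(12, 1000) -> 1.2% >= 0.1% -> hot, considered for re-inlining
    // isHotExample(0, 1000)  -> 0.0% <  0.1% -> cold, left as a call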
@@ -423,6 +479,7 @@ unsigned SampleProfileLoader::getOffset(const DILocation *DIL) const {
0xffff;
}
+#ifndef NDEBUG
/// \brief Print the weight of edge \p E on stream \p OS.
///
/// \param OS Stream to emit the output to.
@@ -453,6 +510,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
OS << "weight[" << BB->getName() << "]: " << W << "\n";
}
+#endif
/// \brief Get the weight for an instruction.
///
@@ -480,10 +538,12 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
return std::error_code();
- // If a call/invoke instruction is inlined in profile, but not inlined here,
+ // If a direct call/invoke instruction is inlined in profile
+ // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
+ !ImmutableCallSite(&Inst).isIndirectCall() &&
findCalleeFunctionSamples(Inst))
return 0;
@@ -495,13 +555,18 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
if (FirstMark) {
- const Function *F = Inst.getParent()->getParent();
- LLVMContext &Ctx = F->getContext();
- emitOptimizationRemark(
- Ctx, DEBUG_TYPE, *F, DLoc,
- Twine("Applied ") + Twine(*R) +
- " samples from profile (offset: " + Twine(LineOffset) +
- ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
+ ORE->emit([&]() {
+ OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
+ Remark << "Applied " << ore::NV("NumSamples", *R);
+ Remark << " samples from profile (offset: ";
+ Remark << ore::NV("LineOffset", LineOffset);
+ if (Discriminator) {
+ Remark << ".";
+ Remark << ore::NV("Discriminator", Discriminator);
+ }
+ Remark << ")";
+ return Remark;
+ });
}
DEBUG(dbgs() << " " << DLoc.getLine() << "."
<< DIL->getBaseDiscriminator() << ":" << Inst
@@ -588,10 +653,11 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
}
/// Returns a vector of FunctionSamples that are the indirect call targets
-/// of \p Inst. The vector is sorted by the total number of samples.
+/// of \p Inst. The vector is sorted by the number of entry samples. Stores
+/// the total call count of the indirect call in \p Sum.
std::vector<const FunctionSamples *>
SampleProfileLoader::findIndirectCallFunctionSamples(
- const Instruction &Inst) const {
+ const Instruction &Inst, uint64_t &Sum) const {
const DILocation *DIL = Inst.getDebugLoc();
std::vector<const FunctionSamples *> R;
@@ -603,16 +669,25 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
if (FS == nullptr)
return R;
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+
+ auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ Sum = 0;
+ if (T)
+ for (const auto &T_C : T.get())
+ Sum += T_C.second;
if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
- if (M->size() == 0)
+ if (M->empty())
return R;
for (const auto &NameFS : *M) {
+ Sum += NameFS.second.getEntrySamples();
R.push_back(&NameFS.second);
}
std::sort(R.begin(), R.end(),
[](const FunctionSamples *L, const FunctionSamples *R) {
- return L->getTotalSamples() > R->getTotalSamples();
+ return L->getEntrySamples() > R->getEntrySamples();
});
}
return R;
@@ -650,6 +725,39 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return FS;
}
+bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
+ assert(isa<CallInst>(I) || isa<InvokeInst>(I));
+ CallSite CS(I);
+ Function *CalledFunction = CS.getCalledFunction();
+ assert(CalledFunction);
+ DebugLoc DLoc = I->getDebugLoc();
+ BasicBlock *BB = I->getParent();
+ InlineParams Params = getInlineParams();
+ Params.ComputeFullInlineCost = true;
+ // Checks if there is anything in the reachable portion of the callee at
+ // this callsite that makes this inlining potentially illegal. Need to
+ // set ComputeFullInlineCost, otherwise getInlineCost may return early
+ // when cost exceeds threshold without checking all IRs in the callee.
+ // The actual cost does not matter because we only check isNever() to
+ // see if it is legal to inline the callsite.
+ InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
+ None, nullptr, nullptr);
+ if (Cost.isNever()) {
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
+ << "incompatible inlining");
+ return false;
+ }
+ InlineFunctionInfo IFI(nullptr, &GetAC);
+ if (InlineFunction(CS, IFI)) {
+ // The call to InlineFunction erases I, so we can't pass it here.
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
+ << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
+ << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
+ return true;
+ }
+ return false;
+}
+
/// \brief Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, and find if
@@ -659,17 +767,14 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
/// it to direct call. Each indirect call is limited with a single target.
///
/// \param F function to perform iterative inlining.
-/// \param ImportGUIDs a set to be updated to include all GUIDs that come
-/// from a different module but inlined in the profiled binary.
+/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
+/// inlined in the profiled binary.
///
/// \returns True if any inlining happened.
bool SampleProfileLoader::inlineHotFunctions(
- Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
+ Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
- LLVMContext &Ctx = F.getContext();
- std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
- Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); };
while (true) {
bool LocalChanged = false;
SmallVector<Instruction *, 10> CIS;
@@ -690,57 +795,59 @@ bool SampleProfileLoader::inlineHotFunctions(
}
}
for (auto I : CIS) {
- InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
Function *CalledFunction = CallSite(I).getCalledFunction();
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
- Instruction *DI = I;
- if (!CalledFunction && !PromotedInsns.count(I) &&
- CallSite(I).isIndirectCall())
- for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
+ if (CallSite(I).isIndirectCall()) {
+ if (PromotedInsns.count(I))
+ continue;
+ uint64_t Sum;
+ for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
+ if (IsThinLTOPreLink) {
+ FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
+ Samples->getTotalSamples() *
+ SampleProfileHotThreshold / 100);
+ continue;
+ }
auto CalleeFunctionName = FS->getName();
// If it is a recursive call, we do not inline it as it could bloat
// the code exponentially. There is a way to better handle this, e.g.
// clone the caller first, and inline the cloned caller if it is
- // recursive. As llvm does not inline recursive calls, we will simply
- // ignore it instead of handling it explicitly.
+ // recursive. As llvm does not inline recursive calls, we will
+ // simply ignore it instead of handling it explicitly.
if (CalleeFunctionName == F.getName())
continue;
+
const char *Reason = "Callee function not available";
auto R = SymbolMap.find(CalleeFunctionName);
- if (R == SymbolMap.end())
- continue;
- CalledFunction = R->getValue();
- if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
- // The indirect target was promoted and inlined in the profile, as a
- // result, we do not have profile info for the branch probability.
- // We set the probability to 80% taken to indicate that the static
- // call is likely taken.
- DI = dyn_cast<Instruction>(
- promoteIndirectCall(I, CalledFunction, 80, 100, false)
- ->stripPointerCasts());
+ if (R != SymbolMap.end() && R->getValue() &&
+ !R->getValue()->isDeclaration() &&
+ R->getValue()->getSubprogram() &&
+ isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
+ uint64_t C = FS->getEntrySamples();
+ Instruction *DI =
+ pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
+ Sum -= C;
PromotedInsns.insert(I);
+ // If profile mismatches, we should not attempt to inline DI.
+ if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
+ inlineCallInstruction(DI))
+ LocalChanged = true;
} else {
- DEBUG(dbgs() << "\nFailed to promote indirect call to "
- << CalleeFunctionName << " because " << Reason
- << "\n");
- continue;
+ DEBUG(dbgs()
+ << "\nFailed to promote indirect call to "
+ << CalleeFunctionName << " because " << Reason << "\n");
}
}
- if (!CalledFunction || !CalledFunction->getSubprogram()) {
- findCalleeFunctionSamples(*I)->findImportedFunctions(
- ImportGUIDs, F.getParent(),
+ } else if (CalledFunction && CalledFunction->getSubprogram() &&
+ !CalledFunction->isDeclaration()) {
+ if (inlineCallInstruction(I))
+ LocalChanged = true;
+ } else if (IsThinLTOPreLink) {
+ findCalleeFunctionSamples(*I)->findInlinedFunctions(
+ InlinedGUIDs, F.getParent(),
Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
- continue;
- }
- DebugLoc DLoc = I->getDebugLoc();
- if (InlineFunction(CallSite(DI), IFI)) {
- LocalChanged = true;
- emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
- Twine("inlined hot callee '") +
- CalledFunction->getName() + "' into '" +
- F.getName() + "'");
}
}
if (LocalChanged) {
@@ -1076,24 +1183,20 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
}
-/// Sorts the CallTargetMap \p M by count in descending order and stores the
-/// sorted result in \p Sorted. Returns the total counts.
-static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted,
- const SampleRecord::CallTargetMap &M) {
- Sorted.clear();
- uint64_t Sum = 0;
- for (auto I = M.begin(); I != M.end(); ++I) {
- Sum += I->getValue();
- Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()});
- }
- std::sort(Sorted.begin(), Sorted.end(),
+/// Returns the CallTargetMap \p M sorted by count in descending order.
+static SmallVector<InstrProfValueData, 2> SortCallTargets(
+ const SampleRecord::CallTargetMap &M) {
+ SmallVector<InstrProfValueData, 2> R;
+ for (auto I = M.begin(); I != M.end(); ++I)
+ R.push_back({Function::getGUID(I->getKey()), I->getValue()});
+ std::sort(R.begin(), R.end(),
[](const InstrProfValueData &L, const InstrProfValueData &R) {
if (L.Count == R.Count)
return L.Value > R.Value;
else
return L.Count > R.Count;
});
- return Sum;
+ return R;
}
/// \brief Propagate weights into edges
@@ -1184,10 +1287,12 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!FS)
continue;
auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
- if (!T || T.get().size() == 0)
+ if (!T || T.get().empty())
continue;
- SmallVector<InstrProfValueData, 2> SortedCallTargets;
- uint64_t Sum = SortCallTargets(SortedCallTargets, T.get());
+ SmallVector<InstrProfValueData, 2> SortedCallTargets =
+ SortCallTargets(T.get());
+ uint64_t Sum;
+ findIndirectCallFunctionSamples(I, Sum);
annotateValueSite(*I.getParent()->getParent()->getParent(), I,
SortedCallTargets, Sum, IPVK_IndirectCallTarget,
SortedCallTargets.size());
@@ -1211,7 +1316,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
<< ".\n");
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
- DebugLoc MaxDestLoc;
+ Instruction *MaxDestInst;
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
Edge E = std::make_pair(BB, Succ);
@@ -1230,7 +1335,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
- MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+ MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
}
}
}
@@ -1243,15 +1348,13 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// weights, the second pass does not need to set it.
if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
- TI->setMetadata(llvm::LLVMContext::MD_prof,
+ TI->setMetadata(LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
- emitOptimizationRemark(
- Ctx, DEBUG_TYPE, F, MaxDestLoc,
- Twine("most popular destination for conditional branches at ") +
- ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
- Twine(BranchLoc.getLine()) + ":" +
- Twine(BranchLoc.getCol()))
- : Twine("<UNKNOWN LOCATION>")));
+ ORE->emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
+ << "most popular destination for conditional branches at "
+ << ore::NV("CondBranchesLoc", BranchLoc);
+ });
} else {
DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
}
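What the MD_prof annotation above amounts to, as a self-contained sketch (the weights are hypothetical and the remark emission is omitted):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"

    // Attaches !prof !{!"branch_weights", i32 700, i32 300} to a terminator,
    // as propagateWeights() does for each conditional branch it annotates.
    void annotateBranchExample(llvm::Instruction *TI) {
      llvm::SmallVector<uint32_t, 4> Weights = {700, 300};
      llvm::MDBuilder MDB(TI->getContext());
      TI->setMetadata(llvm::LLVMContext::MD_prof,
                      MDB.createBranchWeights(Weights));
    }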
@@ -1351,18 +1454,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- DenseSet<GlobalValue::GUID> ImportGUIDs;
- Changed |= inlineHotFunctions(F, ImportGUIDs);
+ DenseSet<GlobalValue::GUID> InlinedGUIDs;
+ Changed |= inlineHotFunctions(F, InlinedGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
// Add an entry count to the function using the samples gathered at the
- // function entry. Also sets the GUIDs that comes from a different
- // module but inlined in the profiled binary. This is aiming at making
- // the IR match the profiled binary before annotation.
- F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
+ // function entry.
+ // Sets the GUIDs that are inlined in the profiled binary. This is used
+ // by the ThinLink step to compute correct liveness, and also to make the IR
+ // match the profiled binary before annotation.
+ F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs);
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
@@ -1404,9 +1508,11 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
}
char SampleProfileLoaderLegacyPass::ID = 0;
+
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
@@ -1431,7 +1537,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
return new SampleProfileLoaderLegacyPass(Name);
}
-bool SampleProfileLoader::runOnModule(Module &M) {
+bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {
if (!ProfileIsValid)
return false;
@@ -1463,7 +1569,7 @@ bool SampleProfileLoader::runOnModule(Module &M) {
for (auto &F : M)
if (!F.isDeclaration()) {
clearFunctionData();
- retval |= runOnFunction(F);
+ retval |= runOnFunction(F, AM);
}
if (M.getProfileSummary() == nullptr)
M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
@@ -1471,13 +1577,23 @@ bool SampleProfileLoader::runOnModule(Module &M) {
}
bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
- // FIXME: pass in AssumptionCache correctly for the new pass manager.
- SampleLoader.setACT(&getAnalysis<AssumptionCacheTracker>());
- return SampleLoader.runOnModule(M);
+ ACT = &getAnalysis<AssumptionCacheTracker>();
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
+ return SampleLoader.runOnModule(M, nullptr);
}
-bool SampleProfileLoader::runOnFunction(Function &F) {
+bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
F.setEntryCount(0);
+ std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
+ if (AM) {
+ auto &FAM =
+ AM->getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
+ .getManager();
+ ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ } else {
+ OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
+ ORE = OwnedORE.get();
+ }
Samples = Reader->getSamplesFor(F);
if (Samples && !Samples->empty())
return emitAnnotations(F);
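Note: runOnFunction now either borrows the OptimizationRemarkEmitter from the new pass manager's analysis cache or owns one built on the spot. A standalone sketch of the borrow-or-own pattern (illustrative types, not the LLVM API):

    #include <memory>

    struct Emitter {};
    struct AnalysisManager {
      Emitter &getResult() {
        static Emitter E; // Manager-owned, cached across queries.
        return E;
      }
    };

    static void runOnFunctionSketch(AnalysisManager *AM) {
      std::unique_ptr<Emitter> OwnedORE;
      Emitter *ORE = nullptr;
      if (AM) {
        ORE = &AM->getResult();                 // Borrowed from the manager.
      } else {
        OwnedORE = std::make_unique<Emitter>(); // Owned; freed on return.
        ORE = OwnedORE.get();
      }
      (void)ORE; // Valid either way for the duration of this function.
    }

    int main() {
      AnalysisManager AM;
      runOnFunctionSketch(&AM);     // New pass manager: borrow.
      runOnFunctionSketch(nullptr); // Legacy pass manager: own.
    }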
@@ -1486,13 +1602,23 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
SampleProfileLoader SampleLoader(
- ProfileFileName.empty() ? SampleProfileFile : ProfileFileName);
+ ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
+ IsThinLTOPreLink, GetAssumptionCache, GetTTI);
SampleLoader.doInitialization(M);
- if (!SampleLoader.runOnModule(M))
+ if (!SampleLoader.runOnModule(M, &AM))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
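Note: the new-PM entry point wraps FunctionAnalysisManager lookups in plain callbacks before handing them to the loader, so SampleProfileLoader itself stays pass-manager-agnostic. A standalone sketch of the getter-injection pattern (std::function standing in for the analysis getters; names are illustrative):

    #include <functional>
    #include <string>
    #include <utility>

    struct Function { std::string Name; };
    struct TTI {}; // Stand-in for TargetTransformInfo.

    struct Loader {
      std::function<TTI &(Function &)> GetTTI;
      explicit Loader(std::function<TTI &(Function &)> GetTTI)
          : GetTTI(std::move(GetTTI)) {}
      void run(Function &F) { TTI &T = GetTTI(F); (void)T; }
    };

    int main() {
      static TTI TheTTI; // Stand-in for an analysis-manager-owned result.
      Loader L([](Function &) -> TTI & { return TheTTI; });
      Function F{"foo"};
      L.run(F);
    }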
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 8ef6bb652309..945133074059 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -19,7 +19,6 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
@@ -40,9 +39,17 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
continue;
auto Name = ExportGV.getName();
- GlobalValue *ImportGV = ImportM.getNamedValue(Name);
- if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV))
- continue;
+ GlobalValue *ImportGV = nullptr;
+ if (!PromoteExtra.count(&ExportGV)) {
+ ImportGV = ImportM.getNamedValue(Name);
+ if (!ImportGV)
+ continue;
+ ImportGV->removeDeadConstantUsers();
+ if (ImportGV->use_empty()) {
+ ImportGV->eraseFromParent();
+ continue;
+ }
+ }
std::string NewName = (Name + ModuleId).str();
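Note: the key change in promoteInternals is that a declaration in the importing module may look used only because uniqued constant expressions (e.g. bitcasts) still point at it; those are dropped before the use_empty check, and a genuinely dead declaration is erased. A sketch of that pattern using the same LLVM calls as the hunk (assumes LLVM headers; not runnable standalone):

    #include "llvm/IR/GlobalValue.h"

    // Returns true if GV had no real uses and was erased.
    static bool eraseIfOnlyDeadConstantUses(llvm::GlobalValue *GV) {
      if (!GV)
        return false;
      GV->removeDeadConstantUsers(); // Make use_empty() meaningful.
      if (!GV->use_empty())
        return false;
      GV->eraseFromParent();
      return true;
    }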
@@ -141,7 +148,9 @@ void simplifyExternals(Module &M) {
continue;
}
- if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
+ if (!F.isDeclaration() || F.getFunctionType() == EmptyFT ||
+ // Changing the type of an intrinsic may invalidate the IR.
+ F.getName().startswith("llvm."))
continue;
Function *NewF =
@@ -376,15 +385,14 @@ void splitAndWriteThinLTOBitcode(
W.writeStrtab();
OS << Buffer;
- // If a minimized bitcode module was requested for the thin link,
- // strip the debug info (the merged module was already stripped above)
- // and write it to the given OS.
+ // If a minimized bitcode module was requested for the thin link, only
+ // the information needed by the thin link is written to the given OS
+ // (the merged module is still written as usual).
if (ThinLinkOS) {
Buffer.clear();
BitcodeWriter W2(Buffer);
StripDebugInfo(M);
- W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
- /*GenerateHash=*/false, &ModHash);
+ W2.writeThinLinkBitcode(&M, Index, ModHash);
W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
&MergedMIndex);
W2.writeSymtab();
@@ -420,14 +428,11 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
ModuleHash ModHash = {{0}};
WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
/*GenerateHash=*/true, &ModHash);
- // If a minimized bitcode module was requested for the thin link,
- // strip the debug info and write it to the given OS.
- if (ThinLinkOS) {
- StripDebugInfo(M);
- WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
- Index,
- /*GenerateHash=*/false, &ModHash);
- }
+ // If a minimized bitcode module was requested for the thin link, only
+ // the information needed by the thin link is written to the given OS.
+ if (ThinLinkOS && Index)
+ WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, *Index, ModHash);
}
class WriteThinLTOBitcode : public ModulePass {
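Note: both call sites now funnel through the dedicated thin-link writers, which emit only the summary and symbol-table information the thin link consumes rather than a debug-stripped copy of the whole module. A sketch mirroring the calls as used in this patch (assumes the LLVM bitcode-writer API; not runnable standalone):

    #include "llvm/Bitcode/BitcodeWriter.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/ModuleSummaryIndex.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void writeFullAndThinLink(Module &M, const ModuleSummaryIndex &Index,
                                     raw_ostream &FullOS,
                                     raw_ostream *ThinLinkOS) {
      ModuleHash ModHash = {{0}};
      WriteBitcodeToFile(&M, FullOS, /*ShouldPreserveUseListOrder=*/false,
                         &Index, /*GenerateHash=*/true, &ModHash);
      if (ThinLinkOS) // Minimized stream for the thin link only.
        WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, Index, ModHash);
    }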
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 00769cd63229..ec56f0cde25d 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -51,14 +51,13 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
@@ -275,18 +274,39 @@ struct VirtualCallSite {
// of that field for details.
unsigned *NumUnsafeUses;
- void emitRemark(const Twine &OptName, const Twine &TargetName) {
+ void
+ emitRemark(const StringRef OptName, const StringRef TargetName,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
Function *F = CS.getCaller();
- emitOptimizationRemark(
- F->getContext(), DEBUG_TYPE, *F,
- CS.getInstruction()->getDebugLoc(),
- OptName + ": devirtualized a call to " + TargetName);
+ DebugLoc DLoc = CS->getDebugLoc();
+ BasicBlock *Block = CS.getParent();
+
+ // In the new pass manager, we can request the optimization
+ // remark emitter pass on a per-function basis, which the
+ // OREGetter will do for us.
+ // In the old pass manager, this is harder, so we just build
+ // an optimization remark emitter on the fly when we need it.
+ std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
+ OptimizationRemarkEmitter *ORE;
+ if (OREGetter)
+ ORE = &OREGetter(F);
+ else {
+ OwnedORE = make_unique<OptimizationRemarkEmitter>(F);
+ ORE = OwnedORE.get();
+ }
+
+ using namespace ore;
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
+ << NV("Optimization", OptName) << ": devirtualized a call to "
+ << NV("FunctionName", TargetName));
}
- void replaceAndErase(const Twine &OptName, const Twine &TargetName,
- bool RemarksEnabled, Value *New) {
+ void replaceAndErase(
+ const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
+ Value *New) {
if (RemarksEnabled)
- emitRemark(OptName, TargetName);
+ emitRemark(OptName, TargetName, OREGetter);
CS->replaceAllUsesWith(New);
if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) {
BranchInst::Create(II->getNormalDest(), CS.getInstruction());
@@ -383,6 +403,7 @@ struct DevirtModule {
IntegerType *IntPtrTy;
bool RemarksEnabled;
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
MapVector<VTableSlot, VTableSlotInfo> CallSlots;
@@ -397,6 +418,7 @@ struct DevirtModule {
std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
: M(M), AARGetter(AARGetter), ExportSummary(ExportSummary),
@@ -405,7 +427,7 @@ struct DevirtModule {
Int32Ty(Type::getInt32Ty(M.getContext())),
Int64Ty(Type::getInt64Ty(M.getContext())),
IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
- RemarksEnabled(areRemarksEnabled()) {
+ RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
assert(!(ExportSummary && ImportSummary));
}
@@ -444,16 +466,23 @@ struct DevirtModule {
std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
StringRef Name);
+ bool shouldExportConstantsAsAbsoluteSymbols();
+
// This function is called during the export phase to create a symbol
// definition containing information about the given vtable slot and list of
// arguments.
void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
Constant *C);
+ void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
+ uint32_t Const, uint32_t &Storage);
// This function is called during the import phase to create a reference to
// the symbol definition created during the export phase.
Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
- StringRef Name, unsigned AbsWidth = 0);
+ StringRef Name);
+ Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, IntegerType *IntTy,
+ uint32_t Storage);
void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
Constant *UniqueMemberAddr);
@@ -482,8 +511,9 @@ struct DevirtModule {
// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
- static bool runForTesting(Module &M,
- function_ref<AAResults &(Function &)> AARGetter);
+ static bool runForTesting(
+ Module &M, function_ref<AAResults &(Function &)> AARGetter,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
};
struct WholeProgramDevirt : public ModulePass {
@@ -508,9 +538,14 @@ struct WholeProgramDevirt : public ModulePass {
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
+
+ auto OREGetter = function_ref<OptimizationRemarkEmitter &(Function *)>();
+
if (UseCommandLine)
- return DevirtModule::runForTesting(M, LegacyAARGetter(*this));
- return DevirtModule(M, LegacyAARGetter(*this), ExportSummary, ImportSummary)
+ return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter);
+
+ return DevirtModule(M, LegacyAARGetter(*this), OREGetter, ExportSummary,
+ ImportSummary)
.run();
}
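Note: the legacy pass hands in a default-constructed function_ref, which converts to false at the emit sites and selects the build-an-emitter-on-the-fly path. A standalone sketch of that empty-callback sentinel (std::function standing in for llvm::function_ref):

    #include <functional>

    struct Emitter {};

    static Emitter &pickEmitter(const std::function<Emitter &()> &Getter,
                                Emitter &Fallback) {
      return Getter ? Getter() : Fallback; // Empty getter => fallback.
    }

    int main() {
      Emitter Fallback;
      Emitter &E = pickEmitter({}, Fallback); // Legacy path: empty getter.
      (void)E;
    }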
@@ -542,13 +577,17 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
auto AARGetter = [&](Function &F) -> AAResults & {
return FAM.getResult<AAManager>(F);
};
- if (!DevirtModule(M, AARGetter, nullptr, nullptr).run())
+ auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
+ return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
+ };
+ if (!DevirtModule(M, AARGetter, OREGetter, nullptr, nullptr).run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
bool DevirtModule::runForTesting(
- Module &M, function_ref<AAResults &(Function &)> AARGetter) {
+ Module &M, function_ref<AAResults &(Function &)> AARGetter,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
ModuleSummaryIndex Summary;
// Handle the command-line summary arguments. This code is for testing
@@ -566,7 +605,7 @@ bool DevirtModule::runForTesting(
bool Changed =
DevirtModule(
- M, AARGetter,
+ M, AARGetter, OREGetter,
ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
.run();
@@ -684,7 +723,7 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
auto Apply = [&](CallSiteInfo &CSInfo) {
for (auto &&VCallSite : CSInfo.CallSites) {
if (RemarksEnabled)
- VCallSite.emitRemark("single-impl", TheFn->getName());
+ VCallSite.emitRemark("single-impl", TheFn->getName(), OREGetter);
VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
TheFn, VCallSite.CS.getCalledValue()->getType()));
// This use is no longer unsafe.
@@ -724,9 +763,24 @@ bool DevirtModule::trySingleImplDevirt(
// to make it visible to thin LTO objects. We can only get here during the
// ThinLTO export phase.
if (TheFn->hasLocalLinkage()) {
+ std::string NewName = (TheFn->getName() + "$merged").str();
+
+ // Since we are renaming the function, any comdats with the same name must
+ // also be renamed. This is required when targeting COFF, as the comdat name
+ // must match one of the names of the symbols in the comdat.
+ if (Comdat *C = TheFn->getComdat()) {
+ if (C->getName() == TheFn->getName()) {
+ Comdat *NewC = M.getOrInsertComdat(NewName);
+ NewC->setSelectionKind(C->getSelectionKind());
+ for (GlobalObject &GO : M.global_objects())
+ if (GO.getComdat() == C)
+ GO.setComdat(NewC);
+ }
+ }
+
TheFn->setLinkage(GlobalValue::ExternalLinkage);
TheFn->setVisibility(GlobalValue::HiddenVisibility);
- TheFn->setName(TheFn->getName() + "$merged");
+ TheFn->setName(NewName);
}
Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
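Note on the comdat rename above: when the devirtualized function gets the "$merged" suffix, a comdat that carried its old name must be recreated under the new name and every member repointed, because COFF requires a comdat's name to match one of its member symbols. A sketch of just that step using the same LLVM calls as the hunk (assumes LLVM headers; not runnable standalone):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/Comdat.h"
    #include "llvm/IR/GlobalObject.h"
    #include "llvm/IR/Module.h"

    static void renameComdat(llvm::Module &M, llvm::Comdat *C,
                             llvm::StringRef NewName) {
      llvm::Comdat *NewC = M.getOrInsertComdat(NewName);
      NewC->setSelectionKind(C->getSelectionKind());
      for (llvm::GlobalObject &GO : M.global_objects())
        if (GO.getComdat() == C)
          GO.setComdat(NewC); // Repoint every member at the renamed comdat.
    }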
@@ -769,7 +823,7 @@ void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
uint64_t TheRetVal) {
for (auto Call : CSInfo.CallSites)
Call.replaceAndErase(
- "uniform-ret-val", FnName, RemarksEnabled,
+ "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
ConstantInt::get(cast<IntegerType>(Call.CS.getType()), TheRetVal));
CSInfo.markDevirt();
}
@@ -808,6 +862,12 @@ std::string DevirtModule::getGlobalName(VTableSlot Slot,
return OS.str();
}
+bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
+ Triple T(M.getTargetTriple());
+ return (T.getArch() == Triple::x86 || T.getArch() == Triple::x86_64) &&
+ T.getObjectFormat() == Triple::ELF;
+}
+
void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
StringRef Name, Constant *C) {
GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
@@ -815,27 +875,55 @@ void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
GA->setVisibility(GlobalValue::HiddenVisibility);
}
+void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, uint32_t Const,
+ uint32_t &Storage) {
+ if (shouldExportConstantsAsAbsoluteSymbols()) {
+ exportGlobal(
+ Slot, Args, Name,
+ ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
+ return;
+ }
+
+ Storage = Const;
+}
+
Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
- StringRef Name, unsigned AbsWidth) {
+ StringRef Name) {
Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty);
auto *GV = dyn_cast<GlobalVariable>(C);
+ if (GV)
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ return C;
+}
+
+Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, IntegerType *IntTy,
+ uint32_t Storage) {
+ if (!shouldExportConstantsAsAbsoluteSymbols())
+ return ConstantInt::get(IntTy, Storage);
+
+ Constant *C = importGlobal(Slot, Args, Name);
+ auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
+ C = ConstantExpr::getPtrToInt(C, IntTy);
+
// We only need to set metadata if the global is newly created, in which
// case it would not have hidden visibility.
- if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
+ if (GV->getMetadata(LLVMContext::MD_absolute_symbol))
return C;
- GV->setVisibility(GlobalValue::HiddenVisibility);
auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
GV->setMetadata(LLVMContext::MD_absolute_symbol,
MDNode::get(M.getContext(), {MinC, MaxC}));
};
+ unsigned AbsWidth = IntTy->getBitWidth();
if (AbsWidth == IntPtrTy->getBitWidth())
SetAbsRange(~0ull, ~0ull); // Full set.
- else if (AbsWidth)
+ else
SetAbsRange(0, 1ull << AbsWidth);
- return GV;
+ return C;
}
void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
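Note: importConstant now derives the absolute-symbol width from the integer type rather than a separate AbsWidth argument. The !absolute_symbol metadata takes a half-open [Min, Max) range, with (~0, ~0) as the full-set sentinel; a standalone sketch of that computation:

    #include <cstdint>
    #include <utility>

    // Returns the [Min, Max) pair attached as !absolute_symbol: the full-set
    // sentinel when the value is pointer-width, else [0, 2^AbsWidth).
    static std::pair<uint64_t, uint64_t> absSymbolRange(unsigned AbsWidth,
                                                        unsigned PtrWidth) {
      if (AbsWidth == PtrWidth)
        return {~0ull, ~0ull}; // Full set.
      return {0, 1ull << AbsWidth};
    }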
@@ -843,10 +931,12 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
Constant *UniqueMemberAddr) {
for (auto &&Call : CSInfo.CallSites) {
IRBuilder<> B(Call.CS.getInstruction());
- Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
- Call.VTable, UniqueMemberAddr);
+ Value *Cmp =
+ B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+ B.CreateBitCast(Call.VTable, Int8PtrTy), UniqueMemberAddr);
Cmp = B.CreateZExt(Cmp, Call.CS->getType());
- Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, Cmp);
+ Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
+ Cmp);
}
CSInfo.markDevirt();
}
@@ -909,17 +999,19 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
for (auto Call : CSInfo.CallSites) {
auto *RetType = cast<IntegerType>(Call.CS.getType());
IRBuilder<> B(Call.CS.getInstruction());
- Value *Addr = B.CreateGEP(Int8Ty, Call.VTable, Byte);
+ Value *Addr =
+ B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
if (RetType->getBitWidth() == 1) {
Value *Bits = B.CreateLoad(Addr);
Value *BitsAndBit = B.CreateAnd(Bits, Bit);
auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
- IsBitSet);
+ OREGetter, IsBitSet);
} else {
Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
Value *Val = B.CreateLoad(RetType, ValAddr);
- Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, Val);
+ Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
+ OREGetter, Val);
}
}
CSInfo.markDevirt();
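Note: for an i1 return, the devirtualized call becomes a one-byte load past the (now i8*-cast) vtable pointer plus a bit test; wider returns load a whole value at the byte offset. A standalone sketch of the i1 shape:

    #include <cstdint>

    // Equivalent of the rewritten call: read the byte at VTable + OffsetByte
    // and test the single bit the pass proved constant.
    static bool loadVirtualConstBit(const uint8_t *VTable, uint64_t OffsetByte,
                                    uint8_t BitMask) {
      return (VTable[OffsetByte] & BitMask) != 0;
    }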
@@ -1007,18 +1099,18 @@ bool DevirtModule::tryVirtualConstProp(
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
- Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
- Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
if (CSByConstantArg.second.isExported()) {
ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
- exportGlobal(Slot, CSByConstantArg.first, "byte",
- ConstantExpr::getIntToPtr(ByteConst, Int8PtrTy));
- exportGlobal(Slot, CSByConstantArg.first, "bit",
- ConstantExpr::getIntToPtr(BitConst, Int8PtrTy));
+ exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
+ ResByArg->Byte);
+ exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
+ ResByArg->Bit);
}
// Rewrite each call to a load from OffsetByte/OffsetBit.
+ Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
+ Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
applyVirtualConstProp(CSByConstantArg.second,
TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
}
@@ -1112,8 +1204,7 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
if (SeenPtrs.insert(Ptr).second) {
for (DevirtCallSite Call : DevirtCalls) {
- CallSlots[{TypeId, Call.Offset}].addCallSite(CI->getArgOperand(0),
- Call.CS, nullptr);
+ CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS, nullptr);
}
}
}
@@ -1250,10 +1341,10 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
break;
}
case WholeProgramDevirtResolution::ByArg::VirtualConstProp: {
- Constant *Byte = importGlobal(Slot, CSByConstantArg.first, "byte", 32);
- Byte = ConstantExpr::getPtrToInt(Byte, Int32Ty);
- Constant *Bit = importGlobal(Slot, CSByConstantArg.first, "bit", 8);
- Bit = ConstantExpr::getPtrToInt(Bit, Int8Ty);
+ Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
+ Int32Ty, ResByArg.Byte);
+ Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
+ ResByArg.Bit);
applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
}
default:
@@ -1406,9 +1497,24 @@ bool DevirtModule::run() {
// Generate remarks for each devirtualized function.
for (const auto &DT : DevirtTargets) {
Function *F = DT.second;
- DISubprogram *SP = F->getSubprogram();
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, SP,
- Twine("devirtualized ") + F->getName());
+
+ // In the new pass manager, we can request the optimization
+ // remark emitter pass on a per-function basis, which the
+ // OREGetter will do for us.
+ // In the old pass manager, this is harder, so we just build
+ // an optimization remark emitter on the fly when we need it.
+ std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
+ OptimizationRemarkEmitter *ORE;
+ if (OREGetter)
+ ORE = &OREGetter(F);
+ else {
+ OwnedORE = make_unique<OptimizationRemarkEmitter>(F);
+ ORE = OwnedORE.get();
+ }
+
+ using namespace ore;
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
+ << "devirtualized " << NV("FunctionName", F->getName()));
}
}