Diffstat (limited to 'lib/Transforms/IPO/GlobalOpt.cpp')
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp  422
1 file changed, 368 insertions(+), 54 deletions(-)
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 4bb2984e3b47..1af7e6894777 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -17,14 +17,16 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -55,6 +57,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -63,7 +66,6 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <utility>
@@ -88,6 +90,21 @@ STATISTIC(NumNestRemoved , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
+STATISTIC(NumInternalFunc, "Number of internal functions");
+STATISTIC(NumColdCC, "Number of functions marked coldcc");
+
+static cl::opt<bool>
+ EnableColdCCStressTest("enable-coldcc-stress-test",
+ cl::desc("Enable stress test of coldcc by adding "
+ "calling conv to all internal functions."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<int> ColdCCRelFreq(
+ "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+ cl::desc(
+ "Maximum block frequency, expressed as a percentage of caller's "
+ "entry frequency, for a call site to be considered cold for enabling"
+ "coldcc"));
/// Is this global variable possibly used by a leak checker as a root? If so,
/// we might not really want to eliminate the stores to it.
@@ -483,7 +500,6 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
StartAlignment = DL.getABITypeAlignment(GV->getType());
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- uint64_t FragmentOffset = 0;
unsigned NumElements = STy->getNumElements();
NewGlobals.reserve(NumElements);
const StructLayout &Layout = *DL.getStructLayout(STy);
@@ -509,10 +525,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
NGV->setAlignment(NewAlign);
// Copy over the debug info for the variable.
- FragmentOffset = alignTo(FragmentOffset, NewAlign);
- uint64_t Size = DL.getTypeSizeInBits(NGV->getValueType());
- transferSRADebugInfo(GV, NGV, FragmentOffset, Size, NumElements);
- FragmentOffset += Size;
+ uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
+ uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(i);
+ transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, NumElements);
}
} else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
unsigned NumElements = STy->getNumElements();
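
The switch from getTypeSizeInBits() to getTypeAllocSizeInBits() matters whenever a type's value size is smaller than its padded storage size. A sketch of the distinction, assuming a typical x86-64 DataLayout:

    // For x86_fp80:
    //   DL.getTypeSizeInBits(Ty)      == 80    bits of actual value
    //   DL.getTypeAllocSizeInBits(Ty) == 128   storage rounded up to alignment
    // Fragment sizes must be based on the alloc size so that fragment i's
    // placement agrees with StructLayout::getElementOffsetInBits(i).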
@@ -522,7 +537,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
auto ElTy = STy->getElementType();
uint64_t EltSize = DL.getTypeAllocSize(ElTy);
unsigned EltAlign = DL.getABITypeAlignment(ElTy);
- uint64_t FragmentSizeInBits = DL.getTypeSizeInBits(ElTy);
+ uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
@@ -551,7 +566,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (NewGlobals.empty())
return nullptr;
- DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
+ LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -621,7 +636,13 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSetImpl<const PHINode*> &PHIs) {
- for (const User *U : V->users())
+ for (const User *U : V->users()) {
+ if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ // If the null pointer is considered valid, then all uses are non-trapping.
+ // Non address-space 0 globals have already been pruned by the caller.
+ if (NullPointerIsDefined(I->getFunction()))
+ return false;
+ }
if (isa<LoadInst>(U)) {
// Will trap.
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
@@ -655,7 +676,7 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
//cerr << "NONTRAPPING USE: " << *U;
return false;
}
-
+ }
return true;
}
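
The new NullPointerIsDefined() guard protects functions in which address 0 is a legitimate location. A minimal sketch of the case it handles, assuming the string attribute spelling in use at the time of this patch:

    // define internal void @f() "null-pointer-is-valid"="true" {
    //   %v = load i32, i32* null   ; an ordinary, defined access here
    //   ret void
    // }
    // Inside @f a load of null does not trap, so the walk must answer false
    // instead of classifying the use as trapping.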
@@ -682,6 +703,10 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
Instruction *I = cast<Instruction>(*UI++);
+ // Uses are non-trapping if the null pointer is considered valid.
+ // Non address-space 0 globals are already pruned by the caller.
+ if (NullPointerIsDefined(I->getFunction()))
+ return false;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setOperand(0, NewV);
Changed = true;
@@ -783,7 +808,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
}
if (Changed) {
- DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n");
+ LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
+ << "\n");
++NumGlobUses;
}
@@ -797,7 +823,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
}
if (GV->use_empty()) {
- DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
+ LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
Changed = true;
GV->eraseFromParent();
++NumDeleted;
@@ -833,7 +859,8 @@ static GlobalVariable *
OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
ConstantInt *NElements, const DataLayout &DL,
TargetLibraryInfo *TLI) {
- DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
+ LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI
+ << '\n');
Type *GlobalType;
if (NElements->getZExtValue() == 1)
@@ -1269,7 +1296,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
+ LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI
+ << '\n');
Type *MAT = getMallocAllocatedType(CI, TLI);
StructType *STy = cast<StructType>(MAT);
@@ -1566,7 +1594,10 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// users of the loaded value (often calls and loads) that would trap if the
// value was null.
if (GV->getInitializer()->getType()->isPointerTy() &&
- GV->getInitializer()->isNullValue()) {
+ GV->getInitializer()->isNullValue() &&
+ !NullPointerIsDefined(
+ nullptr /* F */,
+ GV->getInitializer()->getType()->getPointerAddressSpace())) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
@@ -1608,7 +1639,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
return false;
- DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
+ LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
@@ -1652,15 +1683,11 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// val * (ValOther - ValInit) + ValInit:
// DW_OP_deref DW_OP_constu <ValMinus>
// DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
- E = DIExpression::get(NewGV->getContext(),
- {dwarf::DW_OP_deref,
- dwarf::DW_OP_constu,
- ValMinus,
- dwarf::DW_OP_mul,
- dwarf::DW_OP_constu,
- ValInit,
- dwarf::DW_OP_plus,
- dwarf::DW_OP_stack_value});
+ SmallVector<uint64_t, 12> Ops = {
+ dwarf::DW_OP_deref, dwarf::DW_OP_constu, ValMinus,
+ dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
+ dwarf::DW_OP_plus};
+ E = DIExpression::prependOpcodes(E, Ops, DIExpression::WithStackValue);
DIGlobalVariableExpression *DGVE =
DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
NewGV->addDebugInfo(DGVE);
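
A worked instance of the recovery expression described above (illustrative values): with ValInit = 7 and OtherVal = 42, ValMinus = 42 - 7 = 35, and a debugger evaluates

    // deref(i1) * 35 + 7  ==>   7 when the boolean is 0 (initial value)
    //                          42 when the boolean is 1 (stored value)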
@@ -1732,8 +1759,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
return true;
}
-static bool deleteIfDead(GlobalValue &GV,
- SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
+static bool deleteIfDead(
+ GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
GV.removeDeadConstantUsers();
if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
@@ -1751,7 +1778,7 @@ static bool deleteIfDead(GlobalValue &GV,
if (!Dead)
return false;
- DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
+ LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
GV.eraseFromParent();
++NumDeleted;
return true;
@@ -1917,7 +1944,7 @@ static bool processInternalGlobal(
LookupDomTree)) {
const DataLayout &DL = GV->getParent()->getDataLayout();
- DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
+ LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getValueType();
@@ -1938,7 +1965,7 @@ static bool processInternalGlobal(
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.IsLoaded) {
- DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
+ LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
bool Changed;
if (isLeakCheckerRoot(GV)) {
@@ -1960,7 +1987,7 @@ static bool processInternalGlobal(
}
if (GS.StoredType <= GlobalStatus::InitializerStored) {
- DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
+ LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
@@ -1968,8 +1995,8 @@ static bool processInternalGlobal(
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
- DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
- << "all users and delete global!\n");
+ LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
return true;
@@ -1997,8 +2024,8 @@ static bool processInternalGlobal(
CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
if (GV->use_empty()) {
- DEBUG(dbgs() << " *** Substituting initializer allowed us to "
- << "simplify all users and delete global!\n");
+ LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
}
@@ -2097,20 +2124,142 @@ static void RemoveNestAttribute(Function *F) {
/// idea here is that we don't want to mess with the convention if the user
/// explicitly requested something with performance implications like coldcc,
/// GHC, or anyregcc.
-static bool isProfitableToMakeFastCC(Function *F) {
+static bool hasChangeableCC(Function *F) {
CallingConv::ID CC = F->getCallingConv();
+
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
- return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
+ if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
+ return false;
+
+ // FIXME: Change CC for the whole chain of musttail calls when possible.
+ //
+ // Can't change CC of the function that either has musttail calls, or is a
+ // musttail callee itself
+ for (User *U : F->users()) {
+ if (isa<BlockAddress>(U))
+ continue;
+ CallInst *CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ continue;
+
+ if (CI->isMustTailCall())
+ return false;
+ }
+
+ for (BasicBlock &BB : *F)
+ if (BB.getTerminatingMustTailCall())
+ return false;
+
+ return true;
+}
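
The musttail restriction follows from the IR rules: the LangRef requires the calling conventions of a musttail call's caller and callee to match, so neither function's convention can be rewritten in isolation. A hypothetical shape that must be rejected:

    // define internal i32 @callee(i32 %x) { ... }
    // define internal i32 @caller(i32 %x) {
    //   %r = musttail call i32 @callee(i32 %x) ; ties both CCs together
    //   ret i32 %r
    // }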
+
+/// Return true if the block containing the call site has a BlockFrequency of
+/// less than ColdCCRelFreq% of the entry block.
+static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) {
+ const BranchProbability ColdProb(ColdCCRelFreq, 100);
+ auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
+ auto CallerEntryFreq =
+ CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+ return CallSiteFreq < CallerEntryFreq * ColdProb;
+}
+
+// This function checks if the input function F is cold at all call sites. It
+// also looks at each call site's containing function, returning false if the
+// caller contains any other non-cold calls. The input vector AllCallsCold
+// contains a list of functions that only have call sites in cold blocks.
+static bool
+isValidCandidateForColdCC(Function &F,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+ const std::vector<Function *> &AllCallsCold) {
+
+ if (F.user_empty())
+ return false;
+
+ for (User *U : F.users()) {
+ if (isa<BlockAddress>(U))
+ continue;
+
+ CallSite CS(cast<Instruction>(U));
+ Function *CallerFunc = CS.getInstruction()->getParent()->getParent();
+ BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
+ if (!isColdCallSite(CS, CallerBFI))
+ return false;
+ if (!llvm::is_contained(AllCallsCold, CallerFunc))
+ return false;
+ }
+ return true;
+}
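
The AllCallsCold membership test makes this check stricter than per-call-site coldness alone; a hypothetical call graph shows the difference:

    // @hot calls @f from a cold block, but also calls @g from a hot block.
    // @hot is therefore not in AllCallsCold, so @f is rejected as a coldcc
    // candidate even though the one call to @f is itself cold.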
+
+static void changeCallSitesToColdCC(Function *F) {
+ for (User *U : F->users()) {
+ if (isa<BlockAddress>(U))
+ continue;
+ CallSite CS(cast<Instruction>(U));
+ CS.setCallingConv(CallingConv::Cold);
+ }
+}
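
Combined with F->setCallingConv(CallingConv::Cold) at the caller of this helper, the net effect on the IR is (hypothetical module):

    // before:  define internal void @helper() { ... }
    //          call void @helper()
    // after:   define internal coldcc void @helper() { ... }
    //          call coldcc void @helper()
    // Both sides must change together: a direct call whose convention
    // disagrees with its callee's has undefined behavior.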
+
+// This function iterates over all the call instructions in the input Function
+// and checks that all call sites are in cold blocks and are allowed to use the
+// coldcc calling convention.
+static bool
+hasOnlyColdCalls(Function &F,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ CallSite CS(cast<Instruction>(CI));
+ // Skip over inline asm instructions since they aren't function calls.
+ if (CI->isInlineAsm())
+ continue;
+ Function *CalledFn = CI->getCalledFunction();
+ if (!CalledFn)
+ return false;
+ if (!CalledFn->hasLocalLinkage())
+ return false;
+ // Skip over intrinsics since they won't remain as function calls.
+ if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
+ continue;
+ // Check if it's valid to use coldcc calling convention.
+ if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
+ CalledFn->hasAddressTaken())
+ return false;
+ BlockFrequencyInfo &CallerBFI = GetBFI(F);
+ if (!isColdCallSite(CS, CallerBFI))
+ return false;
+ }
+ }
+ }
+ return true;
}
static bool
OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
- SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
+ SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
+
bool Changed = false;
+
+ std::vector<Function *> AllCallsCold;
+ for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
+ Function *F = &*FI++;
+ if (hasOnlyColdCalls(*F, GetBFI))
+ AllCallsCold.push_back(F);
+ }
+
// Optimize functions.
for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
Function *F = &*FI++;
+
+ // Don't perform global opt pass on naked functions; we don't want fast
+ // calling conventions for naked functions.
+ if (F->hasFnAttribute(Attribute::Naked))
+ continue;
+
// Functions without names cannot be referenced outside this module.
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
@@ -2142,7 +2291,25 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
if (!F->hasLocalLinkage())
continue;
- if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
+
+ if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
+ NumInternalFunc++;
+ TargetTransformInfo &TTI = GetTTI(*F);
+ // Change the calling convention to coldcc if either stress testing is
+ // enabled or the target would like to use coldcc on functions which are
+ // cold at all call sites and the callers contain no other non coldcc
+ // calls.
+ if (EnableColdCCStressTest ||
+ (isValidCandidateForColdCC(*F, GetBFI, AllCallsCold) &&
+ TTI.useColdCCForColdCall(*F))) {
+ F->setCallingConv(CallingConv::Cold);
+ changeCallSitesToColdCC(F);
+ Changed = true;
+ NumColdCC++;
+ }
+ }
+
+ if (hasChangeableCC(F) && !F->isVarArg() &&
!F->hasAddressTaken()) {
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
@@ -2168,7 +2335,7 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
static bool
OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
- SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
+ SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
@@ -2254,6 +2421,131 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
}
+/// Given a map of address -> value, where addresses are expected to be some form
+/// of either a global or a constant GEP, set the initializer for the address to
+/// be the value. This performs mostly the same function as CommitValueTo()
+/// and EvaluateStoreInto() but is optimized to be more efficient for the common
+/// case where the addresses are GEPs sharing the same underlying global,
+/// processing the GEPs in batches rather than individually.
+///
+/// To give an example, consider the following C++ code adapted from the clang
+/// regression tests:
+/// struct S {
+/// int n = 10;
+/// int m = 2 * n;
+/// S(int a) : n(a) {}
+/// };
+///
+/// template<typename T>
+/// struct U {
+/// T *r = &q;
+/// T q = 42;
+/// U *p = this;
+/// };
+///
+/// U<S> e;
+///
+/// The global static constructor for 'e' will need to initialize 'r' and 'p' of
+/// the outer struct, while also initializing the inner 'q' struct's 'n' and 'm'
+/// members. This batch algorithm will simply use general CommitValueTo() method
+/// to handle the complex nested S struct initialization of 'q', before
+/// processing the outermost members in a single batch. Using CommitValueTo() to
+/// handle members of the outer struct is inefficient when the struct/array is
+/// very large as we end up creating and destroying constant arrays for each
+/// initialization.
+/// For the above case, we expect the following IR to be generated:
+///
+/// %struct.U = type { %struct.S*, %struct.S, %struct.U* }
+/// %struct.S = type { i32, i32 }
+///  @e = global %struct.U { %struct.S* getelementptr inbounds (%struct.U,
+///                                       %struct.U* @e, i64 0, i32 1),
+///                          %struct.S { i32 42, i32 84 }, %struct.U* @e }
+/// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex
+/// constant expression, while the other two elements of @e are "simple".
+static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) {
+ SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs;
+ SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs;
+ SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs;
+ SimpleCEs.reserve(Mem.size());
+
+ for (const auto &I : Mem) {
+ if (auto *GV = dyn_cast<GlobalVariable>(I.first)) {
+ GVs.push_back(std::make_pair(GV, I.second));
+ } else {
+ ConstantExpr *GEP = cast<ConstantExpr>(I.first);
+ // We don't handle the deeply recursive case using the batch method.
+ if (GEP->getNumOperands() > 3)
+ ComplexCEs.push_back(std::make_pair(GEP, I.second));
+ else
+ SimpleCEs.push_back(std::make_pair(GEP, I.second));
+ }
+ }
+
+ // The algorithm below doesn't handle cases like nested structs, so use the
+ // slower fully general method if we have to.
+ for (auto ComplexCE : ComplexCEs)
+ CommitValueTo(ComplexCE.second, ComplexCE.first);
+
+ for (auto GVPair : GVs) {
+ assert(GVPair.first->hasInitializer());
+ GVPair.first->setInitializer(GVPair.second);
+ }
+
+ if (SimpleCEs.empty())
+ return;
+
+ // We cache a single global's initializer elements for the common case where
+ // consecutive address/value pairs refer to the same global. This avoids
+ // throwing away and rebuilding the constant struct/vector/array just because
+ // one element is modified at a time.
+ SmallVector<Constant *, 32> Elts;
+ Elts.reserve(SimpleCEs.size());
+ GlobalVariable *CurrentGV = nullptr;
+
+ auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) {
+ Constant *Init = GV->getInitializer();
+ Type *Ty = Init->getType();
+ if (Update) {
+ if (CurrentGV) {
+ Type *CurrentInitTy = CurrentGV->getInitializer()->getType();
+ // We have a valid cache that needs to be committed.
+ if (StructType *STy = dyn_cast<StructType>(CurrentInitTy))
+ CurrentGV->setInitializer(ConstantStruct::get(STy, Elts));
+ else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy))
+ CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts));
+ else
+ CurrentGV->setInitializer(ConstantVector::get(Elts));
+ }
+ if (CurrentGV == GV)
+ return;
+ // Need to clear and set up cache for new initializer.
+ CurrentGV = GV;
+ Elts.clear();
+ unsigned NumElts;
+ if (auto *STy = dyn_cast<StructType>(Ty))
+ NumElts = STy->getNumElements();
+ else
+ NumElts = cast<SequentialType>(Ty)->getNumElements();
+ for (unsigned i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
+ }
+ };
+
+ for (auto CEPair : SimpleCEs) {
+ ConstantExpr *GEP = CEPair.first;
+ Constant *Val = CEPair.second;
+
+ GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0));
+ commitAndSetupCache(GV, GV != CurrentGV);
+ ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2));
+ Elts[CI->getZExtValue()] = Val;
+ }
+ // The last cached initializer still needs to be committed; the others were
+ // committed as each new global was encountered above.
+ commitAndSetupCache(CurrentGV, true);
+}
+
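
As a standalone illustration of the caching strategy, here is a toy analogue in plain C++ (not the LLVM API; all names are invented) that rebuilds each aggregate once per global rather than once per store:

    #include <utility>
    #include <vector>

    struct Global { std::vector<int> Init; };

    // Each entry maps (global, element index) -> new value, mirroring the
    // common case where consecutive entries touch the same global.
    void batchCommit(
        std::vector<std::pair<std::pair<Global *, unsigned>, int>> &Stores) {
      Global *Cur = nullptr;
      std::vector<int> Elts; // cached elements of Cur's initializer
      auto Flush = [&] {
        if (Cur)
          Cur->Init = Elts; // one aggregate rebuild per global
      };
      for (auto &[Addr, Val] : Stores) {
        if (Addr.first != Cur) { // switching globals: commit the cache
          Flush();
          Cur = Addr.first;
          Elts = Cur->Init;
        }
        Elts[Addr.second] = Val; // cheap in-place element update
      }
      Flush(); // the final cached global still needs committing
    }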
/// Evaluate static constructors in the function, if we can. Return true if we
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
@@ -2268,11 +2560,10 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
++NumCtorsEvaluated;
// We succeeded at evaluation: commit the result.
- DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
- << F->getName() << "' to " << Eval.getMutatedMemory().size()
- << " stores.\n");
- for (const auto &I : Eval.getMutatedMemory())
- CommitValueTo(I.second, I.first);
+ LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
+ << F->getName() << "' to "
+ << Eval.getMutatedMemory().size() << " stores.\n");
+ BatchCommitValueTo(Eval.getMutatedMemory());
for (GlobalVariable *GV : Eval.getInvariants())
GV->setConstant(true);
}
@@ -2287,7 +2578,7 @@ static int compareNames(Constant *const *A, Constant *const *B) {
}
static void setUsedInitializer(GlobalVariable &V,
- const SmallPtrSet<GlobalValue *, 8> &Init) {
+ const SmallPtrSetImpl<GlobalValue *> &Init) {
if (Init.empty()) {
V.eraseFromParent();
return;
@@ -2440,7 +2731,7 @@ static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
static bool
OptimizeGlobalAliases(Module &M,
- SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
+ SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
LLVMUsed Used(M);
@@ -2460,7 +2751,7 @@ OptimizeGlobalAliases(Module &M,
continue;
}
- // If the aliasee may change at link time, nothing can be done - bail out.
+ // If the alias can change at link time, nothing can be done - bail out.
if (J->isInterposable())
continue;
@@ -2486,6 +2777,7 @@ OptimizeGlobalAliases(Module &M,
// Give the aliasee the name, linkage and other attributes of the alias.
Target->takeName(&*J);
Target->setLinkage(J->getLinkage());
+ Target->setDSOLocal(J->isDSOLocal());
Target->setVisibility(J->getVisibility());
Target->setDLLStorageClass(J->getDLLStorageClass());
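
The added setDSOLocal() call mirrors the surrounding attribute transfers: when the alias is folded into its aliasee, the dso_local guarantee must move with the name (hypothetical IR):

    // before:  @impl = ...                                  ; aliasee
    //          @a = dso_local alias void (), void ()* @impl
    // after:   @impl takes the name @a, so it must also become dso_local or
    //          references to @a would lose their direct, local binding.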
@@ -2619,8 +2911,10 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
static bool optimizeGlobalsInModule(
Module &M, const DataLayout &DL, TargetLibraryInfo *TLI,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
- SmallSet<const Comdat *, 8> NotDiscardableComdats;
+ SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -2641,8 +2935,8 @@ static bool optimizeGlobalsInModule(
NotDiscardableComdats.insert(C);
// Delete functions that are trivially dead, ccc -> fastcc
- LocalChange |=
- OptimizeFunctions(M, TLI, LookupDomTree, NotDiscardableComdats);
+ LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree,
+ NotDiscardableComdats);
// Optimize global_ctors list.
LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
@@ -2679,7 +2973,15 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
return FAM.getResult<DominatorTreeAnalysis>(F);
};
- if (!optimizeGlobalsInModule(M, DL, &TLI, LookupDomTree))
+ auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
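
With TTI and BFI wired into both pass managers, the pass can be exercised from opt under either one. Illustrative RUN lines in the style of the LLVM regression tests (FileCheck patterns omitted):

    ; RUN: opt -globalopt -S %s
    ; RUN: opt -passes=globalopt -S %s
    ; RUN: opt -globalopt -enable-coldcc-stress-test -S %s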
@@ -2702,12 +3004,22 @@ struct GlobalOptLegacyPass : public ModulePass {
auto LookupDomTree = [this](Function &F) -> DominatorTree & {
return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
};
- return optimizeGlobalsInModule(M, DL, TLI, LookupDomTree);
+ auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+
+ auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
+ return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+ };
+
+ return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
}
};
@@ -2718,6 +3030,8 @@ char GlobalOptLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)