diff options
Diffstat (limited to 'lib/Transforms/IPO/GlobalOpt.cpp')
-rw-r--r-- | lib/Transforms/IPO/GlobalOpt.cpp | 422 |
1 files changed, 368 insertions, 54 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 4bb2984e3b47..1af7e6894777 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -17,14 +17,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -55,6 +57,7 @@ #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -63,7 +66,6 @@ #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/Evaluator.h" #include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Transforms/Utils/Local.h" #include <cassert> #include <cstdint> #include <utility> @@ -88,6 +90,21 @@ STATISTIC(NumNestRemoved , "Number of nest attributes removed"); STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); +STATISTIC(NumInternalFunc, "Number of internal functions"); +STATISTIC(NumColdCC, "Number of functions marked coldcc"); + +static cl::opt<bool> + EnableColdCCStressTest("enable-coldcc-stress-test", + cl::desc("Enable stress test of coldcc by adding " + "calling conv to all internal functions."), + cl::init(false), cl::Hidden); + 
+static cl::opt<int> ColdCCRelFreq( + "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, + cl::desc( + "Maximum block frequency, expressed as a percentage of caller's " + "entry frequency, for a call site to be considered cold for enabling" + "coldcc")); /// Is this global variable possibly used by a leak checker as a root? If so, /// we might not really want to eliminate the stores to it. @@ -483,7 +500,6 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { StartAlignment = DL.getABITypeAlignment(GV->getType()); if (StructType *STy = dyn_cast<StructType>(Ty)) { - uint64_t FragmentOffset = 0; unsigned NumElements = STy->getNumElements(); NewGlobals.reserve(NumElements); const StructLayout &Layout = *DL.getStructLayout(STy); @@ -509,10 +525,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { NGV->setAlignment(NewAlign); // Copy over the debug info for the variable. - FragmentOffset = alignTo(FragmentOffset, NewAlign); - uint64_t Size = DL.getTypeSizeInBits(NGV->getValueType()); - transferSRADebugInfo(GV, NGV, FragmentOffset, Size, NumElements); - FragmentOffset += Size; + uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType()); + uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(i); + transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, NumElements); } } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { unsigned NumElements = STy->getNumElements(); @@ -522,7 +537,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { auto ElTy = STy->getElementType(); uint64_t EltSize = DL.getTypeAllocSize(ElTy); unsigned EltAlign = DL.getABITypeAlignment(ElTy); - uint64_t FragmentSizeInBits = DL.getTypeSizeInBits(ElTy); + uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = Init->getAggregateElement(i); assert(In && "Couldn't get element of initializer?"); @@ -551,7 +566,7 @@ 
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (NewGlobals.empty()) return nullptr; - DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); + LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -621,7 +636,13 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { /// reprocessing them. static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSetImpl<const PHINode*> &PHIs) { - for (const User *U : V->users()) + for (const User *U : V->users()) { + if (const Instruction *I = dyn_cast<Instruction>(U)) { + // If null pointer is considered valid, then all uses are non-trapping. + // Non address-space 0 globals have already been pruned by the caller. + if (NullPointerIsDefined(I->getFunction())) + return false; + } if (isa<LoadInst>(U)) { // Will trap. } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { @@ -655,7 +676,7 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, //cerr << "NONTRAPPING USE: " << *U; return false; } - + } return true; } @@ -682,6 +703,10 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { bool Changed = false; for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) { Instruction *I = cast<Instruction>(*UI++); + // Uses are non-trapping if null pointer is considered valid. + // Non address-space 0 globals are already pruned by the caller. 
+ if (NullPointerIsDefined(I->getFunction())) + return false; if (LoadInst *LI = dyn_cast<LoadInst>(I)) { LI->setOperand(0, NewV); Changed = true; @@ -783,7 +808,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, } if (Changed) { - DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n"); + LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV + << "\n"); ++NumGlobUses; } @@ -797,7 +823,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, CleanupConstantGlobalUsers(GV, nullptr, DL, TLI); } if (GV->use_empty()) { - DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); + LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); Changed = true; GV->eraseFromParent(); ++NumDeleted; @@ -833,7 +859,8 @@ static GlobalVariable * OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, ConstantInt *NElements, const DataLayout &DL, TargetLibraryInfo *TLI) { - DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); + LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI + << '\n'); Type *GlobalType; if (NElements->getZExtValue() == 1) @@ -1269,7 +1296,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value *NElems, const DataLayout &DL, const TargetLibraryInfo *TLI) { - DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); + LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI + << '\n'); Type *MAT = getMallocAllocatedType(CI, TLI); StructType *STy = cast<StructType>(MAT); @@ -1566,7 +1594,10 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // users of the loaded value (often calls and loads) that would trap if the // value was null. 
if (GV->getInitializer()->getType()->isPointerTy() && - GV->getInitializer()->isNullValue()) { + GV->getInitializer()->isNullValue() && + !NullPointerIsDefined( + nullptr /* F */, + GV->getInitializer()->getType()->getPointerAddressSpace())) { if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); @@ -1608,7 +1639,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) return false; - DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); + LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), @@ -1652,15 +1683,11 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // val * (ValOther - ValInit) + ValInit: // DW_OP_deref DW_OP_constu <ValMinus> // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value - E = DIExpression::get(NewGV->getContext(), - {dwarf::DW_OP_deref, - dwarf::DW_OP_constu, - ValMinus, - dwarf::DW_OP_mul, - dwarf::DW_OP_constu, - ValInit, - dwarf::DW_OP_plus, - dwarf::DW_OP_stack_value}); + SmallVector<uint64_t, 12> Ops = { + dwarf::DW_OP_deref, dwarf::DW_OP_constu, ValMinus, + dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit, + dwarf::DW_OP_plus}; + E = DIExpression::prependOpcodes(E, Ops, DIExpression::WithStackValue); DIGlobalVariableExpression *DGVE = DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E); NewGV->addDebugInfo(DGVE); @@ -1732,8 +1759,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { return true; } -static bool deleteIfDead(GlobalValue &GV, - SmallSet<const Comdat *, 8> &NotDiscardableComdats) { +static bool deleteIfDead( + GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 
GV.removeDeadConstantUsers(); if (!GV.isDiscardableIfUnused() && !GV.isDeclaration()) @@ -1751,7 +1778,7 @@ static bool deleteIfDead(GlobalValue &GV, if (!Dead) return false; - DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); + LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); GV.eraseFromParent(); ++NumDeleted; return true; @@ -1917,7 +1944,7 @@ static bool processInternalGlobal( LookupDomTree)) { const DataLayout &DL = GV->getParent()->getDataLayout(); - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); + LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction ->getEntryBlock().begin()); Type *ElemTy = GV->getValueType(); @@ -1938,7 +1965,7 @@ static bool processInternalGlobal( // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.IsLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); + LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); bool Changed; if (isLeakCheckerRoot(GV)) { @@ -1960,7 +1987,7 @@ static bool processInternalGlobal( } if (GS.StoredType <= GlobalStatus::InitializerStored) { - DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); + LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); GV->setConstant(true); // Clean up any obviously simplifiable users now. @@ -1968,8 +1995,8 @@ static bool processInternalGlobal( // If the global is dead now, just nuke it. 
if (GV->use_empty()) { - DEBUG(dbgs() << " *** Marking constant allowed us to simplify " - << "all users and delete global!\n"); + LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; return true; @@ -1997,8 +2024,8 @@ static bool processInternalGlobal( CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); if (GV->use_empty()) { - DEBUG(dbgs() << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"); + LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; } @@ -2097,20 +2124,142 @@ static void RemoveNestAttribute(Function *F) { /// idea here is that we don't want to mess with the convention if the user /// explicitly requested something with performance implications like coldcc, /// GHC, or anyregcc. -static bool isProfitableToMakeFastCC(Function *F) { +static bool hasChangeableCC(Function *F) { CallingConv::ID CC = F->getCallingConv(); + // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? - return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; + if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall) + return false; + + // FIXME: Change CC for the whole chain of musttail calls when possible. + // + // Can't change CC of the function that either has musttail calls, or is a + // musttail callee itself + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) + continue; + CallInst* CI = dyn_cast<CallInst>(U); + if (!CI) + continue; + + if (CI->isMustTailCall()) + return false; + } + + for (BasicBlock &BB : *F) + if (BB.getTerminatingMustTailCall()) + return false; + + return true; +} + +/// Return true if the block containing the call site has a BlockFrequency of +/// less than ColdCCRelFreq% of the entry block. 
+static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) { + const BranchProbability ColdProb(ColdCCRelFreq, 100); + auto CallSiteBB = CS.getInstruction()->getParent(); + auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB); + auto CallerEntryFreq = + CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock())); + return CallSiteFreq < CallerEntryFreq * ColdProb; +} + +// This function checks if the input function F is cold at all call sites. It +// also looks at each call site's containing function, returning false if the +// caller function contains other non cold calls. The input vector AllCallsCold +// contains a list of functions that only have call sites in cold blocks. +static bool +isValidCandidateForColdCC(Function &F, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + const std::vector<Function *> &AllCallsCold) { + + if (F.user_empty()) + return false; + + for (User *U : F.users()) { + if (isa<BlockAddress>(U)) + continue; + + CallSite CS(cast<Instruction>(U)); + Function *CallerFunc = CS.getInstruction()->getParent()->getParent(); + BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc); + if (!isColdCallSite(CS, CallerBFI)) + return false; + auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc); + if (It == AllCallsCold.end()) + return false; + } + return true; +} + +static void changeCallSitesToColdCC(Function *F) { + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) + continue; + CallSite CS(cast<Instruction>(U)); + CS.setCallingConv(CallingConv::Cold); + } +} + +// This function iterates over all the call instructions in the input Function +// and checks that all call sites are in cold blocks and are allowed to use the +// coldcc calling convention. 
+static bool +hasOnlyColdCalls(Function &F, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI) { + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (CallInst *CI = dyn_cast<CallInst>(&I)) { + CallSite CS(cast<Instruction>(CI)); + // Skip over inline asm instructions since they aren't function calls. + if (CI->isInlineAsm()) + continue; + Function *CalledFn = CI->getCalledFunction(); + if (!CalledFn) + return false; + if (!CalledFn->hasLocalLinkage()) + return false; + // Skip over intrinsics since they won't remain as function calls. + if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic) + continue; + // Check if it's valid to use coldcc calling convention. + if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() || + CalledFn->hasAddressTaken()) + return false; + BlockFrequencyInfo &CallerBFI = GetBFI(F); + if (!isColdCallSite(CS, CallerBFI)) + return false; + } + } + } + return true; } static bool OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, + function_ref<TargetTransformInfo &(Function &)> GetTTI, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI, function_ref<DominatorTree &(Function &)> LookupDomTree, - SmallSet<const Comdat *, 8> &NotDiscardableComdats) { + SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { + bool Changed = false; + + std::vector<Function *> AllCallsCold; + for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) { + Function *F = &*FI++; + if (hasOnlyColdCalls(*F, GetBFI)) + AllCallsCold.push_back(F); + } + // Optimize functions. for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { Function *F = &*FI++; + + // Don't perform global opt pass on naked functions; we don't want fast + // calling conventions for naked functions. + if (F->hasFnAttribute(Attribute::Naked)) + continue; + // Functions without names cannot be referenced outside this module. 
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); @@ -2142,7 +2291,25 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, if (!F->hasLocalLinkage()) continue; - if (isProfitableToMakeFastCC(F) && !F->isVarArg() && + + if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { + NumInternalFunc++; + TargetTransformInfo &TTI = GetTTI(*F); + // Change the calling convention to coldcc if either stress testing is + // enabled or the target would like to use coldcc on functions which are + // cold at all call sites and the callers contain no other non coldcc + // calls. + if (EnableColdCCStressTest || + (isValidCandidateForColdCC(*F, GetBFI, AllCallsCold) && + TTI.useColdCCForColdCall(*F))) { + F->setCallingConv(CallingConv::Cold); + changeCallSitesToColdCC(F); + Changed = true; + NumColdCC++; + } + } + + if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { // If this function has a calling convention worth changing, is not a // varargs function, and is only called directly, promote it to use the @@ -2168,7 +2335,7 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, static bool OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, function_ref<DominatorTree &(Function &)> LookupDomTree, - SmallSet<const Comdat *, 8> &NotDiscardableComdats) { + SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { bool Changed = false; for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); @@ -2254,6 +2421,131 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); } +/// Given a map of address -> value, where addresses are expected to be some form +/// of either a global or a constant GEP, set the initializer for the address to +/// be the value. 
This performs mostly the same function as CommitValueTo() +/// and EvaluateStoreInto() but is optimized to be more efficient for the common +/// case where the set of addresses are GEPs sharing the same underlying global, +/// processing the GEPs in batches rather than individually. +/// +/// To give an example, consider the following C++ code adapted from the clang +/// regression tests: +/// struct S { +/// int n = 10; +/// int m = 2 * n; +/// S(int a) : n(a) {} +/// }; +/// +/// template<typename T> +/// struct U { +/// T *r = &q; +/// T q = 42; +/// U *p = this; +/// }; +/// +/// U<S> e; +/// +/// The global static constructor for 'e' will need to initialize 'r' and 'p' of +/// the outer struct, while also initializing the inner 'q' structs 'n' and 'm' +/// members. This batch algorithm will simply use general CommitValueTo() method +/// to handle the complex nested S struct initialization of 'q', before +/// processing the outermost members in a single batch. Using CommitValueTo() to +/// handle member in the outer struct is inefficient when the struct/array is +/// very large as we end up creating and destroy constant arrays for each +/// initialization. +/// For the above case, we expect the following IR to be generated: +/// +/// %struct.U = type { %struct.S*, %struct.S, %struct.U* } +/// %struct.S = type { i32, i32 } +/// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e, +/// i64 0, i32 1), +/// %struct.S { i32 42, i32 84 }, %struct.U* @e } +/// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex +/// constant expression, while the other two elements of @e are "simple". 
+static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) { + SmallVector<std::pair<GlobalVariable*, Constant*>, 32> GVs; + SmallVector<std::pair<ConstantExpr*, Constant*>, 32> ComplexCEs; + SmallVector<std::pair<ConstantExpr*, Constant*>, 32> SimpleCEs; + SimpleCEs.reserve(Mem.size()); + + for (const auto &I : Mem) { + if (auto *GV = dyn_cast<GlobalVariable>(I.first)) { + GVs.push_back(std::make_pair(GV, I.second)); + } else { + ConstantExpr *GEP = cast<ConstantExpr>(I.first); + // We don't handle the deeply recursive case using the batch method. + if (GEP->getNumOperands() > 3) + ComplexCEs.push_back(std::make_pair(GEP, I.second)); + else + SimpleCEs.push_back(std::make_pair(GEP, I.second)); + } + } + + // The algorithm below doesn't handle cases like nested structs, so use the + // slower fully general method if we have to. + for (auto ComplexCE : ComplexCEs) + CommitValueTo(ComplexCE.second, ComplexCE.first); + + for (auto GVPair : GVs) { + assert(GVPair.first->hasInitializer()); + GVPair.first->setInitializer(GVPair.second); + } + + if (SimpleCEs.empty()) + return; + + // We cache a single global's initializer elements in the case where the + // subsequent address/val pair uses the same one. This avoids throwing away and + // rebuilding the constant struct/vector/array just because one element is + // modified at a time. + SmallVector<Constant *, 32> Elts; + Elts.reserve(SimpleCEs.size()); + GlobalVariable *CurrentGV = nullptr; + + auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) { + Constant *Init = GV->getInitializer(); + Type *Ty = Init->getType(); + if (Update) { + if (CurrentGV) { + assert(CurrentGV && "Expected a GV to commit to!"); + Type *CurrentInitTy = CurrentGV->getInitializer()->getType(); + // We have a valid cache that needs to be committed. 
+ if (StructType *STy = dyn_cast<StructType>(CurrentInitTy)) + CurrentGV->setInitializer(ConstantStruct::get(STy, Elts)); + else if (ArrayType *ArrTy = dyn_cast<ArrayType>(CurrentInitTy)) + CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts)); + else + CurrentGV->setInitializer(ConstantVector::get(Elts)); + } + if (CurrentGV == GV) + return; + // Need to clear and set up cache for new initializer. + CurrentGV = GV; + Elts.clear(); + unsigned NumElts; + if (auto *STy = dyn_cast<StructType>(Ty)) + NumElts = STy->getNumElements(); + else + NumElts = cast<SequentialType>(Ty)->getNumElements(); + for (unsigned i = 0, e = NumElts; i != e; ++i) + Elts.push_back(Init->getAggregateElement(i)); + } + }; + + for (auto CEPair : SimpleCEs) { + ConstantExpr *GEP = CEPair.first; + Constant *Val = CEPair.second; + + GlobalVariable *GV = cast<GlobalVariable>(GEP->getOperand(0)); + commitAndSetupCache(GV, GV != CurrentGV); + ConstantInt *CI = cast<ConstantInt>(GEP->getOperand(2)); + Elts[CI->getZExtValue()] = Val; + } + // The last initializer in the list needs to be committed, others + // will be committed on a new initializer being processed. + commitAndSetupCache(CurrentGV, true); +} + /// Evaluate static constructors in the function, if we can. Return true if we /// can, false otherwise. static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, @@ -2268,11 +2560,10 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, ++NumCtorsEvaluated; // We succeeded at evaluation: commit the result. 
- DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" - << F->getName() << "' to " << Eval.getMutatedMemory().size() - << " stores.\n"); - for (const auto &I : Eval.getMutatedMemory()) - CommitValueTo(I.second, I.first); + LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + << F->getName() << "' to " + << Eval.getMutatedMemory().size() << " stores.\n"); + BatchCommitValueTo(Eval.getMutatedMemory()); for (GlobalVariable *GV : Eval.getInvariants()) GV->setConstant(true); } @@ -2287,7 +2578,7 @@ static int compareNames(Constant *const *A, Constant *const *B) { } static void setUsedInitializer(GlobalVariable &V, - const SmallPtrSet<GlobalValue *, 8> &Init) { + const SmallPtrSetImpl<GlobalValue *> &Init) { if (Init.empty()) { V.eraseFromParent(); return; @@ -2440,7 +2731,7 @@ static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, static bool OptimizeGlobalAliases(Module &M, - SmallSet<const Comdat *, 8> &NotDiscardableComdats) { + SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { bool Changed = false; LLVMUsed Used(M); @@ -2460,7 +2751,7 @@ OptimizeGlobalAliases(Module &M, continue; } - // If the aliasee may change at link time, nothing can be done - bail out. + // If the alias can change at link time, nothing can be done - bail out. if (J->isInterposable()) continue; @@ -2486,6 +2777,7 @@ OptimizeGlobalAliases(Module &M, // Give the aliasee the name, linkage and other attributes of the alias. 
Target->takeName(&*J); Target->setLinkage(J->getLinkage()); + Target->setDSOLocal(J->isDSOLocal()); Target->setVisibility(J->getVisibility()); Target->setDLLStorageClass(J->getDLLStorageClass()); @@ -2619,8 +2911,10 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { static bool optimizeGlobalsInModule( Module &M, const DataLayout &DL, TargetLibraryInfo *TLI, + function_ref<TargetTransformInfo &(Function &)> GetTTI, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI, function_ref<DominatorTree &(Function &)> LookupDomTree) { - SmallSet<const Comdat *, 8> NotDiscardableComdats; + SmallPtrSet<const Comdat *, 8> NotDiscardableComdats; bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -2641,8 +2935,8 @@ static bool optimizeGlobalsInModule( NotDiscardableComdats.insert(C); // Delete functions that are trivially dead, ccc -> fastcc - LocalChange |= - OptimizeFunctions(M, TLI, LookupDomTree, NotDiscardableComdats); + LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree, + NotDiscardableComdats); // Optimize global_ctors list. 
LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { @@ -2679,7 +2973,15 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ return FAM.getResult<DominatorTreeAnalysis>(F); }; - if (!optimizeGlobalsInModule(M, DL, &TLI, LookupDomTree)) + auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult<BlockFrequencyAnalysis>(F); + }; + + if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -2702,12 +3004,22 @@ struct GlobalOptLegacyPass : public ModulePass { auto LookupDomTree = [this](Function &F) -> DominatorTree & { return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); }; - return optimizeGlobalsInModule(M, DL, TLI, LookupDomTree); + auto GetTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + }; + + auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { + return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); + }; + + return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<BlockFrequencyInfoWrapperPass>(); } }; @@ -2718,6 +3030,8 @@ char GlobalOptLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt", "Global Variable Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 
INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt", "Global Variable Optimizer", false, false) |