Diffstat (limited to 'llvm/lib/Transforms/IPO/GlobalOpt.cpp')
-rw-r--r-- | llvm/lib/Transforms/IPO/GlobalOpt.cpp | 271 |
1 file changed, 205 insertions, 66 deletions
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 0fd966457ece..d9fb820f7cb5 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -28,7 +28,6 @@
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -41,6 +40,7 @@
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -128,13 +128,16 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
     Type *Ty = Types.pop_back_val();
     switch (Ty->getTypeID()) {
       default: break;
-      case Type::PointerTyID: return true;
+      case Type::PointerTyID:
+        return true;
+      case Type::FixedVectorTyID:
+      case Type::ScalableVectorTyID:
+        if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
+          return true;
+        break;
       case Type::ArrayTyID:
-      case Type::VectorTyID: {
-        SequentialType *STy = cast<SequentialType>(Ty);
-        Types.push_back(STy->getElementType());
+        Types.push_back(cast<ArrayType>(Ty)->getElementType());
         break;
-      }
       case Type::StructTyID: {
         StructType *STy = cast<StructType>(Ty);
         if (STy->isOpaque()) return true;
@@ -142,7 +145,8 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
                  E = STy->element_end(); I != E; ++I) {
           Type *InnerTy = *I;
           if (isa<PointerType>(InnerTy)) return true;
-          if (isa<CompositeType>(InnerTy))
+          if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
+              isa<VectorType>(InnerTy))
             Types.push_back(InnerTy);
         }
         break;
@@ -191,10 +195,10 @@ CleanupPointerRootUsers(GlobalVariable *GV,
                         function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
   // A brief explanation of leak checkers. The goal is to find bugs where
   // pointers are forgotten, causing an accumulating growth in memory
-  // usage over time. The common strategy for leak checkers is to whitelist the
-  // memory pointed to by globals at exit. This is popular because it also
-  // solves another problem where the main thread of a C++ program may shut down
-  // before other threads that are still expecting to use those globals. To
+  // usage over time. The common strategy for leak checkers is to explicitly
+  // allow the memory pointed to by globals at exit. This is popular because it
+  // also solves another problem where the main thread of a C++ program may shut
+  // down before other threads that are still expecting to use those globals. To
   // handle that case, we expect the program may create a singleton and never
   // destroy it.

@@ -433,13 +437,27 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
   return true;
 }

+static bool IsSRASequential(Type *T) {
+  return isa<ArrayType>(T) || isa<VectorType>(T);
+}
+static uint64_t GetSRASequentialNumElements(Type *T) {
+  if (ArrayType *AT = dyn_cast<ArrayType>(T))
+    return AT->getNumElements();
+  return cast<FixedVectorType>(T)->getNumElements();
+}
+static Type *GetSRASequentialElementType(Type *T) {
+  if (ArrayType *AT = dyn_cast<ArrayType>(T))
+    return AT->getElementType();
+  return cast<VectorType>(T)->getElementType();
+}
 static bool CanDoGlobalSRA(GlobalVariable *GV) {
   Constant *Init = GV->getInitializer();

   if (isa<StructType>(Init->getType())) {
     // nothing to check
-  } else if (SequentialType *STy = dyn_cast<SequentialType>(Init->getType())) {
-    if (STy->getNumElements() > 16 && GV->hasNUsesOrMore(16))
+  } else if (IsSRASequential(Init->getType())) {
+    if (GetSRASequentialNumElements(Init->getType()) > 16 &&
+        GV->hasNUsesOrMore(16))
       return false; // It's not worth it.
   } else
     return false;
@@ -450,14 +468,19 @@
 /// Copy over the debug info for a variable to its SRA replacements.
 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                  uint64_t FragmentOffsetInBits,
-                                 uint64_t FragmentSizeInBits,
-                                 unsigned NumElements) {
+                                 uint64_t FragmentSizeInBits) {
   SmallVector<DIGlobalVariableExpression *, 1> GVs;
   GV->getDebugInfo(GVs);
   for (auto *GVE : GVs) {
     DIVariable *Var = GVE->getVariable();
+    Optional<uint64_t> VarSize = Var->getSizeInBits();
+
     DIExpression *Expr = GVE->getExpression();
-    if (NumElements > 1) {
+    // If the FragmentSize is smaller than the variable,
+    // emit a fragment expression.
+    // If the variable size is unknown a fragment must be
+    // emitted to be safe.
+    if (!VarSize || FragmentSizeInBits < *VarSize) {
       if (auto E = DIExpression::createFragmentExpression(
               Expr, FragmentOffsetInBits, FragmentSizeInBits))
         Expr = *E;
@@ -486,9 +509,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
   std::map<unsigned, GlobalVariable *> NewGlobals;

   // Get the alignment of the global, either explicit or target-specific.
-  unsigned StartAlignment = GV->getAlignment();
-  if (StartAlignment == 0)
-    StartAlignment = DL.getABITypeAlignment(GV->getType());
+  Align StartAlignment =
+      DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType());

   // Loop over all users and create replacement variables for used aggregate
   // elements.
@@ -509,8 +531,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
     Type *ElTy = nullptr;
     if (StructType *STy = dyn_cast<StructType>(Ty))
       ElTy = STy->getElementType(ElementIdx);
-    else if (SequentialType *STy = dyn_cast<SequentialType>(Ty))
-      ElTy = STy->getElementType();
+    else
+      ElTy = GetSRASequentialElementType(Ty);
     assert(ElTy);

     Constant *In = Init->getAggregateElement(ElementIdx);
@@ -531,29 +553,27 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
       // had 256 byte alignment for example, something might depend on that:
       // propagate info to each field.
       uint64_t FieldOffset = Layout.getElementOffset(ElementIdx);
-      Align NewAlign(MinAlign(StartAlignment, FieldOffset));
-      if (NewAlign >
-          Align(DL.getABITypeAlignment(STy->getElementType(ElementIdx))))
+      Align NewAlign = commonAlignment(StartAlignment, FieldOffset);
+      if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx)))
         NGV->setAlignment(NewAlign);

       // Copy over the debug info for the variable.
       uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
       uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
-      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size,
-                           STy->getNumElements());
-    } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size);
+    } else {
       uint64_t EltSize = DL.getTypeAllocSize(ElTy);
-      Align EltAlign(DL.getABITypeAlignment(ElTy));
+      Align EltAlign = DL.getABITypeAlign(ElTy);
       uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);

       // Calculate the known alignment of the field. If the original aggregate
       // had 256 byte alignment for example, something might depend on that:
       // propagate info to each field.
-      Align NewAlign(MinAlign(StartAlignment, EltSize * ElementIdx));
+      Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx);
       if (NewAlign > EltAlign)
         NGV->setAlignment(NewAlign);
       transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
-                           FragmentSizeInBits, STy->getNumElements());
+                           FragmentSizeInBits);
     }
   }

@@ -641,12 +661,12 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
         return false; // Storing the value.
     }
   } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
-    if (CI->getCalledValue() != V) {
+    if (CI->getCalledOperand() != V) {
      //cerr << "NONTRAPPING USE: " << *U;
      return false; // Not calling the ptr
    }
   } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
-    if (II->getCalledValue() != V) {
+    if (II->getCalledOperand() != V) {
      //cerr << "NONTRAPPING USE: " << *U;
      return false; // Not calling the ptr
    }
@@ -659,9 +679,6 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
      // checked.
      if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
-    } else if (isa<ICmpInst>(U) &&
-               isa<ConstantPointerNull>(U->getOperand(1))) {
-      // Ignore icmp X, null
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
@@ -706,17 +723,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
-      CallSite CS(I);
-      if (CS.getCalledValue() == V) {
+      CallBase *CB = cast<CallBase>(I);
+      if (CB->getCalledOperand() == V) {
        // Calling through the pointer! Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
-        CS.setCalledFunction(NewV);
+        CB->setCalledOperand(NewV);
        Changed = true;
        bool PassedAsArg = false;
-        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
-          if (CS.getArgument(i) == V) {
+        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
+          if (CB->getArgOperand(i) == V) {
            PassedAsArg = true;
-            CS.setArgument(i, NewV);
+            CB->setArgOperand(i, NewV);
          }

        if (PassedAsArg) {
@@ -905,7 +922,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
     if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
       // The global is initialized when the store to it occurs.
       new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false,
-                    None, SI->getOrdering(), SI->getSyncScopeID(), SI);
+                    Align(1), SI->getOrdering(), SI->getSyncScopeID(), SI);
       SI->eraseFromParent();
       continue;
     }
@@ -922,7 +939,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
       // Replace the cmp X, 0 with a use of the bool value.
       // Sink the load to where the compare was, if atomic rules allow us to.
       Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
-                               InitBool->getName() + ".val", false, None,
+                               InitBool->getName() + ".val", false, Align(1),
                                LI->getOrdering(), LI->getSyncScopeID(),
                                LI->isUnordered() ? (Instruction *)ICI : LI);
       InitBoolUsed = true;
@@ -1729,7 +1746,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
           assert(LI->getOperand(0) == GV && "Not a copy!");
           // Insert a new load, to preserve the saved value.
           StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
-                                  LI->getName() + ".b", false, None,
+                                  LI->getName() + ".b", false, Align(1),
                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
         } else {
           assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
@@ -1739,14 +1756,14 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
         }
       }
       StoreInst *NSI =
-          new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(),
+          new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
                         SI->getSyncScopeID(), SI);
       NSI->setDebugLoc(SI->getDebugLoc());
     } else {
       // Change the load into a load of bool then a select.
       LoadInst *LI = cast<LoadInst>(UI);
       LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
-                                   LI->getName() + ".b", false, None,
+                                   LI->getName() + ".b", false, Align(1),
                                    LI->getOrdering(), LI->getSyncScopeID(), LI);
       Instruction *NSI;
       if (IsOneZero)
@@ -2117,8 +2134,7 @@ static void ChangeCalleesToFastCall(Function *F) {
   for (User *U : F->users()) {
     if (isa<BlockAddress>(U))
       continue;
-    CallSite CS(cast<Instruction>(U));
-    CS.setCallingConv(CallingConv::Fast);
+    cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
   }
 }

@@ -2135,8 +2151,8 @@ static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
   for (User *U : F->users()) {
     if (isa<BlockAddress>(U))
       continue;
-    CallSite CS(cast<Instruction>(U));
-    CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
+    CallBase *CB = cast<CallBase>(U);
+    CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
   }
 }

@@ -2175,12 +2191,12 @@ static bool hasChangeableCC(Function *F) {

 /// Return true if the block containing the call site has a BlockFrequency of
 /// less than ColdCCRelFreq% of the entry block.
-static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) {
+static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
   const BranchProbability ColdProb(ColdCCRelFreq, 100);
-  auto CallSiteBB = CS.getInstruction()->getParent();
+  auto *CallSiteBB = CB.getParent();
   auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
   auto CallerEntryFreq =
-      CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+      CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
   return CallSiteFreq < CallerEntryFreq * ColdProb;
 }

@@ -2200,10 +2216,10 @@ isValidCandidateForColdCC(Function &F,
     if (isa<BlockAddress>(U))
       continue;

-    CallSite CS(cast<Instruction>(U));
-    Function *CallerFunc = CS.getInstruction()->getParent()->getParent();
+    CallBase &CB = cast<CallBase>(*U);
+    Function *CallerFunc = CB.getParent()->getParent();
     BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
-    if (!isColdCallSite(CS, CallerBFI))
+    if (!isColdCallSite(CB, CallerBFI))
       return false;
     auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc);
     if (It == AllCallsCold.end())
@@ -2216,8 +2232,7 @@ static void changeCallSitesToColdCC(Function *F) {
   for (User *U : F->users()) {
     if (isa<BlockAddress>(U))
       continue;
-    CallSite CS(cast<Instruction>(U));
-    CS.setCallingConv(CallingConv::Cold);
+    cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
   }
 }

@@ -2230,7 +2245,6 @@ hasOnlyColdCalls(Function &F,
   for (BasicBlock &BB : F) {
     for (Instruction &I : BB) {
       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
-        CallSite CS(cast<Instruction>(CI));
         // Skip over isline asm instructions since they aren't function calls.
         if (CI->isInlineAsm())
           continue;
@@ -2247,7 +2261,7 @@ hasOnlyColdCalls(Function &F,
             CalledFn->hasAddressTaken())
           return false;
         BlockFrequencyInfo &CallerBFI = GetBFI(F);
-        if (!isColdCallSite(CS, CallerBFI))
+        if (!isColdCallSite(*CI, CallerBFI))
           return false;
       }
     }
@@ -2255,6 +2269,115 @@ hasOnlyColdCalls(Function &F,
   return true;
 }

+static bool hasMustTailCallers(Function *F) {
+  for (User *U : F->users()) {
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB) {
+      assert(isa<BlockAddress>(U) &&
+             "Expected either CallBase or BlockAddress");
+      continue;
+    }
+    if (CB->isMustTailCall())
+      return true;
+  }
+  return false;
+}
+
+static bool hasInvokeCallers(Function *F) {
+  for (User *U : F->users())
+    if (isa<InvokeInst>(U))
+      return true;
+  return false;
+}
+
+static void RemovePreallocated(Function *F) {
+  RemoveAttribute(F, Attribute::Preallocated);
+
+  auto *M = F->getParent();
+
+  IRBuilder<> Builder(M->getContext());
+
+  // Cannot modify users() while iterating over it, so make a copy.
+  SmallVector<User *, 4> PreallocatedCalls(F->users());
+  for (User *U : PreallocatedCalls) {
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB)
+      continue;
+
+    assert(
+        !CB->isMustTailCall() &&
+        "Shouldn't call RemotePreallocated() on a musttail preallocated call");
+    // Create copy of call without "preallocated" operand bundle.
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    CB->getOperandBundlesAsDefs(OpBundles);
+    CallBase *PreallocatedSetup = nullptr;
+    for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
+      if (It->getTag() == "preallocated") {
+        PreallocatedSetup = cast<CallBase>(*It->input_begin());
+        OpBundles.erase(It);
+        break;
+      }
+    }
+    assert(PreallocatedSetup && "Did not find preallocated bundle");
+    uint64_t ArgCount =
+        cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
+
+    assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
+           "Unknown indirect call type");
+    CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
+    CB->replaceAllUsesWith(NewCB);
+    NewCB->takeName(CB);
+    CB->eraseFromParent();
+
+    Builder.SetInsertPoint(PreallocatedSetup);
+    auto *StackSave =
+        Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
+
+    Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
+    Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
+                       StackSave);
+
+    // Replace @llvm.call.preallocated.arg() with alloca.
+    // Cannot modify users() while iterating over it, so make a copy.
+    // @llvm.call.preallocated.arg() can be called with the same index multiple
+    // times. So for each @llvm.call.preallocated.arg(), we see if we have
+    // already created a Value* for the index, and if not, create an alloca and
+    // bitcast right after the @llvm.call.preallocated.setup() so that it
+    // dominates all uses.
+    SmallVector<Value *, 2> ArgAllocas(ArgCount);
+    SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
+    for (auto *User : PreallocatedArgs) {
+      auto *UseCall = cast<CallBase>(User);
+      assert(UseCall->getCalledFunction()->getIntrinsicID() ==
+                 Intrinsic::call_preallocated_arg &&
+             "preallocated token use was not a llvm.call.preallocated.arg");
+      uint64_t AllocArgIndex =
+          cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
+      Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
+      if (!AllocaReplacement) {
+        auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
+        auto *ArgType = UseCall
+                            ->getAttribute(AttributeList::FunctionIndex,
+                                           Attribute::Preallocated)
+                            .getValueAsType();
+        auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
+        Builder.SetInsertPoint(InsertBefore);
+        auto *Alloca =
+            Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
+        auto *BitCast = Builder.CreateBitCast(
+            Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
+        ArgAllocas[AllocArgIndex] = BitCast;
+        AllocaReplacement = BitCast;
+      }
+
+      UseCall->replaceAllUsesWith(AllocaReplacement);
+      UseCall->eraseFromParent();
+    }
+    // Remove @llvm.call.preallocated.setup().
+    cast<Instruction>(PreallocatedSetup)->eraseFromParent();
+  }
+}
+
 static bool
 OptimizeFunctions(Module &M,
                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2319,11 +2442,22 @@ OptimizeFunctions(Module &M,
     // FIXME: We should also hoist alloca affected by this to the entry
     // block if possible.
     if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
-        !F->hasAddressTaken()) {
+        !F->hasAddressTaken() && !hasMustTailCallers(F)) {
       RemoveAttribute(F, Attribute::InAlloca);
       Changed = true;
     }

+    // FIXME: handle invokes
+    // FIXME: handle musttail
+    if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
+      if (!F->hasAddressTaken() && !hasMustTailCallers(F) &&
+          !hasInvokeCallers(F)) {
+        RemovePreallocated(F);
+        Changed = true;
+      }
+      continue;
+    }
+
     if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
       NumInternalFunc++;
       TargetTransformInfo &TTI = GetTTI(*F);
@@ -2385,7 +2519,7 @@ OptimizeGlobalVars(Module &M,
       // for that optional parameter, since we don't have a Function to
       // provide GetTLI anyway.
       Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
-      if (New && New != C)
+      if (New != C)
         GV->setInitializer(New);
     }

@@ -2427,8 +2561,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
   }

   ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
-  SequentialType *InitTy = cast<SequentialType>(Init->getType());
-  uint64_t NumElts = InitTy->getNumElements();
+  uint64_t NumElts;
+  if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType()))
+    NumElts = ATy->getNumElements();
+  else
+    NumElts = cast<FixedVectorType>(Init->getType())->getNumElements();

   // Break up the array into elements.
   for (uint64_t i = 0, e = NumElts; i != e; ++i)
@@ -2439,7 +2576,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
     EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);

   if (Init->getType()->isArrayTy())
-    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+    return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts);
   return ConstantVector::get(Elts);
 }

@@ -2561,8 +2698,10 @@ static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) {
     unsigned NumElts;
     if (auto *STy = dyn_cast<StructType>(Ty))
       NumElts = STy->getNumElements();
+    else if (auto *ATy = dyn_cast<ArrayType>(Ty))
+      NumElts = ATy->getNumElements();
     else
-      NumElts = cast<SequentialType>(Ty)->getNumElements();
+      NumElts = cast<FixedVectorType>(Ty)->getNumElements();
     for (unsigned i = 0, e = NumElts; i != e; ++i)
       Elts.push_back(Init->getAggregateElement(i));
   }
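Editor's note, not part of the patch: a minimal sketch of the CallSite-to-CallBase pattern this diff applies at every call-site walk. The function name below is made up for illustration; the body mirrors ChangeCalleesToFastCall as rewritten above. CallSite (from the removed llvm/IR/CallSite.h) was a wrapper over CallInst/InvokeInst, whereas CallBase is their common base class, so the user of a function at a call site can simply be cast to it.

// Illustrative sketch only; the calls mirror the patched GlobalOpt code.
#include "llvm/IR/CallingConv.h"  // CallingConv::Fast
#include "llvm/IR/Constants.h"    // BlockAddress
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"   // CallBase
using namespace llvm;

static void makeCallSitesFast(Function &F) {
  for (User *U : F.users()) {
    // BlockAddress users are not call sites; skip them as the patch does.
    if (isa<BlockAddress>(U))
      continue;
    // Old: CallSite CS(cast<Instruction>(U)); CS.setCallingConv(CallingConv::Fast);
    // New: the calling user is itself a CallBase (CallInst or InvokeInst).
    cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
  }
}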