diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-03 14:10:23 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2022-07-03 14:10:23 +0000 |
| commit | 145449b1e420787bb99721a429341fa6be3adfb6 (patch) | |
| tree | 1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Transforms/IPO | |
| parent | ecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff) | |
Diffstat (limited to 'llvm/lib/Transforms/IPO')
39 files changed, 3672 insertions, 3192 deletions
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index a6d9ce1033f3..58cea7ebb749 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -1,4 +1,4 @@ -//===- InlineAlways.cpp - Code to inline always_inline functions ----------===// +//===- AlwaysInliner.cpp - Code to inline always_inline functions ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -16,15 +16,10 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -60,31 +55,38 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, for (User *U : F.users()) if (auto *CB = dyn_cast<CallBase>(U)) if (CB->getCalledFunction() == &F && - CB->hasFnAttr(Attribute::AlwaysInline)) - Calls.insert(CB); + CB->hasFnAttr(Attribute::AlwaysInline) && + !CB->getAttributes().hasFnAttr(Attribute::NoInline)) + Calls.insert(CB); for (CallBase *CB : Calls) { Function *Caller = CB->getCaller(); OptimizationRemarkEmitter ORE(Caller); - auto OIC = shouldInline( - *CB, - [&](CallBase &CB) { - return InlineCost::getAlways("always inline attribute"); - }, - ORE); - assert(OIC); - emitInlinedIntoBasedOnCost(ORE, CB->getDebugLoc(), CB->getParent(), F, - *Caller, *OIC, false, DEBUG_TYPE); + DebugLoc DLoc = CB->getDebugLoc(); + BasicBlock *Block = CB->getParent(); 
InlineFunctionInfo IFI( /*cg=*/nullptr, GetAssumptionCache, &PSI, - &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())), + &FAM.getResult<BlockFrequencyAnalysis>(*Caller), &FAM.getResult<BlockFrequencyAnalysis>(F)); InlineResult Res = InlineFunction( *CB, IFI, &FAM.getResult<AAManager>(F), InsertLifetime); - assert(Res.isSuccess() && "unexpected failure to inline"); - (void)Res; + if (!Res.isSuccess()) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, + Block) + << "'" << ore::NV("Callee", &F) << "' is not inlined into '" + << ore::NV("Caller", Caller) + << "': " << ore::NV("Reason", Res.getFailureReason()); + }); + continue; + } + + emitInlinedIntoBasedOnCost( + ORE, DLoc, Block, F, *Caller, + InlineCost::getAlways("always inline attribute"), + /*ForProfileContext=*/false, DEBUG_TYPE); // Merge the attributes based on the inlining. AttributeFuncs::mergeAttributesForInlining(*Caller, F); @@ -210,6 +212,9 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) { if (!CB.hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getNever("no alwaysinline attribute"); + if (Callee->hasFnAttribute(Attribute::AlwaysInline) && CB.isNoInline()) + return InlineCost::getNever("noinline call site attribute"); + auto IsViable = isInlineViable(*Callee); if (!IsViable.isSuccess()) return InlineCost::getNever(IsViable.getFailureReason()); diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index e6a542385662..62cfc3294968 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -29,9 +29,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/ArgumentPromotion.h" + #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" 
@@ -40,15 +39,11 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -56,33 +51,26 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> #include <cassert> #include <cstdint> -#include <functional> -#include <iterator> -#include <map> -#include <set> #include <utility> #include <vector> @@ -91,43 +79,81 @@ using namespace llvm; #define DEBUG_TYPE "argpromotion" STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); -STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); -STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted"); STATISTIC(NumArgumentsDead, "Number of dead 
pointer args eliminated"); -/// A vector used to hold the indices of a single GEP instruction -using IndicesVector = std::vector<uint64_t>; +namespace { + +struct ArgPart { + Type *Ty; + Align Alignment; + /// A representative guaranteed-executed load or store instruction for use by + /// metadata transfer. + Instruction *MustExecInstr; +}; + +using OffsetAndArgPart = std::pair<int64_t, ArgPart>; + +} // end anonymous namespace + +static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL, + Value *Ptr, Type *ResElemTy, int64_t Offset) { + // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise + // fall back to an i8 GEP to a specific offset. + unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); + APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset); + if (!Ptr->getType()->isOpaquePointerTy()) { + Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType(); + if (OrigOffset == 0 && OrigElemTy == ResElemTy) + return Ptr; + + if (OrigElemTy->isSized()) { + APInt TmpOffset = OrigOffset; + Type *TmpTy = OrigElemTy; + SmallVector<APInt> IntIndices = + DL.getGEPIndicesForOffset(TmpTy, TmpOffset); + if (TmpOffset == 0) { + // Try to add trailing zero indices to reach the right type. + while (TmpTy != ResElemTy) { + Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0); + if (!NextTy) + break; + + IntIndices.push_back(APInt::getZero( + isa<StructType>(TmpTy) ? 
32 : OrigOffset.getBitWidth())); + TmpTy = NextTy; + } + + SmallVector<Value *> Indices; + for (const APInt &Index : IntIndices) + Indices.push_back(IRB.getInt(Index)); + + if (OrigOffset != 0 || TmpTy == ResElemTy) { + Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices); + return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace)); + } + } + } + } + + if (OrigOffset != 0) { + Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace)); + Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset)); + } + return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace)); +} /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. static Function * -doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, - SmallPtrSetImpl<Argument *> &ByValArgsToTransform, - Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>> - ReplaceCallSite) { +doPromotion(Function *F, FunctionAnalysisManager &FAM, + const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> + &ArgsToPromote) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector<Type *> Params; - using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>; - - // ScalarizedElements - If we are promoting a pointer that has elements - // accessed out of it, keep track of which elements are accessed so that we - // can add one argument for each. - // - // Arguments that are directly loaded will have a zero element value here, to - // handle cases where there are both a direct load and GEP accesses. 
- std::map<Argument *, ScalarizeTable> ScalarizedElements; - - // OriginalLoads - Keep track of a representative load instruction from the - // original function so that we can tell the alias analysis implementation - // what the new GEP/Load instructions we are inserting look like. - // We need to keep the original loads for each argument and the elements - // of the argument that are accessed. - std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; - // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost @@ -138,15 +164,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { - if (ByValArgsToTransform.count(&*I)) { - // Simple byval argument? Just add all the struct element types. - Type *AgTy = I->getParamByValType(); - StructType *STy = cast<StructType>(AgTy); - llvm::append_range(Params, STy->elements()); - ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), - AttributeSet()); - ++NumByValArgsPromoted; - } else if (!ArgsToPromote.count(&*I)) { + if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo)); @@ -154,58 +172,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { - // Okay, this is being promoted. This means that the only uses are loads - // or GEPs which are only used by loads - - // In this table, we will track which indices are loaded from the argument - // (where direct loads are tracked as no indices). 
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - for (User *U : make_early_inc_range(I->users())) { - Instruction *UI = cast<Instruction>(U); - Type *SrcTy; - if (LoadInst *L = dyn_cast<LoadInst>(UI)) - SrcTy = L->getType(); - else - SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); - // Skip dead GEPs and remove them. - if (isa<GetElementPtrInst>(UI) && UI->use_empty()) { - UI->eraseFromParent(); - continue; - } - - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (const Use &I : llvm::drop_begin(UI->operands())) - Indices.push_back(cast<ConstantInt>(I)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); - ArgIndices.insert(std::make_pair(SrcTy, Indices)); - LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast<LoadInst>(UI)) - OrigLoad = L; - else - // Take any load, we will use it only to update Alias Analysis - OrigLoad = cast<LoadInst>(UI->user_back()); - OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; - } - - // Add a parameter to the function for each element passed in. 
- for (const auto &ArgIndex : ArgIndices) { - // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType( - I->getType()->getPointerElementType(), ArgIndex.second)); + const auto &ArgParts = ArgsToPromote.find(&*I)->second; + for (const auto &Pair : ArgParts) { + Params.push_back(Pair.second.Ty); ArgAttrVec.push_back(AttributeSet()); - assert(Params.back()); } - - if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) - ++NumArgumentsPromoted; - else - ++NumAggregatesPromoted; + ++NumArgumentsPromoted; } } @@ -222,24 +194,30 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // The new function will have the !dbg metadata copied from the original // function. The original function may not be deleted, and dbg metadata need - // to be unique so we need to drop it. + // to be unique, so we need to drop it. F->setSubprogram(nullptr); LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); + uint64_t LargestVectorWidth = 0; + for (auto *I : Params) + if (auto *VT = dyn_cast<llvm::VectorType>(I)) + LargestVectorWidth = std::max( + LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); + // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrVec)); + AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth); ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Loop over all of the callers of the function, transforming the call sites - // to pass in the loaded pointers. - // + // Loop over all the callers of the function, transforming the call sites to + // pass in the loaded pointers. 
SmallVector<Value *, 16> Args; const DataLayout &DL = F->getParent()->getDataLayout(); while (!F->use_empty()) { @@ -250,74 +228,34 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Loop over the operands, inserting GEP and loads in the caller as // appropriate. - auto AI = CB.arg_begin(); + auto *AI = CB.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++AI, ++ArgNo) - if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + ++I, ++AI, ++ArgNo) { + if (!ArgsToPromote.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); - } else if (ByValArgsToTransform.count(&*I)) { - // Emit a GEP and load for each element of the struct. - Type *AgTy = I->getParamByValType(); - StructType *STy = cast<StructType>(AgTy); - Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; - const StructLayout *SL = DL.getStructLayout(STy); - Align StructAlign = *I->getParamAlign(); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - auto *Idx = - IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i)); - // TODO: Tell AA about the new values? - Align Alignment = - commonAlignment(StructAlign, SL->getElementOffset(i)); - Args.push_back(IRB.CreateAlignedLoad( - STy->getElementType(i), Idx, Alignment, Idx->getName() + ".val")); - ArgAttrVec.push_back(AttributeSet()); - } } else if (!I->use_empty()) { - // Non-dead argument: insert GEPs and loads as appropriate. - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - // Store the Value* version of the indices in here, but declare it now - // for reuse. 
- std::vector<Value *> Ops; - for (const auto &ArgIndex : ArgIndices) { - Value *V = *AI; - LoadInst *OrigLoad = - OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; - if (!ArgIndex.second.empty()) { - Ops.reserve(ArgIndex.second.size()); - Type *ElTy = V->getType(); - for (auto II : ArgIndex.second) { - // Use i32 to index structs, and i64 for others (pointers/arrays). - // This satisfies GEP constraints. - Type *IdxTy = - (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) - : Type::getInt64Ty(F->getContext())); - Ops.push_back(ConstantInt::get(IdxTy, II)); - // Keep track of the type we're currently indexing. - if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) - ElTy = ElPTy->getPointerElementType(); - else - ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II); - } - // And create a GEP to extract those indices. - V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx"); - Ops.clear(); + Value *V = *AI; + const auto &ArgParts = ArgsToPromote.find(&*I)->second; + for (const auto &Pair : ArgParts) { + LoadInst *LI = IRB.CreateAlignedLoad( + Pair.second.Ty, + createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first), + Pair.second.Alignment, V->getName() + ".val"); + if (Pair.second.MustExecInstr) { + LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata()); + LI->copyMetadata(*Pair.second.MustExecInstr, + {LLVMContext::MD_range, LLVMContext::MD_nonnull, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_align, LLVMContext::MD_noundef}); } - // Since we're replacing a load make sure we take the alignment - // of the previous load. - LoadInst *newLoad = - IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val"); - newLoad->setAlignment(OrigLoad->getAlign()); - // Transfer the AA info too. - newLoad->setAAMetadata(OrigLoad->getAAMetadata()); - - Args.push_back(newLoad); + Args.push_back(LI); ArgAttrVec.push_back(AttributeSet()); } } + } // Push any varargs arguments on the list. 
for (; AI != CB.arg_end(); ++AI, ++ArgNo) { @@ -345,9 +283,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, Args.clear(); ArgAttrVec.clear(); - // Update the callgraph to know that the callsite has been transformed. - if (ReplaceCallSite) - (*ReplaceCallSite)(CB, *NewCS); + AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(), + LargestVectorWidth); if (!CB.use_empty()) { CB.replaceAllUsesWith(NewCS); @@ -364,11 +301,15 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + // We will collect all the new created allocas to promote them into registers + // after the following loop + SmallVector<AllocaInst *, 4> Allocas; + // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. Function::arg_iterator I2 = NF->arg_begin(); for (Argument &Arg : F->args()) { - if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) { + if (!ArgsToPromote.count(&Arg)) { // If this is an unmodified argument, move the name and users over to the // new version. Arg.replaceAllUsesWith(&*I2); @@ -377,37 +318,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, continue; } - if (ByValArgsToTransform.count(&Arg)) { - // In the callee, we create an alloca, and store each of the new incoming - // arguments into the alloca. - Instruction *InsertPt = &NF->begin()->front(); - - // Just add all the struct element types. 
- Type *AgTy = Arg.getParamByValType(); - Align StructAlign = *Arg.getParamAlign(); - Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, - StructAlign, "", InsertPt); - StructType *STy = cast<StructType>(AgTy); - Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), - nullptr}; - const StructLayout *SL = DL.getStructLayout(STy); - - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create( - AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), - InsertPt); - I2->setName(Arg.getName() + "." + Twine(i)); - Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i)); - new StoreInst(&*I2++, Idx, false, Alignment, InsertPt); - } - - // Anything that used the arg should now use the alloca. - Arg.replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(&Arg); - continue; - } - // There potentially are metadata uses for things like llvm.dbg.value. // Replace them with undef, after handling the other regular uses. auto RauwUndefMetadata = make_scope_exit( @@ -416,57 +326,95 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, if (Arg.use_empty()) continue; - // Otherwise, if we promoted this argument, then all users are load - // instructions (or GEPs with only load users), and all loads should be - // using the new argument that we added. - ScalarizeTable &ArgIndices = ScalarizedElements[&Arg]; + // Otherwise, if we promoted this argument, we have to create an alloca in + // the callee for every promotable part and store each of the new incoming + // arguments into the corresponding alloca, what lets the old code (the + // store instructions if they are allowed especially) a chance to work as + // before. 
+ assert(Arg.getType()->isPointerTy() && + "Only arguments with a pointer type are promotable"); - while (!Arg.use_empty()) { - if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) { - assert(ArgIndices.begin()->second.empty() && - "Load element should sort to front!"); - I2->setName(Arg.getName() + ".val"); - LI->replaceAllUsesWith(&*I2); - LI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName() - << "' in function '" << F->getName() << "'\n"); - } else { - GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back()); - assert(!GEP->use_empty() && - "GEPs without uses should be cleaned up already"); - IndicesVector Operands; - Operands.reserve(GEP->getNumIndices()); - for (const Use &Idx : GEP->indices()) - Operands.push_back(cast<ConstantInt>(Idx)->getSExtValue()); + IRBuilder<NoFolder> IRB(&NF->begin()->front()); - // GEPs with a single 0 index can be merged with direct loads - if (Operands.size() == 1 && Operands.front() == 0) - Operands.clear(); + // Add only the promoted elements, so parts from ArgsToPromote + SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca; + for (const auto &Pair : ArgsToPromote.find(&Arg)->second) { + int64_t Offset = Pair.first; + const ArgPart &Part = Pair.second; - Function::arg_iterator TheArg = I2; - for (ScalarizeTable::iterator It = ArgIndices.begin(); - It->second != Operands; ++It, ++TheArg) { - assert(It != ArgIndices.end() && "GEP not handled??"); - } + Argument *NewArg = I2++; + NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val"); + + AllocaInst *NewAlloca = IRB.CreateAlloca( + Part.Ty, nullptr, Arg.getName() + "." 
+ Twine(Offset) + ".allc"); + NewAlloca->setAlignment(Pair.second.Alignment); + IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment); - TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(), - make_range(Operands.begin(), Operands.end()))); + // Collect the alloca to retarget the users to + OffsetToAlloca.insert({Offset, NewAlloca}); + } - LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() - << "' of function '" << NF->getName() << "'\n"); + auto GetAlloca = [&](Value *Ptr) { + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, + /* AllowNonInbounds */ true); + assert(Ptr == &Arg && "Not constant offset from arg?"); + return OffsetToAlloca.lookup(Offset.getSExtValue()); + }; - // All of the uses must be load instructions. Replace them all with - // the argument specified by ArgNo. - while (!GEP->use_empty()) { - LoadInst *L = cast<LoadInst>(GEP->user_back()); - L->replaceAllUsesWith(&*TheArg); - L->eraseFromParent(); - } - GEP->eraseFromParent(); + // Cleanup the code from the dead instructions: GEPs and BitCasts in between + // the original argument and its users: loads and stores. Retarget every + // user to the new created alloca. 
+ SmallVector<Value *, 16> Worklist; + SmallVector<Instruction *, 16> DeadInsts; + append_range(Worklist, Arg.users()); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) { + DeadInsts.push_back(cast<Instruction>(V)); + append_range(Worklist, V->users()); + continue; + } + + if (auto *LI = dyn_cast<LoadInst>(V)) { + Value *Ptr = LI->getPointerOperand(); + LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr)); + continue; + } + + if (auto *SI = dyn_cast<StoreInst>(V)) { + assert(!SI->isVolatile() && "Volatile operations can't be promoted."); + Value *Ptr = SI->getPointerOperand(); + SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr)); + continue; } + + llvm_unreachable("Unexpected user"); + } + + for (Instruction *I : DeadInsts) { + I->replaceAllUsesWith(PoisonValue::get(I->getType())); + I->eraseFromParent(); + } + + // Collect the allocas for promotion + for (const auto &Pair : OffsetToAlloca) { + assert(isAllocaPromotable(Pair.second) && + "By design, only promotable allocas should be produced."); + Allocas.push_back(Pair.second); } - // Increment I2 past all of the arguments added for this promoted pointer. - std::advance(I2, ArgIndices.size()); + } + + LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size() + << " alloca(s) are promotable by Mem2Reg\n"); + + if (!Allocas.empty()) { + // And we are able to call the `promoteMemoryToRegister()` function. + // Our earlier checks have ensured that PromoteMemToReg() will + // succeed. + auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF); + auto &AC = FAM.getResult<AssumptionAnalysis>(*NF); + PromoteMemToReg(Allocas, DT, &AC); } return NF; @@ -474,100 +422,37 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, /// Return true if we can prove that all callees pass in a valid pointer for the /// specified function argument. 
-static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) { +static bool allCallersPassValidPointerForArgument(Argument *Arg, + Align NeededAlign, + uint64_t NeededDerefBytes) { Function *Callee = Arg->getParent(); const DataLayout &DL = Callee->getParent()->getDataLayout(); + APInt Bytes(64, NeededDerefBytes); - unsigned ArgNo = Arg->getArgNo(); + // Check if the argument itself is marked dereferenceable and aligned. + if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL)) + return true; // Look at all call sites of the function. At this point we know we only have // direct callees. - for (User *U : Callee->users()) { + return all_of(Callee->users(), [&](User *U) { CallBase &CB = cast<CallBase>(*U); - - if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL)) - return false; - } - return true; -} - -/// Returns true if Prefix is a prefix of longer. That means, Longer has a size -/// that is greater than or equal to the size of prefix, and each of the -/// elements in Prefix is the same as the corresponding elements in Longer. -/// -/// This means it also returns true when Prefix and Longer are equal! -static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) { - if (Prefix.size() > Longer.size()) - return false; - return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); -} - -/// Checks if Indices, or a prefix of Indices, is in Set. -static bool prefixIn(const IndicesVector &Indices, - std::set<IndicesVector> &Set) { - std::set<IndicesVector>::iterator Low; - Low = Set.upper_bound(Indices); - if (Low != Set.begin()) - Low--; - // Low is now the last element smaller than or equal to Indices. This means - // it points to a prefix of Indices (possibly Indices itself), if such - // prefix exists. - // - // This load is safe if any prefix of its operands is safe to load. 
- return Low != Set.end() && isPrefix(*Low, Indices); + return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()), + NeededAlign, Bytes, DL); + }); } -/// Mark the given indices (ToMark) as safe in the given set of indices -/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there -/// is already a prefix of Indices in Safe, Indices are implicitely marked safe -/// already. Furthermore, any indices that Indices is itself a prefix of, are -/// removed from Safe (since they are implicitely safe because of Indices now). -static void markIndicesSafe(const IndicesVector &ToMark, - std::set<IndicesVector> &Safe) { - std::set<IndicesVector>::iterator Low; - Low = Safe.upper_bound(ToMark); - // Guard against the case where Safe is empty - if (Low != Safe.begin()) - Low--; - // Low is now the last element smaller than or equal to Indices. This - // means it points to a prefix of Indices (possibly Indices itself), if - // such prefix exists. - if (Low != Safe.end()) { - if (isPrefix(*Low, ToMark)) - // If there is already a prefix of these indices (or exactly these - // indices) marked a safe, don't bother adding these indices - return; - - // Increment Low, so we can use it as a "insert before" hint - ++Low; - } - // Insert - Low = Safe.insert(Low, ToMark); - ++Low; - // If there we're a prefix of longer index list(s), remove those - std::set<IndicesVector>::iterator End = Safe.end(); - while (Low != End && isPrefix(ToMark, *Low)) { - std::set<IndicesVector>::iterator Remove = Low; - ++Low; - Safe.erase(Remove); - } -} - -/// isSafeToPromoteArgument - As you might guess from the name of this method, -/// it checks to see if it is both safe and useful to promote the argument. -/// This method limits promotion of aggregates to only promote up to three -/// elements of the aggregate in order to avoid exploding the number of -/// arguments passed in. 
-static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR, - unsigned MaxElements) { - using GEPIndicesSet = std::set<IndicesVector>; - +/// Determine that this argument is safe to promote, and find the argument +/// parts it can be promoted into. +static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, + unsigned MaxElements, bool IsRecursive, + SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) { // Quick exit for unused arguments if (Arg->use_empty()) return true; - // We can only promote this argument if all of the uses are loads, or are GEP - // instructions (with constant indices) that are subsequently loaded. + // We can only promote this argument if all the uses are loads at known + // offsets. // // Promoting the argument causes it to be loaded in the caller // unconditionally. This is only safe if we can prove that either the load @@ -578,157 +463,193 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR // anyway, in the latter case, invalid loads won't happen. This prevents us // from introducing an invalid load that wouldn't have happened in the // original code. - // - // This set will contain all sets of indices that are loaded in the entry - // block, and thus are safe to unconditionally load in the caller. - GEPIndicesSet SafeToUnconditionallyLoad; - // This set contains all the sets of indices that we are planning to promote. - // This makes it possible to limit the number of arguments added. - GEPIndicesSet ToPromote; + SmallDenseMap<int64_t, ArgPart, 4> ArgParts; + Align NeededAlign(1); + uint64_t NeededDerefBytes = 0; - // If the pointer is always valid, any load with first index 0 is valid. + // And if this is a byval argument we also allow to have store instructions. + // Only handle in such way arguments with specified alignment; + // if it's unspecified, the actual alignment of the argument is + // target-specific. 
+ bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign(); - if (ByValTy) - SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); + // An end user of a pointer argument is a load or store instruction. + // Returns None if this load or store is not based on the argument. Return + // true if we can promote the instruction, false otherwise. + auto HandleEndUser = [&](auto *I, Type *Ty, + bool GuaranteedToExecute) -> Optional<bool> { + // Don't promote volatile or atomic instructions. + if (!I->isSimple()) + return false; - // Whenever a new underlying type for the operand is found, make sure it's - // consistent with the GEPs and loads we've already seen and, if necessary, - // use it to see if all incoming pointers are valid (which implies the 0-index - // is safe). - Type *BaseTy = ByValTy; - auto UpdateBaseTy = [&](Type *NewBaseTy) { - if (BaseTy) - return BaseTy == NewBaseTy; + Value *Ptr = I->getPointerOperand(); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, + /* AllowNonInbounds */ true); + if (Ptr != Arg) + return None; - BaseTy = NewBaseTy; - if (allCallersPassValidPointerForArgument(Arg, BaseTy)) { - assert(SafeToUnconditionallyLoad.empty()); - SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); - } + if (Offset.getSignificantBits() >= 64) + return false; - return true; - }; + TypeSize Size = DL.getTypeStoreSize(Ty); + // Don't try to promote scalable types. + if (Size.isScalable()) + return false; - // First, iterate functions that are guaranteed to execution on function - // entry and mark loads of (geps of) arguments as safe. 
- BasicBlock &EntryBlock = Arg->getParent()->front(); - // Declare this here so we can reuse it - IndicesVector Indices; - for (Instruction &I : EntryBlock) { - if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { - Value *V = LI->getPointerOperand(); - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { - V = GEP->getPointerOperand(); - if (V == Arg) { - // This load actually loads (part of) Arg? Check the indices then. - Indices.reserve(GEP->getNumIndices()); - for (Use &Idx : GEP->indices()) - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) - Indices.push_back(CI->getSExtValue()); - else - // We found a non-constant GEP index for this argument? Bail out - // right away, can't promote this argument at all. - return false; + // If this is a recursive function and one of the types is a pointer, + // then promoting it might lead to recursive promotion. + if (IsRecursive && Ty->isPointerTy()) + return false; - if (!UpdateBaseTy(GEP->getSourceElementType())) - return false; + int64_t Off = Offset.getSExtValue(); + auto Pair = ArgParts.try_emplace( + Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr}); + ArgPart &Part = Pair.first->second; + bool OffsetNotSeenBefore = Pair.second; - // Indices checked out, mark them as safe - markIndicesSafe(Indices, SafeToUnconditionallyLoad); - Indices.clear(); - } - } else if (V == Arg) { - // Direct loads are equivalent to a GEP with a single 0 index. - markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); + // We limit promotion to only promoting up to a fixed number of elements of + // the aggregate. + if (MaxElements > 0 && ArgParts.size() > MaxElements) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "more than " << MaxElements << " parts\n"); + return false; + } - if (BaseTy && LI->getType() != BaseTy) - return false; + // For now, we only support loading/storing one specific type at a given + // offset. 
+ if (Part.Ty != Ty) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "accessed as both " << *Part.Ty << " and " << *Ty + << " at offset " << Off << "\n"); + return false; + } - BaseTy = LI->getType(); - } + // If this instruction is not guaranteed to execute, and we haven't seen a + // load or store at this offset before (or it had lower alignment), then we + // need to remember that requirement. + // Note that skipping instructions of previously seen offsets is only + // correct because we only allow a single type for a given offset, which + // also means that the number of accessed bytes will be the same. + if (!GuaranteedToExecute && + (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) { + // We won't be able to prove dereferenceability for negative offsets. + if (Off < 0) + return false; + + // If the offset is not aligned, an aligned base pointer won't help. + if (!isAligned(I->getAlign(), Off)) + return false; + + NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue()); + NeededAlign = std::max(NeededAlign, I->getAlign()); } + Part.Alignment = std::max(Part.Alignment, I->getAlign()); + return true; + }; + + // Look for loads and stores that are guaranteed to execute on entry. + for (Instruction &I : Arg->getParent()->getEntryBlock()) { + Optional<bool> Res{}; + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) + Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true); + else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + Res = HandleEndUser(SI, SI->getValueOperand()->getType(), + /* GuaranteedToExecute */ true); + if (Res && !*Res) + return false; + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) break; } - // Now, iterate all uses of the argument to see if there are any uses that are - // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. + // Now look at all loads of the argument. Remember the load instructions + // for the aliasing check below. 
+ SmallVector<const Use *, 16> Worklist; + SmallPtrSet<const Use *, 16> Visited; SmallVector<LoadInst *, 16> Loads; - IndicesVector Operands; - for (Use &U : Arg->uses()) { - User *UR = U.getUser(); - Operands.clear(); - if (LoadInst *LI = dyn_cast<LoadInst>(UR)) { - // Don't hack volatile/atomic loads - if (!LI->isSimple()) - return false; - Loads.push_back(LI); - // Direct loads are equivalent to a GEP with a zero index and then a load. - Operands.push_back(0); + auto AppendUses = [&](const Value *V) { + for (const Use &U : V->uses()) + if (Visited.insert(&U).second) + Worklist.push_back(&U); + }; + AppendUses(Arg); + while (!Worklist.empty()) { + const Use *U = Worklist.pop_back_val(); + Value *V = U->getUser(); + if (isa<BitCastInst>(V)) { + AppendUses(V); + continue; + } - if (!UpdateBaseTy(LI->getType())) + if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) { + if (!GEP->hasAllConstantIndices()) return false; - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) { - if (GEP->use_empty()) { - // Dead GEP's cause trouble later. Just remove them if we run into - // them. - continue; - } + AppendUses(V); + continue; + } - if (!UpdateBaseTy(GEP->getSourceElementType())) + if (auto *LI = dyn_cast<LoadInst>(V)) { + if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false)) return false; + Loads.push_back(LI); + continue; + } - // Ensure that all of the indices are constants. - for (Use &Idx : GEP->indices()) - if (ConstantInt *C = dyn_cast<ConstantInt>(Idx)) - Operands.push_back(C->getSExtValue()); - else - return false; // Not a constant operand GEP! - - // Ensure that the only users of the GEP are load instructions. - for (User *GEPU : GEP->users()) - if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) { - // Don't hack volatile/atomic loads - if (!LI->isSimple()) - return false; - Loads.push_back(LI); - } else { - // Other uses than load? - return false; - } - } else { - return false; // Not a load or a GEP. 
+ // Stores are allowed for byval arguments + auto *SI = dyn_cast<StoreInst>(V); + if (AreStoresAllowed && SI && + U->getOperandNo() == StoreInst::getPointerOperandIndex()) { + if (!*HandleEndUser(SI, SI->getValueOperand()->getType(), + /* GuaranteedToExecute */ false)) + return false; + continue; + // Only stores TO the argument is allowed, all the other stores are + // unknown users } - // Now, see if it is safe to promote this load / loads of this GEP. Loading - // is safe if Operands, or a prefix of Operands, is marked as safe. - if (!prefixIn(Operands, SafeToUnconditionallyLoad)) - return false; + // Unknown user. + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "unknown user " << *V << "\n"); + return false; + } - // See if we are already promoting a load with these indices. If not, check - // to make sure that we aren't promoting too many elements. If so, nothing - // to do. - if (ToPromote.find(Operands) == ToPromote.end()) { - if (MaxElements > 0 && ToPromote.size() == MaxElements) { - LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '" - << Arg->getName() - << "' because it would require adding more " - << "than " << MaxElements - << " arguments to the function.\n"); - // We limit aggregate promotion to only promoting up to a fixed number - // of elements of the aggregate. - return false; - } - ToPromote.insert(std::move(Operands)); + if (NeededDerefBytes || NeededAlign > 1) { + // Try to prove a required deref / aligned requirement. + if (!allCallersPassValidPointerForArgument(Arg, NeededAlign, + NeededDerefBytes)) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "not dereferenceable or aligned\n"); + return false; } } - if (Loads.empty()) + if (ArgParts.empty()) return true; // No users, this is a dead argument. - // Okay, now we know that the argument is only used by load instructions and + // Sort parts by offset. 
+ append_range(ArgPartsVec, ArgParts); + sort(ArgPartsVec, + [](const auto &A, const auto &B) { return A.first < B.first; }); + + // Make sure the parts are non-overlapping. + int64_t Offset = ArgPartsVec[0].first; + for (const auto &Pair : ArgPartsVec) { + if (Pair.first < Offset) + return false; // Overlap with previous part. + + Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty); + } + + // If store instructions are allowed, the path from the entry of the function + // to each load may be not free of instructions that potentially invalidate + // the load, and this is an admissible situation. + if (AreStoresAllowed) + return true; + + // Okay, now we know that the argument is only used by load instructions, and // it is safe to unconditionally perform all of them. Use alias analysis to // check to see if the pointer is guaranteed to not be modified from entry of // the function to each of the load instructions. @@ -762,118 +683,31 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR return true; } -bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) { - // There is no size information, so be conservative. - if (!type->isSized()) - return false; - - // If the alloc size is not equal to the storage size, then there are padding - // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. - if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) - return false; - - // FIXME: This isn't the right way to check for padding in vectors with - // non-byte-size elements. - if (VectorType *seqTy = dyn_cast<VectorType>(type)) - return isDenselyPacked(seqTy->getElementType(), DL); - - // For array types, check for padding within members. - if (ArrayType *seqTy = dyn_cast<ArrayType>(type)) - return isDenselyPacked(seqTy->getElementType(), DL); - - if (!isa<StructType>(type)) - return true; - - // Check for padding within and between elements of a struct. 
- StructType *StructTy = cast<StructType>(type); - const StructLayout *Layout = DL.getStructLayout(StructTy); - uint64_t StartPos = 0; - for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { - Type *ElTy = StructTy->getElementType(i); - if (!isDenselyPacked(ElTy, DL)) - return false; - if (StartPos != Layout->getElementOffsetInBits(i)) - return false; - StartPos += DL.getTypeAllocSizeInBits(ElTy); - } - - return true; -} - -/// Checks if the padding bytes of an argument could be accessed. -static bool canPaddingBeAccessed(Argument *arg) { - assert(arg->hasByValAttr()); - - // Track all the pointers to the argument to make sure they are not captured. - SmallPtrSet<Value *, 16> PtrValues; - PtrValues.insert(arg); - - // Track all of the stores. - SmallVector<StoreInst *, 16> Stores; - - // Scan through the uses recursively to make sure the pointer is always used - // sanely. - SmallVector<Value *, 16> WorkList(arg->users()); - while (!WorkList.empty()) { - Value *V = WorkList.pop_back_val(); - if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) { - if (PtrValues.insert(V).second) - llvm::append_range(WorkList, V->users()); - } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) { - Stores.push_back(Store); - } else if (!isa<LoadInst>(V)) { - return true; - } - } - - // Check to make sure the pointers aren't captured - for (StoreInst *Store : Stores) - if (PtrValues.count(Store->getValueOperand())) - return true; - - return false; -} - -/// Check if callers and the callee \p F agree how promoted arguments would be -/// passed. The ones that they do not agree on are eliminated from the sets but -/// the return value has to be observed as well. -static bool areFunctionArgsABICompatible( - const Function &F, const TargetTransformInfo &TTI, - SmallPtrSetImpl<Argument *> &ArgsToPromote, - SmallPtrSetImpl<Argument *> &ByValArgsToTransform) { - // TODO: Check individual arguments so we can promote a subset? 
- SmallVector<Type *, 32> Types; - for (Argument *Arg : ArgsToPromote) - Types.push_back(Arg->getType()->getPointerElementType()); - for (Argument *Arg : ByValArgsToTransform) - Types.push_back(Arg->getParamByValType()); - - for (const Use &U : F.uses()) { +/// Check if callers and callee agree on how promoted arguments would be +/// passed. +static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F, + const TargetTransformInfo &TTI) { + return all_of(F.uses(), [&](const Use &U) { CallBase *CB = dyn_cast<CallBase>(U.getUser()); if (!CB) return false; + const Function *Caller = CB->getCaller(); const Function *Callee = CB->getCalledFunction(); - if (!TTI.areTypesABICompatible(Caller, Callee, Types)) - return false; - } - return true; + return TTI.areTypesABICompatible(Caller, Callee, Types); + }); } /// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. -static Function * -promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, - unsigned MaxElements, - Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>> - ReplaceCallSite, - const TargetTransformInfo &TTI) { +static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, + unsigned MaxElements, bool IsRecursive) { // Don't perform argument promotion for naked functions; otherwise we can end // up removing parameters that are seemingly 'not used' as they are referred // to in the assembly. - if(F->hasFnAttribute(Attribute::Naked)) + if (F->hasFnAttribute(Attribute::Naked)) return nullptr; // Make sure that it is local to this module. @@ -903,20 +737,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, // Second check: make sure that all callers are direct callers. 
We can't // transform functions that have indirect callers. Also see if the function - // is self-recursive and check that target features are compatible. - bool isSelfRecursive = false; + // is self-recursive. for (Use &U : F->uses()) { CallBase *CB = dyn_cast<CallBase>(U.getUser()); // Must be a direct call. - if (CB == nullptr || !CB->isCallee(&U)) + if (CB == nullptr || !CB->isCallee(&U) || + CB->getFunctionType() != F->getFunctionType()) return nullptr; // Can't change signature of musttail callee if (CB->isMustTailCall()) return nullptr; - if (CB->getParent()->getParent() == F) - isSelfRecursive = true; + if (CB->getFunction() == F) + IsRecursive = true; } // Can't change signature of musttail caller @@ -926,16 +760,13 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, return nullptr; const DataLayout &DL = F->getParent()->getDataLayout(); - - AAResults &AAR = AARGetter(*F); + auto &AAR = FAM.getResult<AAManager>(*F); + const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F); // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. - SmallPtrSet<Argument *, 8> ArgsToPromote; - SmallPtrSet<Argument *, 8> ByValArgsToTransform; + DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; for (Argument *PtrArg : PointerArgs) { - Type *AgTy = PtrArg->getType()->getPointerElementType(); - // Replace sret attribute with noalias. This reduces register pressure by // avoiding a register copy. if (PtrArg->hasStructRetAttr()) { @@ -949,72 +780,25 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, } } - // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe, if the passed value is densely - // packed or if we can prove the padding bytes are never accessed. 
- // - // Only handle arguments with specified alignment; if it's unspecified, the - // actual alignment of the argument is target-specific. - bool isSafeToPromote = PtrArg->hasByValAttr() && PtrArg->getParamAlign() && - (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) || - !canPaddingBeAccessed(PtrArg)); - if (isSafeToPromote) { - if (StructType *STy = dyn_cast<StructType>(AgTy)) { - if (MaxElements > 0 && STy->getNumElements() > MaxElements) { - LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '" - << PtrArg->getName() - << "' because it would require adding more" - << " than " << MaxElements - << " arguments to the function.\n"); - continue; - } + // If we can promote the pointer to its value. + SmallVector<OffsetAndArgPart, 4> ArgParts; - // If all the elements are single-value types, we can promote it. - bool AllSimple = true; - for (const auto *EltTy : STy->elements()) { - if (!EltTy->isSingleValueType()) { - AllSimple = false; - break; - } - } - - // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow sroa to hack on - // the new alloca we introduce. - if (AllSimple) { - ByValArgsToTransform.insert(PtrArg); - continue; - } - } - } + if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { + SmallVector<Type *, 4> Types; + for (const auto &Pair : ArgParts) + Types.push_back(Pair.second.Ty); - // If the argument is a recursive type and we're in a recursive - // function, we could end up infinitely peeling the function argument. - if (isSelfRecursive) { - if (StructType *STy = dyn_cast<StructType>(AgTy)) { - bool RecursiveType = - llvm::is_contained(STy->elements(), PtrArg->getType()); - if (RecursiveType) - continue; + if (areTypesABICompatible(Types, *F, TTI)) { + ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); } } - - // Otherwise, see if we can promote the pointer to its value. - Type *ByValTy = - PtrArg->hasByValAttr() ? 
PtrArg->getParamByValType() : nullptr; - if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements)) - ArgsToPromote.insert(PtrArg); } // No promotable pointer arguments. - if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) + if (ArgsToPromote.empty()) return nullptr; - if (!areFunctionArgsABICompatible( - *F, TTI, ArgsToPromote, ByValArgsToTransform)) - return nullptr; - - return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); + return doPromotion(F, FAM, ArgsToPromote); } PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, @@ -1030,19 +814,10 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + bool IsRecursive = C.size() > 1; for (LazyCallGraph::Node &N : C) { Function &OldF = N.getFunction(); - - // FIXME: This lambda must only be used with this function. We should - // skip the lambda and just get the AA results directly. - auto AARGetter = [&](Function &F) -> AAResults & { - assert(&F == &OldF && "Called with an unexpected function!"); - return FAM.getResult<AAManager>(F); - }; - - const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF); - Function *NewF = - promoteArguments(&OldF, AARGetter, MaxElements, None, TTI); + Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive); if (!NewF) continue; LocalChange = true; @@ -1077,111 +852,3 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, PA.preserveSet<AllAnalysesOn<Function>>(); return PA; } - -namespace { - -/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. 
-struct ArgPromotion : public CallGraphSCCPass { - // Pass identification, replacement for typeid - static char ID; - - explicit ArgPromotion(unsigned MaxElements = 3) - : CallGraphSCCPass(ID), MaxElements(MaxElements) { - initializeArgPromotionPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - getAAResultsAnalysisUsage(AU); - CallGraphSCCPass::getAnalysisUsage(AU); - } - - bool runOnSCC(CallGraphSCC &SCC) override; - -private: - using llvm::Pass::doInitialization; - - bool doInitialization(CallGraph &CG) override; - - /// The maximum number of elements to expand, or 0 for unlimited. - unsigned MaxElements; -}; - -} // end anonymous namespace - -char ArgPromotion::ID = 0; - -INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, - false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, false) - -Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) { - return new ArgPromotion(MaxElements); -} - -bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { - if (skipSCC(SCC)) - return false; - - // Get the callgraph information that we need to update to reflect our - // changes. - CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); - - LegacyAARGetter AARGetter(*this); - - bool Changed = false, LocalChange; - - // Iterate until we stop promoting from this SCC. - do { - LocalChange = false; - // Attempt to promote arguments from all functions in this SCC. 
- for (CallGraphNode *OldNode : SCC) { - Function *OldF = OldNode->getFunction(); - if (!OldF) - continue; - - auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) { - Function *Caller = OldCS.getParent()->getParent(); - CallGraphNode *NewCalleeNode = - CG.getOrInsertFunction(NewCS.getCalledFunction()); - CallGraphNode *CallerNode = CG[Caller]; - CallerNode->replaceCallEdge(cast<CallBase>(OldCS), - cast<CallBase>(NewCS), NewCalleeNode); - }; - - const TargetTransformInfo &TTI = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF); - if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements, - {ReplaceCallSite}, TTI)) { - LocalChange = true; - - // Update the call graph for the newly promoted function. - CallGraphNode *NewNode = CG.getOrInsertFunction(NewF); - NewNode->stealCalledFunctionsFrom(OldNode); - if (OldNode->getNumReferences() == 0) - delete CG.removeFunctionFromModule(OldNode); - else - OldF->setLinkage(Function::ExternalLinkage); - - // And updat ethe SCC we're iterating as well. - SCC.ReplaceNode(OldNode, NewNode); - } - } - // Remember that we changed something. 
- Changed |= LocalChange; - } while (LocalChange); - - return Changed; -} - -bool ArgPromotion::doInitialization(CallGraph &CG) { - return CallGraphSCCPass::doInitialization(CG); -} diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index d66140a726f6..b05b7990e3f0 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -15,29 +15,25 @@ #include "llvm/Transforms/IPO/Attributor.h" -#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/NoFolder.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -50,6 +46,10 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#ifdef EXPENSIVE_CHECKS +#include "llvm/IR/Verifier.h" +#endif + #include <cassert> #include <string> @@ -123,13 +123,13 @@ static cl::list<std::string> SeedAllowList("attributor-seed-allow-list", cl::Hidden, cl::desc("Comma seperated list of attribute names that are " "allowed to be seeded."), - cl::ZeroOrMore, cl::CommaSeparated); + cl::CommaSeparated); static 
cl::list<std::string> FunctionSeedAllowList( "attributor-function-seed-allow-list", cl::Hidden, cl::desc("Comma seperated list of function names that are " "allowed to be seeded."), - cl::ZeroOrMore, cl::CommaSeparated); + cl::CommaSeparated); #endif static cl::opt<bool> @@ -209,33 +209,25 @@ bool AA::isNoSyncInst(Attributor &A, const Instruction &I, } bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, - const Value &V) { - if (auto *C = dyn_cast<Constant>(&V)) - return !C->isThreadDependent(); - // TODO: Inspect and cache more complex instructions. - if (auto *CB = dyn_cast<CallBase>(&V)) - return CB->getNumOperands() == 0 && !CB->mayHaveSideEffects() && - !CB->mayReadFromMemory(); - const Function *Scope = nullptr; - if (auto *I = dyn_cast<Instruction>(&V)) - Scope = I->getFunction(); - if (auto *A = dyn_cast<Argument>(&V)) - Scope = A->getParent(); - if (!Scope) + const Value &V, bool ForAnalysisOnly) { + // TODO: See the AAInstanceInfo class comment. + if (!ForAnalysisOnly) return false; - auto &NoRecurseAA = A.getAAFor<AANoRecurse>( - QueryingAA, IRPosition::function(*Scope), DepClassTy::OPTIONAL); - return NoRecurseAA.isAssumedNoRecurse(); + auto &InstanceInfoAA = A.getAAFor<AAInstanceInfo>( + QueryingAA, IRPosition::value(V), DepClassTy::OPTIONAL); + return InstanceInfoAA.isAssumedUniqueForAnalysis(); } Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty, const TargetLibraryInfo *TLI) { if (isa<AllocaInst>(Obj)) return UndefValue::get(&Ty); - if (isAllocationFn(&Obj, TLI)) - return getInitialValueOfAllocation(&cast<CallBase>(Obj), TLI, &Ty); + if (Constant *Init = getInitialValueOfAllocation(&Obj, TLI, &Ty)) + return Init; auto *GV = dyn_cast<GlobalVariable>(&Obj); - if (!GV || !GV->hasLocalLinkage()) + if (!GV) + return nullptr; + if (!GV->hasLocalLinkage() && !(GV->isConstant() && GV->hasInitializer())) return nullptr; if (!GV->hasInitializer()) return UndefValue::get(&Ty); @@ -252,19 +244,29 @@ bool 
AA::isValidInScope(const Value &V, const Function *Scope) { return false; } -bool AA::isValidAtPosition(const Value &V, const Instruction &CtxI, +bool AA::isValidAtPosition(const AA::ValueAndContext &VAC, InformationCache &InfoCache) { - if (isa<Constant>(V)) + if (isa<Constant>(VAC.getValue()) || VAC.getValue() == VAC.getCtxI()) return true; - const Function *Scope = CtxI.getFunction(); - if (auto *A = dyn_cast<Argument>(&V)) + const Function *Scope = nullptr; + const Instruction *CtxI = VAC.getCtxI(); + if (CtxI) + Scope = CtxI->getFunction(); + if (auto *A = dyn_cast<Argument>(VAC.getValue())) return A->getParent() == Scope; - if (auto *I = dyn_cast<Instruction>(&V)) + if (auto *I = dyn_cast<Instruction>(VAC.getValue())) { if (I->getFunction() == Scope) { - const DominatorTree *DT = - InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Scope); - return DT && DT->dominates(I, &CtxI); + if (const DominatorTree *DT = + InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>( + *Scope)) + return DT->dominates(I, CtxI); + // Local dominance check mostly for the old PM passes. + if (CtxI && I->getParent() == CtxI->getParent()) + return llvm::any_of( + make_range(I->getIterator(), I->getParent()->end()), + [&](const Instruction &AfterI) { return &AfterI == CtxI; }); } + } return false; } @@ -295,11 +297,11 @@ AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A, const Optional<Value *> &B, Type *Ty) { if (A == B) return A; - if (!B.hasValue()) + if (!B) return A; if (*B == nullptr) return nullptr; - if (!A.hasValue()) + if (!A) return Ty ? 
getWithType(**B, *Ty) : nullptr; if (*A == nullptr) return nullptr; @@ -314,21 +316,33 @@ AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A, return nullptr; } -bool AA::getPotentialCopiesOfStoredValue( - Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies, - const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) { +template <bool IsLoad, typename Ty> +static bool getPotentialCopiesOfMemoryValue( + Attributor &A, Ty &I, SmallSetVector<Value *, 4> &PotentialCopies, + SmallSetVector<Instruction *, 4> &PotentialValueOrigins, + const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation, + bool OnlyExact) { + LLVM_DEBUG(dbgs() << "Trying to determine the potential copies of " << I + << " (only exact: " << OnlyExact << ")\n";); - Value &Ptr = *SI.getPointerOperand(); + Value &Ptr = *I.getPointerOperand(); SmallVector<Value *, 8> Objects; - if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI)) { + if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I, + UsedAssumedInformation)) { LLVM_DEBUG( dbgs() << "Underlying objects stored into could not be determined\n";); return false; } + // Containers to remember the pointer infos and new copies while we are not + // sure that we can find all of them. If we abort we want to avoid spurious + // dependences and potential copies in the provided container. SmallVector<const AAPointerInfo *> PIs; SmallVector<Value *> NewCopies; + SmallVector<Instruction *> NewCopyOrigins; + const auto *TLI = + A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction()); for (Value *Obj : Objects) { LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); if (isa<UndefValue>(Obj)) @@ -336,7 +350,7 @@ bool AA::getPotentialCopiesOfStoredValue( if (isa<ConstantPointerNull>(Obj)) { // A null pointer access can be undefined but any offset from null may // be OK. We do not try to optimize the latter. 
- if (!NullPointerIsDefined(SI.getFunction(), + if (!NullPointerIsDefined(I.getFunction(), Ptr.getType()->getPointerAddressSpace()) && A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) == Obj) @@ -345,37 +359,74 @@ bool AA::getPotentialCopiesOfStoredValue( dbgs() << "Underlying object is a valid nullptr, giving up.\n";); return false; } + // TODO: Use assumed noalias return. if (!isa<AllocaInst>(Obj) && !isa<GlobalVariable>(Obj) && - !isNoAliasCall(Obj)) { + !(IsLoad ? isAllocationFn(Obj, TLI) : isNoAliasCall(Obj))) { LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj << "\n";); return false; } if (auto *GV = dyn_cast<GlobalVariable>(Obj)) - if (!GV->hasLocalLinkage()) { + if (!GV->hasLocalLinkage() && + !(GV->isConstant() && GV->hasInitializer())) { LLVM_DEBUG(dbgs() << "Underlying object is global with external " "linkage, not supported yet: " << *Obj << "\n";); return false; } + if (IsLoad) { + Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); + if (!InitialValue) + return false; + NewCopies.push_back(InitialValue); + NewCopyOrigins.push_back(nullptr); + } + auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { - if (!Acc.isRead()) + if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead())) + return true; + if (IsLoad && Acc.isWrittenValueYetUndetermined()) return true; - auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst()); - if (!LI) { - LLVM_DEBUG(dbgs() << "Underlying object read through a non-load " - "instruction not supported yet: " - << *Acc.getRemoteInst() << "\n";); + if (OnlyExact && !IsExact && + !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) { + LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst() + << ", abort!\n"); return false; } - NewCopies.push_back(LI); + if (IsLoad) { + assert(isa<LoadInst>(I) && "Expected load or store instruction only!"); + if (!Acc.isWrittenValueUnknown()) { + NewCopies.push_back(Acc.getWrittenValue()); + 
NewCopyOrigins.push_back(Acc.getRemoteInst()); + return true; + } + auto *SI = dyn_cast<StoreInst>(Acc.getRemoteInst()); + if (!SI) { + LLVM_DEBUG(dbgs() << "Underlying object written through a non-store " + "instruction not supported yet: " + << *Acc.getRemoteInst() << "\n";); + return false; + } + NewCopies.push_back(SI->getValueOperand()); + NewCopyOrigins.push_back(SI); + } else { + assert(isa<StoreInst>(I) && "Expected load or store instruction only!"); + auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst()); + if (!LI && OnlyExact) { + LLVM_DEBUG(dbgs() << "Underlying object read through a non-load " + "instruction not supported yet: " + << *Acc.getRemoteInst() << "\n";); + return false; + } + NewCopies.push_back(Acc.getRemoteInst()); + } return true; }; auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); - if (!PI.forallInterferingAccesses(SI, CheckAccess)) { + if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " @@ -385,16 +436,40 @@ bool AA::getPotentialCopiesOfStoredValue( PIs.push_back(&PI); } + // Only if we were successful collection all potential copies we record + // dependences (on non-fix AAPointerInfo AAs). We also only then modify the + // given PotentialCopies container. 
for (auto *PI : PIs) { if (!PI->getState().isAtFixpoint()) UsedAssumedInformation = true; A.recordDependence(*PI, QueryingAA, DepClassTy::OPTIONAL); } PotentialCopies.insert(NewCopies.begin(), NewCopies.end()); + PotentialValueOrigins.insert(NewCopyOrigins.begin(), NewCopyOrigins.end()); return true; } +bool AA::getPotentiallyLoadedValues( + Attributor &A, LoadInst &LI, SmallSetVector<Value *, 4> &PotentialValues, + SmallSetVector<Instruction *, 4> &PotentialValueOrigins, + const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation, + bool OnlyExact) { + return getPotentialCopiesOfMemoryValue</* IsLoad */ true>( + A, LI, PotentialValues, PotentialValueOrigins, QueryingAA, + UsedAssumedInformation, OnlyExact); +} + +bool AA::getPotentialCopiesOfStoredValue( + Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies, + const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation, + bool OnlyExact) { + SmallSetVector<Instruction *, 4> PotentialValueOrigins; + return getPotentialCopiesOfMemoryValue</* IsLoad */ false>( + A, SI, PotentialCopies, PotentialValueOrigins, QueryingAA, + UsedAssumedInformation, OnlyExact); +} + static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP, const AbstractAttribute &QueryingAA, bool RequireReadNone, bool &IsKnown) { @@ -449,6 +524,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, SmallVector<const Instruction *> Worklist; Worklist.push_back(&FromI); + const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( + QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL); while (!Worklist.empty()) { const Instruction *CurFromI = Worklist.pop_back_val(); if (!Visited.insert(CurFromI).second) @@ -468,7 +545,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, << *ToI << " [Intra]\n"); if (Result) return true; - continue; + if (NoRecurseAA.isAssumedNoRecurse()) + continue; } // TODO: If we can go arbitrarily backwards we will eventually reach an @@ -514,10 
+592,10 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, return true; }; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; Result = !A.checkForAllCallSites(CheckCallSite, *FromFn, /* RequireAllCallSites */ true, - &QueryingAA, AllCallSitesKnown); + &QueryingAA, UsedAssumedInformation); if (Result) { LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI << " in @" << FromFn->getName() @@ -631,7 +709,7 @@ Argument *IRPosition::getAssociatedArgument() const { assert(ACS.getCalledFunction()->arg_size() > u && "ACS mapped into var-args arguments!"); - if (CBCandidateArg.hasValue()) { + if (CBCandidateArg) { CBCandidateArg = nullptr; break; } @@ -640,7 +718,7 @@ Argument *IRPosition::getAssociatedArgument() const { } // If we found a unique callback candidate argument, return it. - if (CBCandidateArg.hasValue() && CBCandidateArg.getValue()) + if (CBCandidateArg && CBCandidateArg.getValue()) return CBCandidateArg.getValue(); // If no callbacks were found, or none used the underlying call site operand @@ -949,22 +1027,24 @@ Attributor::getAssumedConstant(const IRPosition &IRP, bool &UsedAssumedInformation) { // First check all callbacks provided by outside AAs. If any of them returns // a non-null value that is different from the associated value, or None, we - // assume it's simpliied. + // assume it's simplified. 
for (auto &CB : SimplificationCallbacks.lookup(IRP)) { Optional<Value *> SimplifiedV = CB(IRP, &AA, UsedAssumedInformation); - if (!SimplifiedV.hasValue()) + if (!SimplifiedV) return llvm::None; if (isa_and_nonnull<Constant>(*SimplifiedV)) return cast<Constant>(*SimplifiedV); return nullptr; } + if (auto *C = dyn_cast<Constant>(&IRP.getAssociatedValue())) + return C; const auto &ValueSimplifyAA = getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE); Optional<Value *> SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(*this); bool IsKnown = ValueSimplifyAA.isAtFixpoint(); UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV.hasValue()) { + if (!SimplifiedV) { recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); return llvm::None; } @@ -987,18 +1067,18 @@ Attributor::getAssumedSimplified(const IRPosition &IRP, bool &UsedAssumedInformation) { // First check all callbacks provided by outside AAs. If any of them returns // a non-null value that is different from the associated value, or None, we - // assume it's simpliied. + // assume it's simplified. for (auto &CB : SimplificationCallbacks.lookup(IRP)) return CB(IRP, AA, UsedAssumedInformation); - // If no high-level/outside simplification occured, use AAValueSimplify. + // If no high-level/outside simplification occurred, use AAValueSimplify. 
const auto &ValueSimplifyAA = getOrCreateAAFor<AAValueSimplify>(IRP, AA, DepClassTy::NONE); Optional<Value *> SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(*this); bool IsKnown = ValueSimplifyAA.isAtFixpoint(); UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV.hasValue()) { + if (!SimplifiedV) { if (AA) recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL); return llvm::None; @@ -1017,7 +1097,7 @@ Attributor::getAssumedSimplified(const IRPosition &IRP, Optional<Value *> Attributor::translateArgumentToCallSiteContent( Optional<Value *> V, CallBase &CB, const AbstractAttribute &AA, bool &UsedAssumedInformation) { - if (!V.hasValue()) + if (!V) return V; if (*V == nullptr || isa<Constant>(*V)) return V; @@ -1078,6 +1158,19 @@ bool Attributor::isAssumedDead(const Use &U, BasicBlock *IncomingBB = PHI->getIncomingBlock(U); return isAssumedDead(*IncomingBB->getTerminator(), QueryingAA, FnLivenessAA, UsedAssumedInformation, CheckBBLivenessOnly, DepClass); + } else if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { + if (!CheckBBLivenessOnly && SI->getPointerOperand() != U.get()) { + const IRPosition IRP = IRPosition::inst(*SI); + const AAIsDead &IsDeadAA = + getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE); + if (IsDeadAA.isRemovableStore()) { + if (QueryingAA) + recordDependence(IsDeadAA, *QueryingAA, DepClass); + if (!IsDeadAA.isKnown(AAIsDead::IS_REMOVABLE)) + UsedAssumedInformation = true; + return true; + } + } } return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA, @@ -1191,6 +1284,7 @@ bool Attributor::checkForAllUses( function_ref<bool(const Use &, bool &)> Pred, const AbstractAttribute &QueryingAA, const Value &V, bool CheckBBLivenessOnly, DepClassTy LivenessDepClass, + bool IgnoreDroppableUses, function_ref<bool(const Use &OldU, const Use &NewU)> EquivalentUseCB) { // Check the trivial case first as it catches void values. 
@@ -1231,7 +1325,7 @@ bool Attributor::checkForAllUses( LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n"); continue; } - if (U->getUser()->isDroppable()) { + if (IgnoreDroppableUses && U->getUser()->isDroppable()) { LLVM_DEBUG(dbgs() << "[Attributor] Droppable user, skip!\n"); continue; } @@ -1241,9 +1335,9 @@ bool Attributor::checkForAllUses( if (!Visited.insert(U).second) continue; SmallSetVector<Value *, 4> PotentialCopies; - if (AA::getPotentialCopiesOfStoredValue(*this, *SI, PotentialCopies, - QueryingAA, - UsedAssumedInformation)) { + if (AA::getPotentialCopiesOfStoredValue( + *this, *SI, PotentialCopies, QueryingAA, UsedAssumedInformation, + /* OnlyExact */ true)) { LLVM_DEBUG(dbgs() << "[Attributor] Value is stored, continue with " << PotentialCopies.size() << " potential copies instead!\n"); @@ -1277,7 +1371,7 @@ bool Attributor::checkForAllUses( bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, const AbstractAttribute &QueryingAA, bool RequireAllCallSites, - bool &AllCallSitesKnown) { + bool &UsedAssumedInformation) { // We can try to determine information from // the call sites. However, this is only possible all call sites are known, // hence the function has internal linkage. 
@@ -1286,31 +1380,26 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, if (!AssociatedFunction) { LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP << "\n"); - AllCallSitesKnown = false; return false; } return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites, - &QueryingAA, AllCallSitesKnown); + &QueryingAA, UsedAssumedInformation); } bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, const Function &Fn, bool RequireAllCallSites, const AbstractAttribute *QueryingAA, - bool &AllCallSitesKnown) { + bool &UsedAssumedInformation) { if (RequireAllCallSites && !Fn.hasLocalLinkage()) { LLVM_DEBUG( dbgs() << "[Attributor] Function " << Fn.getName() << " has no internal linkage, hence not all call sites are known\n"); - AllCallSitesKnown = false; return false; } - // If we do not require all call sites we might not see all. - AllCallSitesKnown = RequireAllCallSites; - SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses())); for (unsigned u = 0; u < Uses.size(); ++u) { const Use &U = *Uses[u]; @@ -1322,15 +1411,13 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser() << "\n"; }); - bool UsedAssumedInformation = false; if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation, /* CheckBBLivenessOnly */ true)) { LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n"); continue; } if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) { - if (CE->isCast() && CE->getType()->isPointerTy() && - CE->getType()->getPointerElementType()->isFunctionTy()) { + if (CE->isCast() && CE->getType()->isPointerTy()) { LLVM_DEBUG( dbgs() << "[Attributor] Use, is constant cast expression, add " << CE->getNumUses() @@ -1477,30 +1564,24 @@ static bool checkForAllInstructionsImpl( } bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred, + const Function 
*Fn, const AbstractAttribute &QueryingAA, const ArrayRef<unsigned> &Opcodes, bool &UsedAssumedInformation, bool CheckBBLivenessOnly, bool CheckPotentiallyDead) { - - const IRPosition &IRP = QueryingAA.getIRPosition(); // Since we need to provide instructions we have to have an exact definition. - const Function *AssociatedFunction = IRP.getAssociatedFunction(); - if (!AssociatedFunction) - return false; - - if (AssociatedFunction->isDeclaration()) + if (!Fn || Fn->isDeclaration()) return false; // TODO: use the function scope once we have call site AAReturnedValues. - const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const IRPosition &QueryIRP = IRPosition::function(*Fn); const auto *LivenessAA = (CheckBBLivenessOnly || CheckPotentiallyDead) ? nullptr : &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE)); - auto &OpcodeInstMap = - InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); + auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn); if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA, LivenessAA, Opcodes, UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead)) @@ -1509,6 +1590,19 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred, return true; } +bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred, + const AbstractAttribute &QueryingAA, + const ArrayRef<unsigned> &Opcodes, + bool &UsedAssumedInformation, + bool CheckBBLivenessOnly, + bool CheckPotentiallyDead) { + const IRPosition &IRP = QueryingAA.getIRPosition(); + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + return checkForAllInstructions(Pred, AssociatedFunction, QueryingAA, Opcodes, + UsedAssumedInformation, CheckBBLivenessOnly, + CheckPotentiallyDead); +} + bool Attributor::checkForAllReadWriteInstructions( function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) { @@ -1547,11 +1641,8 @@ void 
Attributor::runTillFixpoint() { // the abstract analysis. unsigned IterationCounter = 1; - unsigned MaxFixedPointIterations; - if (MaxFixpointIterations) - MaxFixedPointIterations = MaxFixpointIterations.getValue(); - else - MaxFixedPointIterations = SetFixpointIterations; + unsigned MaxIterations = + Configuration.MaxFixpointIterations.value_or(SetFixpointIterations); SmallVector<AbstractAttribute *, 32> ChangedAAs; SetVector<AbstractAttribute *> Worklist, InvalidAAs; @@ -1636,21 +1727,20 @@ void Attributor::runTillFixpoint() { QueryAAsAwaitingUpdate.end()); QueryAAsAwaitingUpdate.clear(); - } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations || - VerifyMaxFixpointIterations)); + } while (!Worklist.empty() && + (IterationCounter++ < MaxIterations || VerifyMaxFixpointIterations)); - if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) { + if (IterationCounter > MaxIterations && !Functions.empty()) { auto Remark = [&](OptimizationRemarkMissed ORM) { return ORM << "Attributor did not reach a fixpoint after " - << ore::NV("Iterations", MaxFixedPointIterations) - << " iterations."; + << ore::NV("Iterations", MaxIterations) << " iterations."; }; - Function *F = Worklist.front()->getIRPosition().getAssociatedFunction(); + Function *F = Functions.front(); emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark); } LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: " - << IterationCounter << "/" << MaxFixpointIterations + << IterationCounter << "/" << MaxIterations << " iterations\n"); // Reset abstract arguments not settled in a sound fixpoint by now. 
This @@ -1684,11 +1774,9 @@ void Attributor::runTillFixpoint() { << " abstract attributes.\n"; }); - if (VerifyMaxFixpointIterations && - IterationCounter != MaxFixedPointIterations) { + if (VerifyMaxFixpointIterations && IterationCounter != MaxIterations) { errs() << "\n[Attributor] Fixpoint iteration done after: " - << IterationCounter << "/" << MaxFixedPointIterations - << " iterations\n"; + << IterationCounter << "/" << MaxIterations << " iterations\n"; llvm_unreachable("The fixpoint was not reached with exactly the number of " "specified iterations!"); } @@ -1725,6 +1813,9 @@ ChangeStatus Attributor::manifestAttributes() { if (!State.isValidState()) continue; + if (AA->getCtxI() && !isRunOn(*AA->getAnchorScope())) + continue; + // Skip dead code. bool UsedAssumedInformation = false; if (isAssumedDead(*AA, nullptr, UsedAssumedInformation, @@ -1774,7 +1865,7 @@ ChangeStatus Attributor::manifestAttributes() { void Attributor::identifyDeadInternalFunctions() { // Early exit if we don't intend to delete functions. - if (!DeleteFns) + if (!Configuration.DeleteFns) return; // Identify dead internal functions and delete them. This happens outside @@ -1795,7 +1886,7 @@ void Attributor::identifyDeadInternalFunctions() { if (!F) continue; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (checkForAllCallSites( [&](AbstractCallSite ACS) { Function *Callee = ACS.getInstruction()->getFunction(); @@ -1803,7 +1894,7 @@ void Attributor::identifyDeadInternalFunctions() { (Functions.count(Callee) && Callee->hasLocalLinkage() && !LiveInternalFns.count(Callee)); }, - *F, true, nullptr, AllCallSitesKnown)) { + *F, true, nullptr, UsedAssumedInformation)) { continue; } @@ -1826,7 +1917,8 @@ ChangeStatus Attributor::cleanupIR() { << ToBeDeletedBlocks.size() << " blocks and " << ToBeDeletedInsts.size() << " instructions and " << ToBeChangedValues.size() << " values and " - << ToBeChangedUses.size() << " uses. " + << ToBeChangedUses.size() << " uses. 
To insert " + << ToBeChangedToUnreachableInsts.size() << " unreachables." << "Preserve manifest added " << ManifestAddedBlocks.size() << " blocks\n"); @@ -1844,12 +1936,15 @@ ChangeStatus Attributor::cleanupIR() { NewV = Entry.first; } while (true); + Instruction *I = dyn_cast<Instruction>(U->getUser()); + assert((!I || isRunOn(*I->getFunction())) && + "Cannot replace an instruction outside the current SCC!"); + // Do not replace uses in returns if the value is a must-tail call we will // not delete. - if (auto *RI = dyn_cast<ReturnInst>(U->getUser())) { + if (auto *RI = dyn_cast_or_null<ReturnInst>(I)) { if (auto *CI = dyn_cast<CallInst>(OldV->stripPointerCasts())) - if (CI->isMustTailCall() && - (!ToBeDeletedInsts.count(CI) || !isRunOn(*CI->getCaller()))) + if (CI->isMustTailCall() && !ToBeDeletedInsts.count(CI)) return; // If we rewrite a return and the new value is not an argument, strip the // `returned` attribute as it is wrong now. @@ -1859,8 +1954,8 @@ ChangeStatus Attributor::cleanupIR() { } // Do not perform call graph altering changes outside the SCC. 
- if (auto *CB = dyn_cast<CallBase>(U->getUser())) - if (CB->isCallee(U) && !isRunOn(*CB->getCaller())) + if (auto *CB = dyn_cast_or_null<CallBase>(I)) + if (CB->isCallee(U)) return; LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser() @@ -1908,8 +2003,12 @@ ChangeStatus Attributor::cleanupIR() { for (auto &U : OldV->uses()) if (Entry.second || !U.getUser()->isDroppable()) Uses.push_back(&U); - for (Use *U : Uses) + for (Use *U : Uses) { + if (auto *I = dyn_cast<Instruction>(U->getUser())) + if (!isRunOn(*I->getFunction())) + continue; ReplaceUse(U, NewV); + } } for (auto &V : InvokeWithDeadSuccessor) @@ -1940,15 +2039,15 @@ ChangeStatus Attributor::cleanupIR() { } } for (Instruction *I : TerminatorsToFold) { - if (!isRunOn(*I->getFunction())) - continue; + assert(isRunOn(*I->getFunction()) && + "Cannot replace a terminator outside the current SCC!"); CGModifiedFunctions.insert(I->getFunction()); ConstantFoldTerminator(I->getParent()); } for (auto &V : ToBeChangedToUnreachableInsts) if (Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (!isRunOn(*I->getFunction())) - continue; + assert(isRunOn(*I->getFunction()) && + "Cannot replace an instruction outside the current SCC!"); CGModifiedFunctions.insert(I->getFunction()); changeToUnreachable(I); } @@ -1956,10 +2055,10 @@ ChangeStatus Attributor::cleanupIR() { for (auto &V : ToBeDeletedInsts) { if (Instruction *I = dyn_cast_or_null<Instruction>(V)) { if (auto *CB = dyn_cast<CallBase>(I)) { - if (!isRunOn(*I->getFunction())) - continue; + assert(isRunOn(*I->getFunction()) && + "Cannot delete an instruction outside the current SCC!"); if (!isa<IntrinsicInst>(CB)) - CGUpdater.removeCallSite(*CB); + Configuration.CGUpdater.removeCallSite(*CB); } I->dropDroppableUses(); CGModifiedFunctions.insert(I->getFunction()); @@ -1972,9 +2071,7 @@ ChangeStatus Attributor::cleanupIR() { } } - llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) { - return !I || !isRunOn(*cast<Instruction>(I)->getFunction()); - }); + 
llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) { return !I; }); LLVM_DEBUG({ dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size() << "\n"; @@ -2010,12 +2107,12 @@ ChangeStatus Attributor::cleanupIR() { for (Function *Fn : CGModifiedFunctions) if (!ToBeDeletedFunctions.count(Fn) && Functions.count(Fn)) - CGUpdater.reanalyzeFunction(*Fn); + Configuration.CGUpdater.reanalyzeFunction(*Fn); for (Function *Fn : ToBeDeletedFunctions) { if (!Functions.count(Fn)) continue; - CGUpdater.removeFunction(*Fn); + Configuration.CGUpdater.removeFunction(*Fn); } if (!ToBeChangedUses.empty()) @@ -2254,7 +2351,7 @@ bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet, bool Attributor::isValidFunctionSignatureRewrite( Argument &Arg, ArrayRef<Type *> ReplacementTypes) { - if (!RewriteSignatures) + if (!Configuration.RewriteSignatures) return false; Function *Fn = Arg.getParent(); @@ -2290,9 +2387,9 @@ bool Attributor::isValidFunctionSignatureRewrite( } // Avoid callbacks for now. - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr, - AllCallSitesKnown)) { + UsedAssumedInformation)) { LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n"); return false; } @@ -2305,7 +2402,6 @@ bool Attributor::isValidFunctionSignatureRewrite( // Forbid must-tail calls for now. 
// TODO: - bool UsedAssumedInformation = false; auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn); if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr, nullptr, {Instruction::Call}, @@ -2370,7 +2466,7 @@ bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) { } ChangeStatus Attributor::rewriteFunctionSignatures( - SmallPtrSetImpl<Function *> &ModifiedFns) { + SmallSetVector<Function *, 8> &ModifiedFns) { ChangeStatus Changed = ChangeStatus::UNCHANGED; for (auto &It : ArgumentReplacementMap) { @@ -2403,6 +2499,12 @@ ChangeStatus Attributor::rewriteFunctionSignatures( } } + uint64_t LargestVectorWidth = 0; + for (auto *I : NewArgumentTypes) + if (auto *VT = dyn_cast<llvm::VectorType>(I)) + LargestVectorWidth = std::max( + LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); + FunctionType *OldFnTy = OldFn->getFunctionType(); Type *RetTy = OldFnTy->getReturnType(); @@ -2432,6 +2534,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures( NewFn->setAttributes(AttributeList::get( Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(), NewArgumentAttributes)); + AttributeFuncs::updateMinLegalVectorWidthAttr(*NewFn, LargestVectorWidth); // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the @@ -2509,14 +2612,17 @@ ChangeStatus Attributor::rewriteFunctionSignatures( Ctx, OldCallAttributeList.getFnAttrs(), OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes)); + AttributeFuncs::updateMinLegalVectorWidthAttr(*NewCB->getCaller(), + LargestVectorWidth); + CallSitePairs.push_back({OldCB, NewCB}); return true; }; // Use the CallSiteReplacementCreator to create replacement call sites. 
- bool AllCallSitesKnown; + bool UsedAssumedInformation = false; bool Success = checkForAllCallSites(CallSiteReplacementCreator, *OldFn, - true, nullptr, AllCallSitesKnown); + true, nullptr, UsedAssumedInformation); (void)Success; assert(Success && "Assumed call site replacement to succeed!"); @@ -2529,6 +2635,9 @@ ChangeStatus Attributor::rewriteFunctionSignatures( ARIs[OldArgNum]) { if (ARI->CalleeRepairCB) ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt); + if (ARI->ReplacementTypes.empty()) + OldFnArgIt->replaceAllUsesWith( + PoisonValue::get(OldFnArgIt->getType())); NewFnArgIt += ARI->ReplacementTypes.size(); } else { NewFnArgIt->takeName(&*OldFnArgIt); @@ -2544,17 +2653,17 @@ ChangeStatus Attributor::rewriteFunctionSignatures( assert(OldCB.getType() == NewCB.getType() && "Cannot handle call sites with different types!"); ModifiedFns.insert(OldCB.getFunction()); - CGUpdater.replaceCallSite(OldCB, NewCB); + Configuration.CGUpdater.replaceCallSite(OldCB, NewCB); OldCB.replaceAllUsesWith(&NewCB); OldCB.eraseFromParent(); } // Replace the function in the call graph (if any). - CGUpdater.replaceFunctionWith(*OldFn, *NewFn); + Configuration.CGUpdater.replaceFunctionWith(*OldFn, *NewFn); // If the old function was modified and needed to be reanalyzed, the new one // does now. - if (ModifiedFns.erase(OldFn)) + if (ModifiedFns.remove(OldFn)) ModifiedFns.insert(NewFn); Changed = ChangeStatus::CHANGED; @@ -2574,6 +2683,30 @@ void InformationCache::initializeInformationCache(const Function &CF, // queried by abstract attributes during their initialization or update. // This has to happen before we create attributes. + DenseMap<const Value *, Optional<short>> AssumeUsesMap; + + // Add \p V to the assume uses map which track the number of uses outside of + // "visited" assumes. If no outside uses are left the value is added to the + // assume only use vector. 
+ auto AddToAssumeUsesMap = [&](const Value &V) -> void { + SmallVector<const Instruction *> Worklist; + if (auto *I = dyn_cast<Instruction>(&V)) + Worklist.push_back(I); + while (!Worklist.empty()) { + const Instruction *I = Worklist.pop_back_val(); + Optional<short> &NumUses = AssumeUsesMap[I]; + if (!NumUses) + NumUses = I->getNumUses(); + NumUses = NumUses.getValue() - /* this assume */ 1; + if (NumUses.getValue() != 0) + continue; + AssumeOnlyValues.insert(I); + for (const Value *Op : I->operands()) + if (auto *OpI = dyn_cast<Instruction>(Op)) + Worklist.push_back(OpI); + } + }; + for (Instruction &I : instructions(&F)) { bool IsInterestingOpcode = false; @@ -2594,6 +2727,7 @@ void InformationCache::initializeInformationCache(const Function &CF, // For `must-tail` calls we remember the caller and callee. if (auto *Assume = dyn_cast<AssumeInst>(&I)) { fillMapFromAssume(*Assume, KnowledgeMap); + AddToAssumeUsesMap(*Assume->getArgOperand(0)); } else if (cast<CallInst>(I).isMustTailCall()) { FI.ContainsMustTailCall = true; if (const Function *Callee = cast<CallInst>(I).getCalledFunction()) @@ -2742,7 +2876,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { getOrCreateAAFor<AAIsDead>(RetPos); // Every function might be simplified. - getOrCreateAAFor<AAValueSimplify>(RetPos); + bool UsedAssumedInformation = false; + getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation); // Every returned value might be marked noundef. 
getOrCreateAAFor<AANoUndef>(RetPos); @@ -2834,7 +2969,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) { IRPosition CBRetPos = IRPosition::callsite_returned(CB); - getOrCreateAAFor<AAValueSimplify>(CBRetPos); + bool UsedAssumedInformation = false; + getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation); } for (int I = 0, E = CB.arg_size(); I < E; ++I) { @@ -2897,10 +3033,15 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { getOrCreateAAFor<AAAlign>( IRPosition::value(*cast<LoadInst>(I).getPointerOperand())); if (SimplifyAllLoads) - getOrCreateAAFor<AAValueSimplify>(IRPosition::value(I)); - } else - getOrCreateAAFor<AAAlign>( - IRPosition::value(*cast<StoreInst>(I).getPointerOperand())); + getAssumedSimplified(IRPosition::value(I), nullptr, + UsedAssumedInformation); + } else { + auto &SI = cast<StoreInst>(I); + getOrCreateAAFor<AAIsDead>(IRPosition::inst(I)); + getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr, + UsedAssumedInformation); + getOrCreateAAFor<AAAlign>(IRPosition::value(*SI.getPointerOperand())); + } return true; }; Success = checkForAllInstructionsImpl( @@ -2975,8 +3116,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, if (!S.isValidState()) OS << "full-set"; else { - for (auto &it : S.getAssumedSet()) - OS << it << ", "; + for (auto &It : S.getAssumedSet()) + OS << It << ", "; if (S.undefIsContained()) OS << "undef "; } @@ -3018,8 +3159,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, OS << " [" << Acc.getKind() << "] " << *Acc.getRemoteInst(); if (Acc.getLocalInst() != Acc.getRemoteInst()) OS << " via " << *Acc.getLocalInst(); - if (Acc.getContent().hasValue()) - OS << " [" << *Acc.getContent() << "]"; + if (Acc.getContent()) { + if (*Acc.getContent()) + OS << " [" << **Acc.getContent() << "]"; + else + OS << " [ <unknown> ]"; + } return OS; } ///} @@ -3032,7 +3177,7 @@ static bool 
runAttributorOnFunctions(InformationCache &InfoCache, SetVector<Function *> &Functions, AnalysisGetter &AG, CallGraphUpdater &CGUpdater, - bool DeleteFns) { + bool DeleteFns, bool IsModulePass) { if (Functions.empty()) return false; @@ -3045,8 +3190,10 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, // Create an Attributor and initially empty information cache that is filled // while we identify default attribute opportunities. - Attributor A(Functions, InfoCache, CGUpdater, /* Allowed */ nullptr, - DeleteFns); + AttributorConfig AC(CGUpdater); + AC.IsModulePass = IsModulePass; + AC.DeleteFns = DeleteFns; + Attributor A(Functions, InfoCache, AC); // Create shallow wrappers for all functions that are not IPO amendable if (AllowShallowWrappers) @@ -3151,7 +3298,7 @@ PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { BumpPtrAllocator Allocator; InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr); if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater, - /* DeleteFns */ true)) { + /* DeleteFns */ true, /* IsModulePass */ true)) { // FIXME: Think about passes we will preserve and add them here. return PreservedAnalyses::none(); } @@ -3179,7 +3326,8 @@ PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C, BumpPtrAllocator Allocator; InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions); if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater, - /* DeleteFns */ false)) { + /* DeleteFns */ false, + /* IsModulePass */ false)) { // FIXME: Think about passes we will preserve and add them here. 
PreservedAnalyses PA; PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); @@ -3255,7 +3403,8 @@ struct AttributorLegacyPass : public ModulePass { BumpPtrAllocator Allocator; InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr); return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater, - /* DeleteFns*/ true); + /* DeleteFns*/ true, + /* IsModulePass */ true); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -3292,7 +3441,8 @@ struct AttributorCGSCCLegacyPass : public CallGraphSCCPass { BumpPtrAllocator Allocator; InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions); return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater, - /* DeleteFns */ false); + /* DeleteFns */ false, + /* IsModulePass */ false); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 2d88e329e093..4d99ce7e3175 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -14,9 +14,11 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -30,21 +32,29 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include 
"llvm/IR/NoFolder.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include <cassert> using namespace llvm; @@ -69,11 +79,11 @@ static cl::opt<unsigned, true> MaxPotentialValues( cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues), cl::init(7)); -static cl::opt<unsigned> - MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden, - cl::desc("Maximum number of interfering writes to " - "check before assuming all might interfere."), - cl::init(6)); +static cl::opt<unsigned> MaxInterferingAccesses( + "attributor-max-interfering-accesses", cl::Hidden, + cl::desc("Maximum number of interfering accesses to " + "check before assuming all might interfere."), + cl::init(6)); STATISTIC(NumAAs, "Number of abstract attributes created"); @@ -140,6 +150,7 @@ PIPE_OPERATOR(AANonNull) PIPE_OPERATOR(AANoAlias) PIPE_OPERATOR(AADereferenceable) PIPE_OPERATOR(AAAlign) +PIPE_OPERATOR(AAInstanceInfo) PIPE_OPERATOR(AANoCapture) PIPE_OPERATOR(AAValueSimplify) PIPE_OPERATOR(AANoFree) @@ -150,7 +161,7 @@ PIPE_OPERATOR(AAMemoryLocation) PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAPrivatizablePtr) PIPE_OPERATOR(AAUndefinedBehavior) -PIPE_OPERATOR(AAPotentialValues) +PIPE_OPERATOR(AAPotentialConstantValues) PIPE_OPERATOR(AANoUndef) PIPE_OPERATOR(AACallEdges) PIPE_OPERATOR(AAFunctionReachability) @@ -170,6 +181,45 @@ ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S, } // namespace llvm +/// Checks if a type could have padding bytes. 
+static bool isDenselyPacked(Type *Ty, const DataLayout &DL) { + // There is no size information, so be conservative. + if (!Ty->isSized()) + return false; + + // If the alloc size is not equal to the storage size, then there are padding + // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. + if (DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty)) + return false; + + // FIXME: This isn't the right way to check for padding in vectors with + // non-byte-size elements. + if (VectorType *SeqTy = dyn_cast<VectorType>(Ty)) + return isDenselyPacked(SeqTy->getElementType(), DL); + + // For array types, check for padding within members. + if (ArrayType *SeqTy = dyn_cast<ArrayType>(Ty)) + return isDenselyPacked(SeqTy->getElementType(), DL); + + if (!isa<StructType>(Ty)) + return true; + + // Check for padding within and between elements of a struct. + StructType *StructTy = cast<StructType>(Ty); + const StructLayout *Layout = DL.getStructLayout(StructTy); + uint64_t StartPos = 0; + for (unsigned I = 0, E = StructTy->getNumElements(); I < E; ++I) { + Type *ElTy = StructTy->getElementType(I); + if (!isDenselyPacked(ElTy, DL)) + return false; + if (StartPos != Layout->getElementOffsetInBits(I)) + return false; + StartPos += DL.getTypeAllocSizeInBits(ElTy); + } + + return true; +} + /// Get pointer operand of memory accessing instruction. If \p I is /// not a memory accessing instruction, return nullptr. If \p AllowVolatile, /// is set to false and the instruction is volatile, return nullptr. @@ -236,7 +286,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, } // Ensure the result has the requested type. 
- Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast"); + Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, ResTy, + Ptr->getName() + ".cast"); LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n"); return Ptr; @@ -251,25 +302,32 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, /// once. Note that the value used for the callback may still be the value /// associated with \p IRP (due to PHIs). To limit how much effort is invested, /// we will never visit more values than specified by \p MaxValues. -/// If \p Intraprocedural is set to true only values valid in the scope of -/// \p CtxI will be visited and simplification into other scopes is prevented. +/// If \p VS does not contain the Interprocedural bit, only values valid in the +/// scope of \p CtxI will be visited and simplification into other scopes is +/// prevented. template <typename StateTy> static bool genericValueTraversal( Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA, StateTy &State, function_ref<bool(Value &, const Instruction *, StateTy &, bool)> VisitValueCB, - const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16, + const Instruction *CtxI, bool &UsedAssumedInformation, + bool UseValueSimplify = true, int MaxValues = 16, function_ref<Value *(Value *)> StripCB = nullptr, - bool Intraprocedural = false) { + AA::ValueScope VS = AA::Interprocedural) { - const AAIsDead *LivenessAA = nullptr; - if (IRP.getAnchorScope()) - LivenessAA = &A.getAAFor<AAIsDead>( - QueryingAA, - IRPosition::function(*IRP.getAnchorScope(), IRP.getCallBaseContext()), - DepClassTy::NONE); - bool AnyDead = false; + struct LivenessInfo { + const AAIsDead *LivenessAA = nullptr; + bool AnyDead = false; + }; + SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs; + auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & { + LivenessInfo &LI = LivenessAAs[&F]; + if (!LI.LivenessAA) + LI.LivenessAA = 
&A.getAAFor<AAIsDead>(QueryingAA, IRPosition::function(F), + DepClassTy::NONE); + return LI; + }; Value *InitialV = &IRP.getAssociatedValue(); using Item = std::pair<Value *, const Instruction *>; @@ -319,10 +377,9 @@ static bool genericValueTraversal( // Look through select instructions, visit assumed potential values. if (auto *SI = dyn_cast<SelectInst>(V)) { - bool UsedAssumedInformation = false; Optional<Constant *> C = A.getAssumedConstant( *SI->getCondition(), QueryingAA, UsedAssumedInformation); - bool NoValueYet = !C.hasValue(); + bool NoValueYet = !C; if (NoValueYet || isa_and_nonnull<UndefValue>(*C)) continue; if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) { @@ -340,12 +397,12 @@ static bool genericValueTraversal( // Look through phi nodes, visit all live operands. if (auto *PHI = dyn_cast<PHINode>(V)) { - assert(LivenessAA && - "Expected liveness in the presence of instructions!"); + LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction()); for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { BasicBlock *IncomingBB = PHI->getIncomingBlock(u); - if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) { - AnyDead = true; + if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) { + LI.AnyDead = true; + UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint(); continue; } Worklist.push_back( @@ -355,9 +412,9 @@ static bool genericValueTraversal( } if (auto *Arg = dyn_cast<Argument>(V)) { - if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) { + if ((VS & AA::Interprocedural) && !Arg->hasPassPointeeByValueCopyAttr()) { SmallVector<Item> CallSiteValues; - bool AllCallSitesKnown = true; + bool UsedAssumedInformation = false; if (A.checkForAllCallSites( [&](AbstractCallSite ACS) { // Callbacks might not have a corresponding call site operand, @@ -368,7 +425,7 @@ static bool genericValueTraversal( CallSiteValues.push_back({CSOp, ACS.getInstruction()}); return true; }, - *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) 
{ + *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) { Worklist.append(CallSiteValues); continue; } @@ -376,14 +433,13 @@ static bool genericValueTraversal( } if (UseValueSimplify && !isa<Constant>(V)) { - bool UsedAssumedInformation = false; Optional<Value *> SimpleV = A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation); - if (!SimpleV.hasValue()) + if (!SimpleV) continue; Value *NewV = SimpleV.getValue(); if (NewV && NewV != V) { - if (!Intraprocedural || !CtxI || + if ((VS & AA::Interprocedural) || !CtxI || AA::isValidInScope(*NewV, CtxI->getFunction())) { Worklist.push_back({NewV, CtxI}); continue; @@ -391,6 +447,37 @@ static bool genericValueTraversal( } } + if (auto *LI = dyn_cast<LoadInst>(V)) { + bool UsedAssumedInformation = false; + // If we ask for the potentially loaded values from the initial pointer we + // will simply end up here again. The load is as far as we can make it. + if (LI->getPointerOperand() != InitialV) { + SmallSetVector<Value *, 4> PotentialCopies; + SmallSetVector<Instruction *, 4> PotentialValueOrigins; + if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, + PotentialValueOrigins, QueryingAA, + UsedAssumedInformation, + /* OnlyExact */ true)) { + // Values have to be dynamically unique or we loose the fact that a + // single llvm::Value might represent two runtime values (e.g., stack + // locations in different recursive calls). + bool DynamicallyUnique = + llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) { + return AA::isDynamicallyUnique(A, QueryingAA, *PC); + }); + if (DynamicallyUnique && + ((VS & AA::Interprocedural) || !CtxI || + llvm::all_of(PotentialCopies, [CtxI](Value *PC) { + return AA::isValidInScope(*PC, CtxI->getFunction()); + }))) { + for (auto *PotentialCopy : PotentialCopies) + Worklist.push_back({PotentialCopy, CtxI}); + continue; + } + } + } + } + // Once a leaf is reached we inform the user through the callback. 
if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) { LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: " @@ -400,8 +487,10 @@ static bool genericValueTraversal( } while (!Worklist.empty()); // If we actually used liveness information so we have to record a dependence. - if (AnyDead) - A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL); + for (auto &It : LivenessAAs) + if (It.second.AnyDead) + A.recordDependence(*It.second.LivenessAA, QueryingAA, + DepClassTy::OPTIONAL); // All values have been visited. return true; @@ -411,7 +500,8 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, SmallVectorImpl<Value *> &Objects, const AbstractAttribute &QueryingAA, const Instruction *CtxI, - bool Intraprocedural) { + bool &UsedAssumedInformation, + AA::ValueScope VS) { auto StripCB = [&](Value *V) { return getUnderlyingObject(V); }; SmallPtrSet<Value *, 8> SeenObjects; auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *, @@ -423,15 +513,16 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, }; if (!genericValueTraversal<decltype(Objects)>( A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI, - true, 32, StripCB, Intraprocedural)) + UsedAssumedInformation, true, 32, StripCB, VS)) return false; return true; } -const Value *stripAndAccumulateMinimalOffsets( - Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val, - const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, - bool UseAssumed = false) { +static const Value * +stripAndAccumulateOffsets(Attributor &A, const AbstractAttribute &QueryingAA, + const Value *Val, const DataLayout &DL, APInt &Offset, + bool GetMinOffset, bool AllowNonInbounds, + bool UseAssumed = false) { auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool { const IRPosition &Pos = IRPosition::value(V); @@ -442,14 +533,20 @@ const Value *stripAndAccumulateMinimalOffsets( : DepClassTy::NONE); ConstantRange Range = 
UseAssumed ? ValueConstantRangeAA.getAssumed() : ValueConstantRangeAA.getKnown(); + if (Range.isFullSet()) + return false; + // We can only use the lower part of the range because the upper part can // be higher than what the value can really be. - ROffset = Range.getSignedMin(); + if (GetMinOffset) + ROffset = Range.getSignedMin(); + else + ROffset = Range.getSignedMax(); return true; }; return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds, - /* AllowInvariant */ false, + /* AllowInvariant */ true, AttributorAnalysis); } @@ -458,8 +555,9 @@ getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA, const Value *Ptr, int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) { APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); - const Value *Base = stripAndAccumulateMinimalOffsets( - A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds); + const Value *Base = + stripAndAccumulateOffsets(A, QueryingAA, Ptr, DL, OffsetAPInt, + /* GetMinOffset */ true, AllowNonInbounds); BytesOffset = OffsetAPInt.getSExtValue(); return Base; @@ -493,10 +591,9 @@ static void clampReturnedValueStates( LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr() << " @ " << RVPos << "\n"); const StateType &AAS = AA.getState(); - if (T.hasValue()) - *T &= AAS; - else - T = AAS; + if (!T) + T = StateType::getBestState(AAS); + *T &= AAS; LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T << "\n"); return T->isValidState(); @@ -504,7 +601,7 @@ static void clampReturnedValueStates( if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA)) S.indicatePessimisticFixpoint(); - else if (T.hasValue()) + else if (T) S ^= *T; } @@ -560,20 +657,19 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction() << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n"); const StateType &AAS = 
AA.getState(); - if (T.hasValue()) - *T &= AAS; - else - T = AAS; + if (!T) + T = StateType::getBestState(AAS); + *T &= AAS; LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T << "\n"); return T->isValidState(); }; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true, - AllCallSitesKnown)) + UsedAssumedInformation)) S.indicatePessimisticFixpoint(); - else if (T.hasValue()) + else if (T) S ^= *T; } @@ -667,7 +763,6 @@ struct AACallSiteReturnedFromReturned : public BaseType { return clampStateAndIndicateChange(S, AA.getState()); } }; -} // namespace /// Helper function to accumulate uses. template <class AAType, typename StateType = typename AAType::StateType> @@ -779,6 +874,7 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S, S += ParentState; } } +} // namespace /// ------------------------ PointerInfo --------------------------------------- @@ -786,9 +882,6 @@ namespace llvm { namespace AA { namespace PointerInfo { -/// An access kind description as used by AAPointerInfo. -struct OffsetAndSize; - struct State; } // namespace PointerInfo @@ -806,7 +899,7 @@ struct DenseMapInfo<AAPointerInfo::Access> : DenseMapInfo<Instruction *> { /// Helper that allows OffsetAndSize as a key in a DenseMap. template <> -struct DenseMapInfo<AA::PointerInfo ::OffsetAndSize> +struct DenseMapInfo<AAPointerInfo ::OffsetAndSize> : DenseMapInfo<std::pair<int64_t, int64_t>> {}; /// Helper for AA::PointerInfo::Acccess DenseMap/Set usage ignoring everythign @@ -822,90 +915,15 @@ struct AccessAsInstructionInfo : DenseMapInfo<Instruction *> { } // namespace llvm -/// Helper to represent an access offset and size, with logic to deal with -/// uncertainty and check for overlapping accesses. 
-struct AA::PointerInfo::OffsetAndSize : public std::pair<int64_t, int64_t> { - using BaseTy = std::pair<int64_t, int64_t>; - OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {} - OffsetAndSize(const BaseTy &P) : BaseTy(P) {} - int64_t getOffset() const { return first; } - int64_t getSize() const { return second; } - static OffsetAndSize getUnknown() { return OffsetAndSize(Unknown, Unknown); } - - /// Return true if offset or size are unknown. - bool offsetOrSizeAreUnknown() const { - return getOffset() == OffsetAndSize::Unknown || - getSize() == OffsetAndSize::Unknown; - } - - /// Return true if this offset and size pair might describe an address that - /// overlaps with \p OAS. - bool mayOverlap(const OffsetAndSize &OAS) const { - // Any unknown value and we are giving up -> overlap. - if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown()) - return true; - - // Check if one offset point is in the other interval [offset, offset+size]. - return OAS.getOffset() + OAS.getSize() > getOffset() && - OAS.getOffset() < getOffset() + getSize(); - } - - /// Constant used to represent unknown offset or sizes. - static constexpr int64_t Unknown = 1 << 31; -}; - -/// Implementation of the DenseMapInfo. 
-/// -///{ -inline llvm::AccessAsInstructionInfo::Access -llvm::AccessAsInstructionInfo::getEmptyKey() { - return Access(Base::getEmptyKey(), nullptr, AAPointerInfo::AK_READ, nullptr); -} -inline llvm::AccessAsInstructionInfo::Access -llvm::AccessAsInstructionInfo::getTombstoneKey() { - return Access(Base::getTombstoneKey(), nullptr, AAPointerInfo::AK_READ, - nullptr); -} -unsigned llvm::AccessAsInstructionInfo::getHashValue( - const llvm::AccessAsInstructionInfo::Access &A) { - return Base::getHashValue(A.getRemoteInst()); -} -bool llvm::AccessAsInstructionInfo::isEqual( - const llvm::AccessAsInstructionInfo::Access &LHS, - const llvm::AccessAsInstructionInfo::Access &RHS) { - return LHS.getRemoteInst() == RHS.getRemoteInst(); -} -inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access -llvm::DenseMapInfo<AAPointerInfo::Access>::getEmptyKey() { - return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_READ, - nullptr); -} -inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access -llvm::DenseMapInfo<AAPointerInfo::Access>::getTombstoneKey() { - return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_WRITE, - nullptr); -} - -unsigned llvm::DenseMapInfo<AAPointerInfo::Access>::getHashValue( - const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &A) { - return detail::combineHashValue( - DenseMapInfo<Instruction *>::getHashValue(A.getRemoteInst()), - (A.isWrittenValueYetUndetermined() - ? ~0 - : DenseMapInfo<Value *>::getHashValue(A.getWrittenValue()))) + - A.getKind(); -} - -bool llvm::DenseMapInfo<AAPointerInfo::Access>::isEqual( - const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &LHS, - const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &RHS) { - return LHS == RHS; -} -///} - /// A type to track pointer/struct usage and accesses for AAPointerInfo. struct AA::PointerInfo::State : public AbstractState { + ~State() { + // We do not delete the Accesses objects but need to destroy them still. 
+ for (auto &It : AccessBins) + It.second->~Accesses(); + } + /// Return the best possible representable state. static State getBestState(const State &SIS) { return State(); } @@ -916,9 +934,10 @@ struct AA::PointerInfo::State : public AbstractState { return R; } - State() {} - State(const State &SIS) : AccessBins(SIS.AccessBins) {} - State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {} + State() = default; + State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) { + SIS.AccessBins.clear(); + } const State &getAssumed() const { return *this; } @@ -967,15 +986,11 @@ struct AA::PointerInfo::State : public AbstractState { return false; auto &Accs = It->getSecond(); auto &RAccs = RIt->getSecond(); - if (Accs.size() != RAccs.size()) + if (Accs->size() != RAccs->size()) return false; - auto AccIt = Accs.begin(), RAccIt = RAccs.begin(), AccE = Accs.end(); - while (AccIt != AccE) { - if (*AccIt != *RAccIt) + for (const auto &ZipIt : llvm::zip(*Accs, *RAccs)) + if (std::get<0>(ZipIt) != std::get<1>(ZipIt)) return false; - ++AccIt; - ++RAccIt; - } ++It; ++RIt; } @@ -984,42 +999,88 @@ struct AA::PointerInfo::State : public AbstractState { bool operator!=(const State &R) const { return !(*this == R); } /// We store accesses in a set with the instruction as key. 
- using Accesses = DenseSet<AAPointerInfo::Access, AccessAsInstructionInfo>; + struct Accesses { + SmallVector<AAPointerInfo::Access, 4> Accesses; + DenseMap<const Instruction *, unsigned> Map; + + unsigned size() const { return Accesses.size(); } + + using vec_iterator = decltype(Accesses)::iterator; + vec_iterator begin() { return Accesses.begin(); } + vec_iterator end() { return Accesses.end(); } + + using iterator = decltype(Map)::const_iterator; + iterator find(AAPointerInfo::Access &Acc) { + return Map.find(Acc.getRemoteInst()); + } + iterator find_end() { return Map.end(); } + + AAPointerInfo::Access &get(iterator &It) { + return Accesses[It->getSecond()]; + } + + void insert(AAPointerInfo::Access &Acc) { + Map[Acc.getRemoteInst()] = Accesses.size(); + Accesses.push_back(Acc); + } + }; /// We store all accesses in bins denoted by their offset and size. - using AccessBinsTy = DenseMap<OffsetAndSize, Accesses>; + using AccessBinsTy = DenseMap<AAPointerInfo::OffsetAndSize, Accesses *>; AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); } AccessBinsTy::const_iterator end() const { return AccessBins.end(); } protected: /// The bins with all the accesses for the associated pointer. - DenseMap<OffsetAndSize, Accesses> AccessBins; + AccessBinsTy AccessBins; /// Add a new access to the state at offset \p Offset and with size \p Size. /// The access is associated with \p I, writes \p Content (if anything), and /// is of kind \p Kind. /// \Returns CHANGED, if the state changed, UNCHANGED otherwise. - ChangeStatus addAccess(int64_t Offset, int64_t Size, Instruction &I, - Optional<Value *> Content, + ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size, + Instruction &I, Optional<Value *> Content, AAPointerInfo::AccessKind Kind, Type *Ty, Instruction *RemoteI = nullptr, Accesses *BinPtr = nullptr) { - OffsetAndSize Key{Offset, Size}; - Accesses &Bin = BinPtr ? 
*BinPtr : AccessBins[Key]; + AAPointerInfo::OffsetAndSize Key{Offset, Size}; + Accesses *&Bin = BinPtr ? BinPtr : AccessBins[Key]; + if (!Bin) + Bin = new (A.Allocator) Accesses; AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty); // Check if we have an access for this instruction in this bin, if not, // simply add it. - auto It = Bin.find(Acc); - if (It == Bin.end()) { - Bin.insert(Acc); + auto It = Bin->find(Acc); + if (It == Bin->find_end()) { + Bin->insert(Acc); return ChangeStatus::CHANGED; } // If the existing access is the same as then new one, nothing changed. - AAPointerInfo::Access Before = *It; + AAPointerInfo::Access &Current = Bin->get(It); + AAPointerInfo::Access Before = Current; // The new one will be combined with the existing one. - *It &= Acc; - return *It == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; + Current &= Acc; + return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; + } + + /// See AAPointerInfo::forallInterferingAccesses. + bool forallInterferingAccesses( + AAPointerInfo::OffsetAndSize OAS, + function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const { + if (!isValidState()) + return false; + + for (auto &It : AccessBins) { + AAPointerInfo::OffsetAndSize ItOAS = It.getFirst(); + if (!OAS.mayOverlap(ItOAS)) + continue; + bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown(); + for (auto &Access : *It.getSecond()) + if (!CB(Access, IsExact)) + return false; + } + return true; } /// See AAPointerInfo::forallInterferingAccesses. @@ -1028,10 +1089,11 @@ protected: function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const { if (!isValidState()) return false; + // First find the offset and size of I. 
- OffsetAndSize OAS(-1, -1); + AAPointerInfo::OffsetAndSize OAS(-1, -1); for (auto &It : AccessBins) { - for (auto &Access : It.getSecond()) { + for (auto &Access : *It.getSecond()) { if (Access.getRemoteInst() == &I) { OAS = It.getFirst(); break; @@ -1040,21 +1102,13 @@ protected: if (OAS.getSize() != -1) break; } + // No access for I was found, we are done. if (OAS.getSize() == -1) return true; // Now that we have an offset and size, find all overlapping ones and use // the callback on the accesses. - for (auto &It : AccessBins) { - OffsetAndSize ItOAS = It.getFirst(); - if (!OAS.mayOverlap(ItOAS)) - continue; - bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown(); - for (auto &Access : It.getSecond()) - if (!CB(Access, IsExact)) - return false; - } - return true; + return forallInterferingAccesses(OAS, CB); } private: @@ -1062,6 +1116,7 @@ private: BooleanState BS; }; +namespace { struct AAPointerInfoImpl : public StateWrapper<AA::PointerInfo::State, AAPointerInfo> { using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>; @@ -1084,22 +1139,18 @@ struct AAPointerInfoImpl } bool forallInterferingAccesses( - LoadInst &LI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB) + OffsetAndSize OAS, + function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const override { - return State::forallInterferingAccesses(LI, CB); + return State::forallInterferingAccesses(OAS, CB); } bool forallInterferingAccesses( - StoreInst &SI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB) - const override { - return State::forallInterferingAccesses(SI, CB); - } - bool forallInterferingWrites( - Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI, + Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref<bool(const Access &, bool)> UserCB) const override { SmallPtrSet<const Access *, 8> DominatingWrites; - SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites; + SmallVector<std::pair<const Access 
*, bool>, 8> InterferingAccesses; - Function &Scope = *LI.getFunction(); + Function &Scope = *I.getFunction(); const auto &NoSyncAA = A.getAAFor<AANoSync>( QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>( @@ -1127,13 +1178,15 @@ struct AAPointerInfoImpl // TODO: Use inter-procedural reachability and dominance. const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( - QueryingAA, IRPosition::function(*LI.getFunction()), - DepClassTy::OPTIONAL); + QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); - const bool CanUseCFGResoning = CanIgnoreThreading(LI); + const bool FindInterferingWrites = I.mayReadFromMemory(); + const bool FindInterferingReads = I.mayWriteToMemory(); + const bool UseDominanceReasoning = FindInterferingWrites; + const bool CanUseCFGResoning = CanIgnoreThreading(I); InformationCache &InfoCache = A.getInfoCache(); const DominatorTree *DT = - NoRecurseAA.isKnownNoRecurse() + NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>( Scope) : nullptr; @@ -1189,33 +1242,37 @@ struct AAPointerInfoImpl } auto AccessCB = [&](const Access &Acc, bool Exact) { - if (!Acc.isWrite()) + if ((!FindInterferingWrites || !Acc.isWrite()) && + (!FindInterferingReads || !Acc.isRead())) return true; // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. 
if (CanUseCFGResoning) { - if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA, - IsLiveInCalleeCB)) + if ((!Acc.isWrite() || + !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, + IsLiveInCalleeCB)) && + (!Acc.isRead() || + !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, + IsLiveInCalleeCB))) return true; - if (DT && Exact && - (Acc.getLocalInst()->getFunction() == LI.getFunction()) && + if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) && IsSameThreadAsLoad(Acc)) { - if (DT->dominates(Acc.getLocalInst(), &LI)) + if (DT->dominates(Acc.getLocalInst(), &I)) DominatingWrites.insert(&Acc); } } - InterferingWrites.push_back({&Acc, Exact}); + InterferingAccesses.push_back({&Acc, Exact}); return true; }; - if (!State::forallInterferingAccesses(LI, AccessCB)) + if (!State::forallInterferingAccesses(I, AccessCB)) return false; // If we cannot use CFG reasoning we only filter the non-write accesses // and are done here. if (!CanUseCFGResoning) { - for (auto &It : InterferingWrites) + for (auto &It : InterferingAccesses) if (!UserCB(*It.first, It.second)) return false; return true; @@ -1242,47 +1299,52 @@ struct AAPointerInfoImpl return false; }; - // Run the user callback on all writes we cannot skip and return if that + // Run the user callback on all accesses we cannot skip and return if that // succeeded for all or not. 
- unsigned NumInterferingWrites = InterferingWrites.size(); - for (auto &It : InterferingWrites) - if (!DT || NumInterferingWrites > MaxInterferingWrites || - !CanSkipAccess(*It.first, It.second)) + unsigned NumInterferingAccesses = InterferingAccesses.size(); + for (auto &It : InterferingAccesses) { + if (!DT || NumInterferingAccesses > MaxInterferingAccesses || + !CanSkipAccess(*It.first, It.second)) { if (!UserCB(*It.first, It.second)) return false; + } + } return true; } - ChangeStatus translateAndAddCalleeState(Attributor &A, - const AAPointerInfo &CalleeAA, - int64_t CallArgOffset, CallBase &CB) { + ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA, + int64_t Offset, CallBase &CB, + bool FromCallee = false) { using namespace AA::PointerInfo; - if (!CalleeAA.getState().isValidState() || !isValidState()) + if (!OtherAA.getState().isValidState() || !isValidState()) return indicatePessimisticFixpoint(); - const auto &CalleeImplAA = static_cast<const AAPointerInfoImpl &>(CalleeAA); - bool IsByval = CalleeImplAA.getAssociatedArgument()->hasByValAttr(); + const auto &OtherAAImpl = static_cast<const AAPointerInfoImpl &>(OtherAA); + bool IsByval = + FromCallee && OtherAAImpl.getAssociatedArgument()->hasByValAttr(); // Combine the accesses bin by bin. 
ChangeStatus Changed = ChangeStatus::UNCHANGED; - for (auto &It : CalleeImplAA.getState()) { + for (auto &It : OtherAAImpl.getState()) { OffsetAndSize OAS = OffsetAndSize::getUnknown(); - if (CallArgOffset != OffsetAndSize::Unknown) - OAS = OffsetAndSize(It.first.getOffset() + CallArgOffset, - It.first.getSize()); - Accesses &Bin = AccessBins[OAS]; - for (const AAPointerInfo::Access &RAcc : It.second) { + if (Offset != OffsetAndSize::Unknown) + OAS = OffsetAndSize(It.first.getOffset() + Offset, It.first.getSize()); + Accesses *Bin = AccessBins.lookup(OAS); + for (const AAPointerInfo::Access &RAcc : *It.second) { if (IsByval && !RAcc.isRead()) continue; bool UsedAssumedInformation = false; - Optional<Value *> Content = A.translateArgumentToCallSiteContent( - RAcc.getContent(), CB, *this, UsedAssumedInformation); - AccessKind AK = - AccessKind(RAcc.getKind() & (IsByval ? AccessKind::AK_READ - : AccessKind::AK_READ_WRITE)); + AccessKind AK = RAcc.getKind(); + Optional<Value *> Content = RAcc.getContent(); + if (FromCallee) { + Content = A.translateArgumentToCallSiteContent( + RAcc.getContent(), CB, *this, UsedAssumedInformation); + AK = AccessKind( + AK & (IsByval ? AccessKind::AK_READ : AccessKind::AK_READ_WRITE)); + } Changed = - Changed | addAccess(OAS.getOffset(), OAS.getSize(), CB, Content, AK, - RAcc.getType(), RAcc.getRemoteInst(), &Bin); + Changed | addAccess(A, OAS.getOffset(), OAS.getSize(), CB, Content, + AK, RAcc.getType(), RAcc.getRemoteInst(), Bin); } } return Changed; @@ -1305,7 +1367,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { bool handleAccess(Attributor &A, Instruction &I, Value &Ptr, Optional<Value *> Content, AccessKind Kind, int64_t Offset, ChangeStatus &Changed, Type *Ty, - int64_t Size = AA::PointerInfo::OffsetAndSize::Unknown) { + int64_t Size = OffsetAndSize::Unknown) { using namespace AA::PointerInfo; // No need to find a size if one is given or the offset is unknown. 
if (Offset != OffsetAndSize::Unknown && Size == OffsetAndSize::Unknown && @@ -1315,13 +1377,13 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { if (!AccessSize.isScalable()) Size = AccessSize.getFixedSize(); } - Changed = Changed | addAccess(Offset, Size, I, Content, Kind, Ty); + Changed = Changed | addAccess(A, Offset, Size, I, Content, Kind, Ty); return true; }; /// Helper struct, will support ranges eventually. struct OffsetInfo { - int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown; + int64_t Offset = OffsetAndSize::Unknown; bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; } }; @@ -1329,7 +1391,6 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { using namespace AA::PointerInfo; - State S = getState(); ChangeStatus Changed = ChangeStatus::UNCHANGED; Value &AssociatedValue = getAssociatedValue(); @@ -1337,7 +1398,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { DenseMap<Value *, OffsetInfo> OffsetInfoMap; OffsetInfoMap[&AssociatedValue] = OffsetInfo{0}; - auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI, + auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo PtrOI, bool &Follow) { OffsetInfo &UsrOI = OffsetInfoMap[Usr]; UsrOI = PtrOI; @@ -1475,8 +1536,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { const auto &CSArgPI = A.getAAFor<AAPointerInfo>( *this, IRPosition::callsite_argument(*CB, ArgNo), DepClassTy::REQUIRED); - Changed = translateAndAddCalleeState( - A, CSArgPI, OffsetInfoMap[CurPtr].Offset, *CB) | + Changed = translateAndAddState(A, CSArgPI, + OffsetInfoMap[CurPtr].Offset, *CB) | Changed; return true; } @@ -1497,7 +1558,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { }; if (!A.checkForAllUses(UsePred, *this, AssociatedValue, /* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL, - EquivalentUseCB)) + /* IgnoreDroppableUses */ true, EquivalentUseCB)) 
return indicatePessimisticFixpoint(); LLVM_DEBUG({ @@ -1505,15 +1566,19 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { for (auto &It : AccessBins) { dbgs() << "[" << It.first.getOffset() << "-" << It.first.getOffset() + It.first.getSize() - << "] : " << It.getSecond().size() << "\n"; - for (auto &Acc : It.getSecond()) { + << "] : " << It.getSecond()->size() << "\n"; + for (auto &Acc : *It.getSecond()) { dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; if (Acc.getLocalInst() != Acc.getRemoteInst()) dbgs() << " --> " << *Acc.getRemoteInst() << "\n"; - if (!Acc.isWrittenValueYetUndetermined()) - dbgs() << " - " << Acc.getWrittenValue() << "\n"; + if (!Acc.isWrittenValueYetUndetermined()) { + if (Acc.getWrittenValue()) + dbgs() << " - c: " << *Acc.getWrittenValue() << "\n"; + else + dbgs() << " - c: <unknown>\n"; + } } } }); @@ -1576,7 +1641,7 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { LengthVal = Length->getSExtValue(); Value &Ptr = getAssociatedValue(); unsigned ArgNo = getIRPosition().getCallSiteArgNo(); - ChangeStatus Changed; + ChangeStatus Changed = ChangeStatus::UNCHANGED; if (ArgNo == 0) { handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed, nullptr, LengthVal); @@ -1601,7 +1666,8 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { const IRPosition &ArgPos = IRPosition::argument(*Arg); auto &ArgAA = A.getAAFor<AAPointerInfo>(*this, ArgPos, DepClassTy::REQUIRED); - return translateAndAddCalleeState(A, ArgAA, 0, *cast<CallBase>(getCtxI())); + return translateAndAddState(A, ArgAA, 0, *cast<CallBase>(getCtxI()), + /* FromCallee */ true); } /// See AbstractAttribute::trackStatistics() @@ -1619,9 +1685,11 @@ struct AAPointerInfoCallSiteReturned final : AAPointerInfoFloating { AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition()); } }; +} // namespace /// -----------------------NoUnwind Function Attribute-------------------------- +namespace { struct 
AANoUnwindImpl : AANoUnwind { AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {} @@ -1693,9 +1761,11 @@ struct AANoUnwindCallSite final : AANoUnwindImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); } }; +} // namespace /// --------------------- Function Return Values ------------------------------- +namespace { /// "Attribute" that collects all potential returned values and the return /// instructions that they arise from. /// @@ -1821,7 +1891,7 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { // Check if we have an assumed unique return value that we could manifest. Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A); - if (!UniqueRV.hasValue() || !UniqueRV.getValue()) + if (!UniqueRV || !UniqueRV.getValue()) return Changed; // Bookkeeping. @@ -1893,17 +1963,18 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { return true; }; + bool UsedAssumedInformation = false; auto ReturnInstCB = [&](Instruction &I) { ReturnInst &Ret = cast<ReturnInst>(I); return genericValueTraversal<ReturnInst>( A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB, - &I, /* UseValueSimplify */ true, /* MaxValues */ 16, - /* StripCB */ nullptr, /* Intraprocedural */ true); + &I, UsedAssumedInformation, /* UseValueSimplify */ true, + /* MaxValues */ 16, + /* StripCB */ nullptr, AA::Intraprocedural); }; // Discover returned values from all live returned instructions in the // associated function. 
- bool UsedAssumedInformation = false; if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret}, UsedAssumedInformation)) return indicatePessimisticFixpoint(); @@ -1941,20 +2012,10 @@ struct AAReturnedValuesCallSite final : AAReturnedValuesImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override {} }; +} // namespace /// ------------------------ NoSync Function Attribute ------------------------- -struct AANoSyncImpl : AANoSync { - AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {} - - const std::string getAsStr() const override { - return getAssumed() ? "nosync" : "may-sync"; - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; -}; - bool AANoSync::isNonRelaxedAtomic(const Instruction *I) { if (!I->isAtomic()) return false; @@ -1997,6 +2058,18 @@ bool AANoSync::isNoSyncIntrinsic(const Instruction *I) { return false; } +namespace { +struct AANoSyncImpl : AANoSync { + AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {} + + const std::string getAsStr() const override { + return getAssumed() ? "nosync" : "may-sync"; + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override; +}; + ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { auto CheckRWInstForNoSync = [&](Instruction &I) { @@ -2059,9 +2132,11 @@ struct AANoSyncCallSite final : AANoSyncImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); } }; +} // namespace /// ------------------------ No-Free Attributes ---------------------------- +namespace { struct AANoFreeImpl : public AANoFree { AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {} @@ -2243,8 +2318,10 @@ struct AANoFreeCallSiteReturned final : AANoFreeFloating { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) } }; +} // namespace /// ------------------------ NonNull Argument Attribute ------------------------ +namespace { static int64_t getKnownNonNullAndDerefBytesForUse( Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) { @@ -2332,7 +2409,7 @@ struct AANonNullImpl : AANonNull { /// See AbstractAttribute::initialize(...). 
void initialize(Attributor &A) override { - Value &V = getAssociatedValue(); + Value &V = *getAssociatedValue().stripPointerCasts(); if (!NullIsDefined && hasAttr({Attribute::NonNull, Attribute::Dereferenceable}, /* IgnoreSubsumingPositions */ false, &A)) { @@ -2356,7 +2433,7 @@ struct AANonNullImpl : AANonNull { } } - if (isa<GlobalValue>(&getAssociatedValue())) { + if (isa<GlobalValue>(V)) { indicatePessimisticFixpoint(); return; } @@ -2419,8 +2496,10 @@ struct AANonNullFloating : public AANonNullImpl { }; StateType T; + bool UsedAssumedInformation = false; if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI())) + VisitValueCB, getCtxI(), + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); @@ -2472,9 +2551,11 @@ struct AANonNullCallSiteReturned final /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } }; +} // namespace /// ------------------------ No-Recurse Attributes ---------------------------- +namespace { struct AANoRecurseImpl : public AANoRecurse { AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {} @@ -2498,14 +2579,15 @@ struct AANoRecurseFunction final : AANoRecurseImpl { DepClassTy::NONE); return NoRecurseAA.isKnownNoRecurse(); }; - bool AllCallSitesKnown; - if (A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) { + bool UsedAssumedInformation = false; + if (A.checkForAllCallSites(CallSitePred, *this, true, + UsedAssumedInformation)) { // If we know all call sites and all are known no-recurse, we are done. // If all known call sites, which might not be all that exist, are known // to be no-recurse, we are not done but we can continue to assume // no-recurse. If one of the call sites we have not visited will become // live, another update is triggered. 
- if (AllCallSitesKnown) + if (!UsedAssumedInformation) indicateOptimisticFixpoint(); return ChangeStatus::UNCHANGED; } @@ -2549,9 +2631,11 @@ struct AANoRecurseCallSite final : AANoRecurseImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); } }; +} // namespace /// -------------------- Undefined-Behavior Attributes ------------------------ +namespace { struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { AAUndefinedBehaviorImpl(const IRPosition &IRP, Attributor &A) : AAUndefinedBehavior(IRP, A) {} @@ -2582,7 +2666,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { // Either we stopped and the appropriate action was taken, // or we got back a simplified value to continue. Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I); - if (!SimplifiedPtrOp.hasValue() || !SimplifiedPtrOp.getValue()) + if (!SimplifiedPtrOp || !SimplifiedPtrOp.getValue()) return true; const Value *PtrOpVal = SimplifiedPtrOp.getValue(); @@ -2627,7 +2711,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { // or we got back a simplified value to continue. 
Optional<Value *> SimplifiedCond = stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst); - if (!SimplifiedCond.hasValue() || !SimplifiedCond.getValue()) + if (!SimplifiedCond || !*SimplifiedCond) return true; AssumedNoUBInsts.insert(&I); return true; @@ -2673,10 +2757,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { IRPosition::value(*ArgVal), *this, UsedAssumedInformation); if (UsedAssumedInformation) continue; - if (SimplifiedVal.hasValue() && !SimplifiedVal.getValue()) + if (SimplifiedVal && !SimplifiedVal.getValue()) return true; - if (!SimplifiedVal.hasValue() || - isa<UndefValue>(*SimplifiedVal.getValue())) { + if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.getValue())) { KnownUBInsts.insert(&I); continue; } @@ -2691,40 +2774,38 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { return true; }; - auto InspectReturnInstForUB = - [&](Value &V, const SmallSetVector<ReturnInst *, 4> RetInsts) { - // Check if a return instruction always cause UB or not - // Note: It is guaranteed that the returned position of the anchor - // scope has noundef attribute when this is called. - // We also ensure the return position is not "assumed dead" - // because the returned value was then potentially simplified to - // `undef` in AAReturnedValues without removing the `noundef` - // attribute yet. + auto InspectReturnInstForUB = [&](Instruction &I) { + auto &RI = cast<ReturnInst>(I); + // Either we stopped and the appropriate action was taken, + // or we got back a simplified return value to continue. + Optional<Value *> SimplifiedRetValue = + stopOnUndefOrAssumed(A, RI.getReturnValue(), &I); + if (!SimplifiedRetValue || !*SimplifiedRetValue) + return true; - // When the returned position has noundef attriubte, UB occur in the - // following cases. - // (1) Returned value is known to be undef. - // (2) The value is known to be a null pointer and the returned - // position has nonnull attribute (because the returned value is - // poison). 
- bool FoundUB = false; - if (isa<UndefValue>(V)) { - FoundUB = true; - } else { - if (isa<ConstantPointerNull>(V)) { - auto &NonNullAA = A.getAAFor<AANonNull>( - *this, IRPosition::returned(*getAnchorScope()), - DepClassTy::NONE); - if (NonNullAA.isKnownNonNull()) - FoundUB = true; - } - } + // Check if a return instruction always causes UB or not + // Note: It is guaranteed that the returned position of the anchor + // scope has noundef attribute when this is called. + // We also ensure the return position is not "assumed dead" + // because the returned value was then potentially simplified to + // `undef` in AAReturnedValues without removing the `noundef` + // attribute yet. - if (FoundUB) - for (ReturnInst *RI : RetInsts) - KnownUBInsts.insert(RI); - return true; - }; + // When the returned position has noundef attribute, UB occurs in the + // following cases. + // (1) Returned value is known to be undef. + // (2) The value is known to be a null pointer and the returned + // position has nonnull attribute (because the returned value is + // poison).
+ if (isa<ConstantPointerNull>(*SimplifiedRetValue)) { + auto &NonNullAA = A.getAAFor<AANonNull>( + *this, IRPosition::returned(*getAnchorScope()), DepClassTy::NONE); + if (NonNullAA.isKnownNonNull()) + KnownUBInsts.insert(&I); + } + + return true; + }; bool UsedAssumedInformation = false; A.checkForAllInstructions(InspectMemAccessInstForUB, *this, @@ -2747,8 +2828,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { auto &RetPosNoUndefAA = A.getAAFor<AANoUndef>(*this, ReturnIRP, DepClassTy::NONE); if (RetPosNoUndefAA.isKnownNoUndef()) - A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB, - *this); + A.checkForAllInstructions(InspectReturnInstForUB, *this, + {Instruction::Ret}, UsedAssumedInformation, + /* CheckBBLivenessOnly */ true); } } @@ -2776,7 +2858,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { case Instruction::AtomicRMW: return !AssumedNoUBInsts.count(I); case Instruction::Br: { - auto BrInst = cast<BranchInst>(I); + auto *BrInst = cast<BranchInst>(I); if (BrInst->isUnconditional()) return false; return !AssumedNoUBInsts.count(I); @@ -2847,13 +2929,13 @@ private: IRPosition::value(*V), *this, UsedAssumedInformation); if (!UsedAssumedInformation) { // Don't depend on assumed values. - if (!SimplifiedV.hasValue()) { + if (!SimplifiedV) { // If it is known (which we tested above) but it doesn't have a value, // then we can assume `undef` and hence the instruction is UB. KnownUBInsts.insert(I); return llvm::None; } - if (!SimplifiedV.getValue()) + if (!*SimplifiedV) return nullptr; V = *SimplifiedV; } @@ -2877,9 +2959,11 @@ struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl { KnownUBInsts.size(); } }; +} // namespace /// ------------------------ Will-Return Attributes ---------------------------- +namespace { // Helper function that checks whether a function has any cycle which we don't // know if it is bounded or not. // Loops with maximum trip count are considered bounded, any other cycle not. 
@@ -3018,9 +3102,11 @@ struct AAWillReturnCallSite final : AAWillReturnImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); } }; +} // namespace /// -------------------AAReachability Attribute-------------------------- +namespace { struct AAReachabilityImpl : AAReachability { AAReachabilityImpl(const IRPosition &IRP, Attributor &A) : AAReachability(IRP, A) {} @@ -3032,10 +3118,6 @@ struct AAReachabilityImpl : AAReachability { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { - const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( - *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); - if (!NoRecurseAA.isAssumedNoRecurse()) - return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; } }; @@ -3047,9 +3129,11 @@ struct AAReachabilityFunction final : public AAReachabilityImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); } }; +} // namespace /// ------------------------ NoAlias Argument Attribute ------------------------ +namespace { struct AANoAliasImpl : AANoAlias { AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) { assert(getAssociatedType()->isPointerTy() && @@ -3146,10 +3230,10 @@ struct AANoAliasArgument final // If the argument is never passed through callbacks, no-alias cannot break // synchronization. 
- bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (A.checkForAllCallSites( [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this, - true, AllCallSitesKnown)) + true, UsedAssumedInformation)) return Base::updateImpl(A); // TODO: add no-alias but make sure it doesn't break synchronization by @@ -3246,14 +3330,20 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { return false; } + auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) { + const auto &DerefAA = A.getAAFor<AADereferenceable>( + *this, IRPosition::value(*O), DepClassTy::OPTIONAL); + return DerefAA.getAssumedDereferenceableBytes(); + }; + A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL); const IRPosition &VIRP = IRPosition::value(getAssociatedValue()); const Function *ScopeFn = VIRP.getAnchorScope(); auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, VIRP, DepClassTy::NONE); // Check whether the value is captured in the scope using AANoCapture. - // Look at CFG and check only uses possibly executed before this - // callsite. + // Look at CFG and check only uses possibly executed before this + // callsite. 
auto UsePred = [&](const Use &U, bool &Follow) -> bool { Instruction *UserI = cast<Instruction>(U.getUser()); @@ -3265,12 +3355,6 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { return true; if (ScopeFn) { - const auto &ReachabilityAA = A.getAAFor<AAReachability>( - *this, IRPosition::function(*ScopeFn), DepClassTy::OPTIONAL); - - if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI())) - return true; - if (auto *CB = dyn_cast<CallBase>(UserI)) { if (CB->isArgOperand(&U)) { @@ -3284,17 +3368,26 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { return true; } } + + if (!AA::isPotentiallyReachable(A, *UserI, *getCtxI(), *this)) + return true; } - // For cases which can potentially have more users - if (isa<GetElementPtrInst>(U) || isa<BitCastInst>(U) || isa<PHINode>(U) || - isa<SelectInst>(U)) { + // TODO: We should track the capturing uses in AANoCapture but the problem + // is CGSCC runs. For those we would need to "allow" AANoCapture for + // a value in the module slice. 
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) { + case UseCaptureKind::NO_CAPTURE: + return true; + case UseCaptureKind::MAY_CAPTURE: + LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *UserI + << "\n"); + return false; + case UseCaptureKind::PASSTHROUGH: Follow = true; return true; } - - LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *U << "\n"); - return false; + llvm_unreachable("unknown UseCaptureKind"); }; if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) { @@ -3423,12 +3516,21 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); } }; +} // namespace /// -------------------AAIsDead Function Attribute----------------------- +namespace { struct AAIsDeadValueImpl : public AAIsDead { AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (auto *Scope = getAnchorScope()) + if (!A.isRunOn(*Scope)) + indicatePessimisticFixpoint(); + } + /// See AAIsDead::isAssumedDead(). bool isAssumedDead() const override { return isAssumed(IS_DEAD); } @@ -3452,22 +3554,25 @@ struct AAIsDeadValueImpl : public AAIsDead { } /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { + virtual const std::string getAsStr() const override { return isAssumedDead() ? "assumed-dead" : "assumed-live"; } /// Check if all uses are assumed dead. bool areAllUsesAssumedDead(Attributor &A, Value &V) { // Callers might not check the type, void has no uses. - if (V.getType()->isVoidTy()) + if (V.getType()->isVoidTy() || V.use_empty()) return true; // If we replace a value with a constant there are no uses left afterwards. 
if (!isa<Constant>(V)) { + if (auto *I = dyn_cast<Instruction>(&V)) + if (!A.isRunOn(*I->getFunction())) + return false; bool UsedAssumedInformation = false; Optional<Constant *> C = A.getAssumedConstant(V, *this, UsedAssumedInformation); - if (!C.hasValue() || *C) + if (!C || *C) return true; } @@ -3477,7 +3582,8 @@ struct AAIsDeadValueImpl : public AAIsDead { // without going through N update cycles. This is not required for // correctness. return A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ false, - DepClassTy::REQUIRED); + DepClassTy::REQUIRED, + /* IgnoreDroppableUses */ false); } /// Determine if \p I is assumed to be side-effect free. @@ -3508,6 +3614,8 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { + AAIsDeadValueImpl::initialize(A); + if (isa<UndefValue>(getAssociatedValue())) { indicatePessimisticFixpoint(); return; @@ -3538,6 +3646,15 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { }); } + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()); + if (isa_and_nonnull<StoreInst>(I)) + if (isValidState()) + return "assumed-dead-store"; + return AAIsDeadValueImpl::getAsStr(); + } + /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()); @@ -3553,6 +3670,10 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { return ChangeStatus::UNCHANGED; } + bool isRemovableStore() const override { + return isAssumed(IS_REMOVABLE) && isa<StoreInst>(&getAssociatedValue()); + } + /// See AbstractAttribute::manifest(...). 
ChangeStatus manifest(Attributor &A) override { Value &V = getAssociatedValue(); @@ -3567,21 +3688,7 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { return ChangeStatus::CHANGED; } } - if (V.use_empty()) - return ChangeStatus::UNCHANGED; - - bool UsedAssumedInformation = false; - Optional<Constant *> C = - A.getAssumedConstant(V, *this, UsedAssumedInformation); - if (C.hasValue() && C.getValue()) - return ChangeStatus::UNCHANGED; - - // Replace the value with undef as it is dead but keep droppable uses around - // as they provide information we don't want to give up on just yet. - UndefValue &UV = *UndefValue::get(V.getType()); - bool AnyChange = - A.changeValueAfterManifest(V, UV, /* ChangeDropppable */ false); - return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; + return ChangeStatus::UNCHANGED; } /// See AbstractAttribute::trackStatistics() @@ -3596,23 +3703,22 @@ struct AAIsDeadArgument : public AAIsDeadFloating { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { + AAIsDeadFloating::initialize(A); if (!A.isFunctionIPOAmendable(*getAnchorScope())) indicatePessimisticFixpoint(); } /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { - ChangeStatus Changed = AAIsDeadFloating::manifest(A); Argument &Arg = *getAssociatedArgument(); if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {})) if (A.registerFunctionSignatureRewrite( Arg, /* ReplacementTypes */ {}, Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{}, Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) { - Arg.dropDroppableUses(); return ChangeStatus::CHANGED; } - return Changed; + return ChangeStatus::UNCHANGED; } /// See AbstractAttribute::trackStatistics() @@ -3625,6 +3731,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl { /// See AbstractAttribute::initialize(...). 
void initialize(Attributor &A) override { + AAIsDeadValueImpl::initialize(A); if (isa<UndefValue>(getAssociatedValue())) indicatePessimisticFixpoint(); } @@ -3661,7 +3768,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl { struct AAIsDeadCallSiteReturned : public AAIsDeadFloating { AAIsDeadCallSiteReturned(const IRPosition &IRP, Attributor &A) - : AAIsDeadFloating(IRP, A), IsAssumedSideEffectFree(true) {} + : AAIsDeadFloating(IRP, A) {} /// See AAIsDead::isAssumedDead(). bool isAssumedDead() const override { @@ -3670,6 +3777,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { + AAIsDeadFloating::initialize(A); if (isa<UndefValue>(getAssociatedValue())) { indicatePessimisticFixpoint(); return; @@ -3707,7 +3815,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating { } private: - bool IsAssumedSideEffectFree; + bool IsAssumedSideEffectFree = true; }; struct AAIsDeadReturned : public AAIsDeadValueImpl { @@ -3727,9 +3835,8 @@ struct AAIsDeadReturned : public AAIsDeadValueImpl { return areAllUsesAssumedDead(A, *ACS.getInstruction()); }; - bool AllCallSitesKnown; if (!A.checkForAllCallSites(PredForCallSite, *this, true, - AllCallSitesKnown)) + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; @@ -3761,17 +3868,13 @@ struct AAIsDeadFunction : public AAIsDead { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - const Function *F = getAnchorScope(); - if (F && !F->isDeclaration()) { - // We only want to compute liveness once. If the function is not part of - // the SCC, skip it. 
- if (A.isRunOn(*const_cast<Function *>(F))) { - ToBeExploredFrom.insert(&F->getEntryBlock().front()); - assumeLive(A, F->getEntryBlock()); - } else { - indicatePessimisticFixpoint(); - } + Function *F = getAnchorScope(); + if (!F || F->isDeclaration() || !A.isRunOn(*F)) { + indicatePessimisticFixpoint(); + return; } + ToBeExploredFrom.insert(&F->getEntryBlock().front()); + assumeLive(A, F->getEntryBlock()); } /// See AbstractAttribute::getAsStr(). @@ -3834,6 +3937,9 @@ struct AAIsDeadFunction : public AAIsDead { ChangeStatus updateImpl(Attributor &A) override; bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override { + assert(From->getParent() == getAnchorScope() && + To->getParent() == getAnchorScope() && + "Used AAIsDead of the wrong function"); return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To)); } @@ -3973,7 +4079,7 @@ identifyAliveSuccessors(Attributor &A, const BranchInst &BI, } else { Optional<Constant *> C = A.getAssumedConstant(*BI.getCondition(), AA, UsedAssumedInformation); - if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) { + if (!C || isa_and_nonnull<UndefValue>(*C)) { // No value yet, assume both edges are dead. } else if (isa_and_nonnull<ConstantInt>(*C)) { const BasicBlock *SuccBB = @@ -3995,7 +4101,7 @@ identifyAliveSuccessors(Attributor &A, const SwitchInst &SI, bool UsedAssumedInformation = false; Optional<Constant *> C = A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation); - if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) { + if (!C || isa_and_nonnull<UndefValue>(C.getValue())) { // No value yet, assume all edges are dead. 
} else if (isa_and_nonnull<ConstantInt>(C.getValue())) { for (auto &CaseIt : SI.cases()) { @@ -4142,9 +4248,11 @@ struct AAIsDeadCallSite final : AAIsDeadFunction { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override {} }; +} // namespace /// -------------------- Dereferenceable Argument Attribute -------------------- +namespace { struct AADereferenceableImpl : AADereferenceable { AADereferenceableImpl(const IRPosition &IRP, Attributor &A) : AADereferenceable(IRP, A) {} @@ -4152,6 +4260,7 @@ struct AADereferenceableImpl : AADereferenceable { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { + Value &V = *getAssociatedValue().stripPointerCasts(); SmallVector<Attribute, 4> Attrs; getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull}, Attrs, /* IgnoreSubsumingPositions */ false, &A); @@ -4162,9 +4271,8 @@ struct AADereferenceableImpl : AADereferenceable { NonNullAA = &A.getAAFor<AANonNull>(*this, IRP, DepClassTy::NONE); bool CanBeNull, CanBeFreed; - takeKnownDerefBytesMaximum( - IRP.getAssociatedValue().getPointerDereferenceableBytes( - A.getDataLayout(), CanBeNull, CanBeFreed)); + takeKnownDerefBytesMaximum(V.getPointerDereferenceableBytes( + A.getDataLayout(), CanBeNull, CanBeFreed)); bool IsFnInterface = IRP.isFnInterfaceKind(); Function *FnScope = IRP.getAnchorScope(); @@ -4263,8 +4371,9 @@ struct AADereferenceableFloating : AADereferenceableImpl { unsigned IdxWidth = DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); APInt Offset(IdxWidth, 0); - const Value *Base = - stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false); + const Value *Base = stripAndAccumulateOffsets( + A, *this, &V, DL, Offset, /* GetMinOffset */ false, + /* AllowNonInbounds */ true); const auto &AA = A.getAAFor<AADereferenceable>( *this, IRPosition::value(*Base), DepClassTy::REQUIRED); @@ -4312,8 +4421,10 @@ struct AADereferenceableFloating : AADereferenceableImpl { }; DerefState T; + 
bool UsedAssumedInformation = false; if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI())) + VisitValueCB, getCtxI(), + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); @@ -4377,9 +4488,11 @@ struct AADereferenceableCallSiteReturned final STATS_DECLTRACK_CS_ATTR(dereferenceable); } }; +} // namespace // ------------------------ Align Argument Attribute ------------------------ +namespace { static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &TrackUse) { @@ -4450,14 +4563,8 @@ struct AAAlignImpl : AAAlign { for (const Attribute &Attr : Attrs) takeKnownMaximum(Attr.getValueAsInt()); - Value &V = getAssociatedValue(); - // TODO: This is a HACK to avoid getPointerAlignment to introduce a ptr2int - // use of the function pointer. This was caused by D73131. We want to - // avoid this for function pointers especially because we iterate - // their uses and int2ptr is not handled. It is not a correctness - // problem though! 
- if (!V.getType()->getPointerElementType()->isFunctionTy()) - takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value()); + Value &V = *getAssociatedValue().stripPointerCasts(); + takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value()); if (getIRPosition().isFnInterfaceKind() && (!getAnchorScope() || @@ -4479,16 +4586,16 @@ struct AAAlignImpl : AAAlign { for (const Use &U : AssociatedValue.uses()) { if (auto *SI = dyn_cast<StoreInst>(U.getUser())) { if (SI->getPointerOperand() == &AssociatedValue) - if (SI->getAlignment() < getAssumedAlign()) { + if (SI->getAlign() < getAssumedAlign()) { STATS_DECLTRACK(AAAlign, Store, "Number of times alignment added to a store"); - SI->setAlignment(Align(getAssumedAlign())); + SI->setAlignment(getAssumedAlign()); LoadStoreChanged = ChangeStatus::CHANGED; } } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) { if (LI->getPointerOperand() == &AssociatedValue) - if (LI->getAlignment() < getAssumedAlign()) { - LI->setAlignment(Align(getAssumedAlign())); + if (LI->getAlign() < getAssumedAlign()) { + LI->setAlignment(getAssumedAlign()); STATS_DECLTRACK(AAAlign, Load, "Number of times alignment added to a load"); LoadStoreChanged = ChangeStatus::CHANGED; @@ -4532,9 +4639,8 @@ struct AAAlignImpl : AAAlign { /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { - return getAssumedAlign() ? 
("align<" + std::to_string(getKnownAlign()) + - "-" + std::to_string(getAssumedAlign()) + ">") - : "unknown-align"; + return "align<" + std::to_string(getKnownAlign().value()) + "-" + + std::to_string(getAssumedAlign().value()) + ">"; } }; @@ -4548,6 +4654,8 @@ struct AAAlignFloating : AAAlignImpl { auto VisitValueCB = [&](Value &V, const Instruction *, AAAlign::StateType &T, bool Stripped) -> bool { + if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V)) + return true; const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V), DepClassTy::REQUIRED); if (!Stripped && this == &AA) { @@ -4555,6 +4663,7 @@ struct AAAlignFloating : AAAlignImpl { unsigned Alignment = 1; if (const Value *Base = GetPointerBaseWithConstantOffset(&V, Offset, DL)) { + // TODO: Use AAAlign for the base too. Align PA = Base->getPointerAlignment(DL); // BasePointerAddr + Offset = Alignment * Q for some integer Q. // So we can say that the maximum power of two which is a divisor of @@ -4578,8 +4687,10 @@ struct AAAlignFloating : AAAlignImpl { }; StateType T; + bool UsedAssumedInformation = false; if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI())) + VisitValueCB, getCtxI(), + UsedAssumedInformation)) return indicatePessimisticFixpoint(); // TODO: If we know we visited all incoming values, thus no are assumed @@ -4657,7 +4768,7 @@ struct AAAlignCallSiteArgument final : AAAlignFloating { // so we do not need to track a dependence. 
const auto &ArgAlignAA = A.getAAFor<AAAlign>( *this, IRPosition::argument(*Arg), DepClassTy::NONE); - takeKnownMaximum(ArgAlignAA.getKnownAlign()); + takeKnownMaximum(ArgAlignAA.getKnownAlign().value()); } return Changed; } @@ -4684,8 +4795,10 @@ struct AAAlignCallSiteReturned final /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); } }; +} // namespace /// ------------------ Function No-Return Attribute ---------------------------- +namespace { struct AANoReturnImpl : public AANoReturn { AANoReturnImpl(const IRPosition &IRP, Attributor &A) : AANoReturn(IRP, A) {} @@ -4753,9 +4866,179 @@ struct AANoReturnCallSite final : AANoReturnImpl { /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); } }; +} // namespace + +/// ----------------------- Instance Info --------------------------------- + +namespace { +/// A class to hold the state of for no-capture attributes. +struct AAInstanceInfoImpl : public AAInstanceInfo { + AAInstanceInfoImpl(const IRPosition &IRP, Attributor &A) + : AAInstanceInfo(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Value &V = getAssociatedValue(); + if (auto *C = dyn_cast<Constant>(&V)) { + if (C->isThreadDependent()) + indicatePessimisticFixpoint(); + else + indicateOptimisticFixpoint(); + return; + } + if (auto *CB = dyn_cast<CallBase>(&V)) + if (CB->arg_size() == 0 && !CB->mayHaveSideEffects() && + !CB->mayReadFromMemory()) { + indicateOptimisticFixpoint(); + return; + } + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + Value &V = getAssociatedValue(); + const Function *Scope = nullptr; + if (auto *I = dyn_cast<Instruction>(&V)) + Scope = I->getFunction(); + if (auto *A = dyn_cast<Argument>(&V)) { + Scope = A->getParent(); + if (!Scope->hasLocalLinkage()) + return Changed; + } + if (!Scope) + return indicateOptimisticFixpoint(); + + auto &NoRecurseAA = A.getAAFor<AANoRecurse>( + *this, IRPosition::function(*Scope), DepClassTy::OPTIONAL); + if (NoRecurseAA.isAssumedNoRecurse()) + return Changed; + + auto UsePred = [&](const Use &U, bool &Follow) { + const Instruction *UserI = dyn_cast<Instruction>(U.getUser()); + if (!UserI || isa<GetElementPtrInst>(UserI) || isa<CastInst>(UserI) || + isa<PHINode>(UserI) || isa<SelectInst>(UserI)) { + Follow = true; + return true; + } + if (isa<LoadInst>(UserI) || isa<CmpInst>(UserI) || + (isa<StoreInst>(UserI) && + cast<StoreInst>(UserI)->getValueOperand() != U.get())) + return true; + if (auto *CB = dyn_cast<CallBase>(UserI)) { + // This check is not guaranteeing uniqueness but for now that we cannot + // end up with two versions of \p U thinking it was one. + if (!CB->getCalledFunction() || + !CB->getCalledFunction()->hasLocalLinkage()) + return true; + if (!CB->isArgOperand(&U)) + return false; + const auto &ArgInstanceInfoAA = A.getAAFor<AAInstanceInfo>( + *this, IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)), + DepClassTy::OPTIONAL); + if (!ArgInstanceInfoAA.isAssumedUniqueForAnalysis()) + return false; + // If this call base might reach the scope again we might forward the + // argument back here. This is very conservative. 
+ if (AA::isPotentiallyReachable(A, *CB, *Scope, *this, nullptr)) + return false; + return true; + } + return false; + }; + + auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) { + if (auto *SI = dyn_cast<StoreInst>(OldU.getUser())) { + auto *Ptr = SI->getPointerOperand()->stripPointerCasts(); + if (isa<AllocaInst>(Ptr) && AA::isDynamicallyUnique(A, *this, *Ptr)) + return true; + auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction( + *SI->getFunction()); + if (isAllocationFn(Ptr, TLI) && AA::isDynamicallyUnique(A, *this, *Ptr)) + return true; + } + return false; + }; + + if (!A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ true, + DepClassTy::OPTIONAL, + /* IgnoreDroppableUses */ true, EquivalentUseCB)) + return indicatePessimisticFixpoint(); + + return Changed; + } + + /// See AbstractState::getAsStr(). + const std::string getAsStr() const override { + return isAssumedUniqueForAnalysis() ? "<unique [fAa]>" : "<unknown>"; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// InstanceInfo attribute for floating values. +struct AAInstanceInfoFloating : AAInstanceInfoImpl { + AAInstanceInfoFloating(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoImpl(IRP, A) {} +}; + +/// NoCapture attribute for function arguments. +struct AAInstanceInfoArgument final : AAInstanceInfoFloating { + AAInstanceInfoArgument(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoFloating(IRP, A) {} +}; + +/// InstanceInfo attribute for call site arguments. +struct AAInstanceInfoCallSiteArgument final : AAInstanceInfoImpl { + AAInstanceInfoCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + if (!Arg) + return indicatePessimisticFixpoint(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = + A.getAAFor<AAInstanceInfo>(*this, ArgPos, DepClassTy::REQUIRED); + return clampStateAndIndicateChange(getState(), ArgAA.getState()); + } +}; + +/// InstanceInfo attribute for function return value. +struct AAInstanceInfoReturned final : AAInstanceInfoImpl { + AAInstanceInfoReturned(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoImpl(IRP, A) { + llvm_unreachable("InstanceInfo is not applicable to function returns!"); + } + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + llvm_unreachable("InstanceInfo is not applicable to function returns!"); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("InstanceInfo is not applicable to function returns!"); + } +}; + +/// InstanceInfo attribute deduction for a call site return value. +struct AAInstanceInfoCallSiteReturned final : AAInstanceInfoFloating { + AAInstanceInfoCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAInstanceInfoFloating(IRP, A) {} +}; +} // namespace /// ----------------------- Variable Capturing --------------------------------- +namespace { /// A class to hold the state of for no-capture attributes. 
struct AANoCaptureImpl : public AANoCapture { AANoCaptureImpl(const IRPosition &IRP, Attributor &A) : AANoCapture(IRP, A) {} @@ -4863,143 +5146,69 @@ struct AANoCaptureImpl : public AANoCapture { return "assumed not-captured-maybe-returned"; return "assumed-captured"; } -}; - -/// Attributor-aware capture tracker. -struct AACaptureUseTracker final : public CaptureTracker { - - /// Create a capture tracker that can lookup in-flight abstract attributes - /// through the Attributor \p A. - /// - /// If a use leads to a potential capture, \p CapturedInMemory is set and the - /// search is stopped. If a use leads to a return instruction, - /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed. - /// If a use leads to a ptr2int which may capture the value, - /// \p CapturedInInteger is set. If a use is found that is currently assumed - /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies - /// set. All values in \p PotentialCopies are later tracked as well. For every - /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0, - /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger - /// conservatively set to true. - AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA, - const AAIsDead &IsDeadAA, AANoCapture::StateType &State, - SmallSetVector<Value *, 4> &PotentialCopies, - unsigned &RemainingUsesToExplore) - : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State), - PotentialCopies(PotentialCopies), - RemainingUsesToExplore(RemainingUsesToExplore) {} - - /// Determine if \p V maybe captured. *Also updates the state!* - bool valueMayBeCaptured(const Value *V) { - if (V->getType()->isPointerTy()) { - PointerMayBeCaptured(V, this); - } else { - State.indicatePessimisticFixpoint(); - } - return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); - } - - /// See CaptureTracker::tooManyUses(). 
- void tooManyUses() override { - State.removeAssumedBits(AANoCapture::NO_CAPTURE); - } - bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override { - if (CaptureTracker::isDereferenceableOrNull(O, DL)) - return true; - const auto &DerefAA = A.getAAFor<AADereferenceable>( - NoCaptureAA, IRPosition::value(*O), DepClassTy::OPTIONAL); - return DerefAA.getAssumedDereferenceableBytes(); - } - - /// See CaptureTracker::captured(...). - bool captured(const Use *U) override { - Instruction *UInst = cast<Instruction>(U->getUser()); - LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst - << "\n"); - - // Because we may reuse the tracker multiple times we keep track of the - // number of explored uses ourselves as well. - if (RemainingUsesToExplore-- == 0) { - LLVM_DEBUG(dbgs() << " - too many uses to explore!\n"); - return isCapturedIn(/* Memory */ true, /* Integer */ true, - /* Return */ true); - } + /// Check the use \p U and update \p State accordingly. Return true if we + /// should continue to update the state. + bool checkUse(Attributor &A, AANoCapture::StateType &State, const Use &U, + bool &Follow) { + Instruction *UInst = cast<Instruction>(U.getUser()); + LLVM_DEBUG(dbgs() << "[AANoCapture] Check use: " << *U.get() << " in " + << *UInst << "\n"); // Deal with ptr2int by following uses. if (isa<PtrToIntInst>(UInst)) { LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n"); - return valueMayBeCaptured(UInst); + return isCapturedIn(State, /* Memory */ true, /* Integer */ true, + /* Return */ true); } - // For stores we check if we can follow the value through memory or not. 
- if (auto *SI = dyn_cast<StoreInst>(UInst)) { - if (SI->isVolatile()) - return isCapturedIn(/* Memory */ true, /* Integer */ false, - /* Return */ false); - bool UsedAssumedInformation = false; - if (!AA::getPotentialCopiesOfStoredValue( - A, *SI, PotentialCopies, NoCaptureAA, UsedAssumedInformation)) - return isCapturedIn(/* Memory */ true, /* Integer */ false, - /* Return */ false); - // Not captured directly, potential copies will be checked. - return isCapturedIn(/* Memory */ false, /* Integer */ false, + // For stores we already checked if we can follow them, if they make it + // here we give up. + if (isa<StoreInst>(UInst)) + return isCapturedIn(State, /* Memory */ true, /* Integer */ false, /* Return */ false); - } // Explicitly catch return instructions. if (isa<ReturnInst>(UInst)) { - if (UInst->getFunction() == NoCaptureAA.getAnchorScope()) - return isCapturedIn(/* Memory */ false, /* Integer */ false, + if (UInst->getFunction() == getAnchorScope()) + return isCapturedIn(State, /* Memory */ false, /* Integer */ false, /* Return */ true); - return isCapturedIn(/* Memory */ true, /* Integer */ true, + return isCapturedIn(State, /* Memory */ true, /* Integer */ true, /* Return */ true); } // For now we only use special logic for call sites. However, the tracker // itself knows about a lot of other non-capturing cases already. auto *CB = dyn_cast<CallBase>(UInst); - if (!CB || !CB->isArgOperand(U)) - return isCapturedIn(/* Memory */ true, /* Integer */ true, + if (!CB || !CB->isArgOperand(&U)) + return isCapturedIn(State, /* Memory */ true, /* Integer */ true, /* Return */ true); - unsigned ArgNo = CB->getArgOperandNo(U); + unsigned ArgNo = CB->getArgOperandNo(&U); const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo); // If we have a abstract no-capture attribute for the argument we can use // it to justify a non-capture attribute here. This allows recursion! 
auto &ArgNoCaptureAA = - A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos, DepClassTy::REQUIRED); + A.getAAFor<AANoCapture>(*this, CSArgPos, DepClassTy::REQUIRED); if (ArgNoCaptureAA.isAssumedNoCapture()) - return isCapturedIn(/* Memory */ false, /* Integer */ false, + return isCapturedIn(State, /* Memory */ false, /* Integer */ false, /* Return */ false); if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { - addPotentialCopy(*CB); - return isCapturedIn(/* Memory */ false, /* Integer */ false, + Follow = true; + return isCapturedIn(State, /* Memory */ false, /* Integer */ false, /* Return */ false); } // Lastly, we could not find a reason no-capture can be assumed so we don't. - return isCapturedIn(/* Memory */ true, /* Integer */ true, + return isCapturedIn(State, /* Memory */ true, /* Integer */ true, /* Return */ true); } - /// Register \p CS as potential copy of the value we are checking. - void addPotentialCopy(CallBase &CB) { PotentialCopies.insert(&CB); } - - /// See CaptureTracker::shouldExplore(...). - bool shouldExplore(const Use *U) override { - // Check liveness and ignore droppable users. - bool UsedAssumedInformation = false; - return !U->getUser()->isDroppable() && - !A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA, - UsedAssumedInformation); - } - - /// Update the state according to \p CapturedInMem, \p CapturedInInt, and - /// \p CapturedInRet, then return the appropriate value for use in the - /// CaptureTracker::captured() interface. - bool isCapturedIn(bool CapturedInMem, bool CapturedInInt, - bool CapturedInRet) { + /// Update \p State according to \p CapturedInMem, \p CapturedInInt, and + /// \p CapturedInRet, then return true if we should continue updating the + /// state. 
+ static bool isCapturedIn(AANoCapture::StateType &State, bool CapturedInMem, + bool CapturedInInt, bool CapturedInRet) { LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int " << CapturedInInt << "|Ret " << CapturedInRet << "]\n"); if (CapturedInMem) @@ -5008,27 +5217,8 @@ struct AACaptureUseTracker final : public CaptureTracker { State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT); if (CapturedInRet) State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET); - return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); + return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); } - -private: - /// The attributor providing in-flight abstract attributes. - Attributor &A; - - /// The abstract attribute currently updated. - AANoCapture &NoCaptureAA; - - /// The abstract liveness state. - const AAIsDead &IsDeadAA; - - /// The state currently updated. - AANoCapture::StateType &State; - - /// Set of potential copies of the tracked value. - SmallSetVector<Value *, 4> &PotentialCopies; - - /// Global counter to limit the number of explored uses. - unsigned &RemainingUsesToExplore; }; ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { @@ -5042,7 +5232,6 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); assert(F && "Expected a function!"); const IRPosition &FnPos = IRPosition::function(*F); - const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos, DepClassTy::NONE); AANoCapture::StateType T; @@ -5059,6 +5248,8 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { // AAReturnedValues, e.g., track all values that escape through returns // directly somehow. 
auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) { + if (!RVAA.getState().isValidState()) + return false; bool SeenConstant = false; for (auto &It : RVAA.returned_values()) { if (isa<Constant>(It.first)) { @@ -5094,21 +5285,27 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { } } - // Use the CaptureTracker interface and logic with the specialized tracker, - // defined in AACaptureUseTracker, that can look at in-flight abstract - // attributes and directly updates the assumed state. - SmallSetVector<Value *, 4> PotentialCopies; - unsigned RemainingUsesToExplore = - getDefaultMaxUsesToExploreForCaptureTracking(); - AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies, - RemainingUsesToExplore); + auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) { + const auto &DerefAA = A.getAAFor<AADereferenceable>( + *this, IRPosition::value(*O), DepClassTy::OPTIONAL); + return DerefAA.getAssumedDereferenceableBytes(); + }; - // Check all potential copies of the associated value until we can assume - // none will be captured or we have to assume at least one might be. 
- unsigned Idx = 0; - PotentialCopies.insert(V); - while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size()) - Tracker.valueMayBeCaptured(PotentialCopies[Idx++]); + auto UseCheck = [&](const Use &U, bool &Follow) -> bool { + switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) { + case UseCaptureKind::NO_CAPTURE: + return true; + case UseCaptureKind::MAY_CAPTURE: + return checkUse(A, T, U, Follow); + case UseCaptureKind::PASSTHROUGH: + Follow = true; + return true; + } + llvm_unreachable("Unexpected use capture kind!"); + }; + + if (!A.checkForAllUses(UseCheck, *this, *V)) + return indicatePessimisticFixpoint(); AANoCapture::StateType &S = getState(); auto Assumed = S.getAssumed(); @@ -5208,6 +5405,7 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { STATS_DECLTRACK_CSRET_ATTR(nocapture) } }; +} // namespace /// ------------------ Value Simplify Attribute ---------------------------- @@ -5219,7 +5417,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) { return false; LLVM_DEBUG({ - if (SimplifiedAssociatedValue.hasValue()) + if (SimplifiedAssociatedValue) dbgs() << "[ValueSimplify] is assumed to be " << **SimplifiedAssociatedValue << "\n"; else @@ -5228,6 +5426,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) { return true; } +namespace { struct AAValueSimplifyImpl : AAValueSimplify { AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A) : AAValueSimplify(IRP, A) {} @@ -5243,9 +5442,9 @@ struct AAValueSimplifyImpl : AAValueSimplify { /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { LLVM_DEBUG({ - errs() << "SAV: " << SimplifiedAssociatedValue << " "; + dbgs() << "SAV: " << (bool)SimplifiedAssociatedValue << " "; if (SimplifiedAssociatedValue && *SimplifiedAssociatedValue) - errs() << "SAV: " << **SimplifiedAssociatedValue << " "; + dbgs() << "SAV: " << **SimplifiedAssociatedValue << " "; }); return isValidState() ? (isAtFixpoint() ? 
"simplified" : "maybe-simple") : "not-simple"; @@ -5259,24 +5458,101 @@ struct AAValueSimplifyImpl : AAValueSimplify { return SimplifiedAssociatedValue; } + /// Ensure the return value is \p V with type \p Ty, if not possible return + /// nullptr. If \p Check is true we will only verify such an operation would + /// suceed and return a non-nullptr value if that is the case. No IR is + /// generated or modified. + static Value *ensureType(Attributor &A, Value &V, Type &Ty, Instruction *CtxI, + bool Check) { + if (auto *TypedV = AA::getWithType(V, Ty)) + return TypedV; + if (CtxI && V.getType()->canLosslesslyBitCastTo(&Ty)) + return Check ? &V + : BitCastInst::CreatePointerBitCastOrAddrSpaceCast(&V, &Ty, + "", CtxI); + return nullptr; + } + + /// Reproduce \p I with type \p Ty or return nullptr if that is not posisble. + /// If \p Check is true we will only verify such an operation would suceed and + /// return a non-nullptr value if that is the case. No IR is generated or + /// modified. + static Value *reproduceInst(Attributor &A, + const AbstractAttribute &QueryingAA, + Instruction &I, Type &Ty, Instruction *CtxI, + bool Check, ValueToValueMapTy &VMap) { + assert(CtxI && "Cannot reproduce an instruction without context!"); + if (Check && (I.mayReadFromMemory() || + !isSafeToSpeculativelyExecute(&I, CtxI, /* DT */ nullptr, + /* TLI */ nullptr))) + return nullptr; + for (Value *Op : I.operands()) { + Value *NewOp = reproduceValue(A, QueryingAA, *Op, Ty, CtxI, Check, VMap); + if (!NewOp) { + assert(Check && "Manifest of new value unexpectedly failed!"); + return nullptr; + } + if (!Check) + VMap[Op] = NewOp; + } + if (Check) + return &I; + + Instruction *CloneI = I.clone(); + // TODO: Try to salvage debug information here. + CloneI->setDebugLoc(DebugLoc()); + VMap[&I] = CloneI; + CloneI->insertBefore(CtxI); + RemapInstruction(CloneI, VMap); + return CloneI; + } + + /// Reproduce \p V with type \p Ty or return nullptr if that is not posisble. 
+ /// If \p Check is true we will only verify such an operation would suceed and + /// return a non-nullptr value if that is the case. No IR is generated or + /// modified. + static Value *reproduceValue(Attributor &A, + const AbstractAttribute &QueryingAA, Value &V, + Type &Ty, Instruction *CtxI, bool Check, + ValueToValueMapTy &VMap) { + if (const auto &NewV = VMap.lookup(&V)) + return NewV; + bool UsedAssumedInformation = false; + Optional<Value *> SimpleV = + A.getAssumedSimplified(V, QueryingAA, UsedAssumedInformation); + if (!SimpleV) + return PoisonValue::get(&Ty); + Value *EffectiveV = &V; + if (SimpleV.getValue()) + EffectiveV = SimpleV.getValue(); + if (auto *C = dyn_cast<Constant>(EffectiveV)) + if (!C->canTrap()) + return C; + if (CtxI && AA::isValidAtPosition(AA::ValueAndContext(*EffectiveV, *CtxI), + A.getInfoCache())) + return ensureType(A, *EffectiveV, Ty, CtxI, Check); + if (auto *I = dyn_cast<Instruction>(EffectiveV)) + if (Value *NewV = reproduceInst(A, QueryingAA, *I, Ty, CtxI, Check, VMap)) + return ensureType(A, *NewV, Ty, CtxI, Check); + return nullptr; + } + /// Return a value we can use as replacement for the associated one, or /// nullptr if we don't have one that makes sense. - Value *getReplacementValue(Attributor &A) const { - Value *NewV; - NewV = SimplifiedAssociatedValue.hasValue() - ? SimplifiedAssociatedValue.getValue() - : UndefValue::get(getAssociatedType()); - if (!NewV) - return nullptr; - NewV = AA::getWithType(*NewV, *getAssociatedType()); - if (!NewV || NewV == &getAssociatedValue()) - return nullptr; - const Instruction *CtxI = getCtxI(); - if (CtxI && !AA::isValidAtPosition(*NewV, *CtxI, A.getInfoCache())) - return nullptr; - if (!CtxI && !AA::isValidInScope(*NewV, getAnchorScope())) - return nullptr; - return NewV; + Value *manifestReplacementValue(Attributor &A, Instruction *CtxI) const { + Value *NewV = SimplifiedAssociatedValue + ? 
SimplifiedAssociatedValue.getValue() + : UndefValue::get(getAssociatedType()); + if (NewV && NewV != &getAssociatedValue()) { + ValueToValueMapTy VMap; + // First verify we can reprduce the value with the required type at the + // context location before we actually start modifying the IR. + if (reproduceValue(A, *this, *NewV, *getAssociatedType(), CtxI, + /* CheckOnly */ true, VMap)) + return reproduceValue(A, *this, *NewV, *getAssociatedType(), CtxI, + /* CheckOnly */ false, VMap); + } + return nullptr; } /// Helper function for querying AAValueSimplify and updating candicate. @@ -5300,14 +5576,14 @@ struct AAValueSimplifyImpl : AAValueSimplify { const auto &AA = A.getAAFor<AAType>(*this, getIRPosition(), DepClassTy::NONE); - Optional<ConstantInt *> COpt = AA.getAssumedConstantInt(A); + Optional<Constant *> COpt = AA.getAssumedConstant(A); - if (!COpt.hasValue()) { + if (!COpt) { SimplifiedAssociatedValue = llvm::None; A.recordDependence(AA, *this, DepClassTy::OPTIONAL); return true; } - if (auto *C = COpt.getValue()) { + if (auto *C = *COpt) { SimplifiedAssociatedValue = C; A.recordDependence(AA, *this, DepClassTy::OPTIONAL); return true; @@ -5318,7 +5594,7 @@ struct AAValueSimplifyImpl : AAValueSimplify { bool askSimplifiedValueForOtherAAs(Attributor &A) { if (askSimplifiedValueFor<AAValueConstantRange>(A)) return true; - if (askSimplifiedValueFor<AAPotentialValues>(A)) + if (askSimplifiedValueFor<AAPotentialConstantValues>(A)) return true; return false; } @@ -5326,14 +5602,18 @@ struct AAValueSimplifyImpl : AAValueSimplify { /// See AbstractAttribute::manifest(...). 
ChangeStatus manifest(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; - if (getAssociatedValue().user_empty()) - return Changed; - - if (auto *NewV = getReplacementValue(A)) { - LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue() << " -> " - << *NewV << " :: " << *this << "\n"); - if (A.changeValueAfterManifest(getAssociatedValue(), *NewV)) - Changed = ChangeStatus::CHANGED; + for (auto &U : getAssociatedValue().uses()) { + // Check if we need to adjust the insertion point to make sure the IR is + // valid. + Instruction *IP = dyn_cast<Instruction>(U.getUser()); + if (auto *PHI = dyn_cast_or_null<PHINode>(IP)) + IP = PHI->getIncomingBlock(U)->getTerminator(); + if (auto *NewV = manifestReplacementValue(A, IP)) { + LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue() + << " -> " << *NewV << " :: " << *this << "\n"); + if (A.changeUseAfterManifest(U, *NewV)) + Changed = ChangeStatus::CHANGED; + } } return Changed | AAValueSimplify::manifest(A); @@ -5344,73 +5624,6 @@ struct AAValueSimplifyImpl : AAValueSimplify { SimplifiedAssociatedValue = &getAssociatedValue(); return AAValueSimplify::indicatePessimisticFixpoint(); } - - static bool handleLoad(Attributor &A, const AbstractAttribute &AA, - LoadInst &L, function_ref<bool(Value &)> Union) { - auto UnionWrapper = [&](Value &V, Value &Obj) { - if (isa<AllocaInst>(Obj)) - return Union(V); - if (!AA::isDynamicallyUnique(A, AA, V)) - return false; - if (!AA::isValidAtPosition(V, L, A.getInfoCache())) - return false; - return Union(V); - }; - - Value &Ptr = *L.getPointerOperand(); - SmallVector<Value *, 8> Objects; - if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L)) - return false; - - const auto *TLI = - A.getInfoCache().getTargetLibraryInfoForFunction(*L.getFunction()); - for (Value *Obj : Objects) { - LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); - if (isa<UndefValue>(Obj)) - continue; - if (isa<ConstantPointerNull>(Obj)) { - // A null 
pointer access can be undefined but any offset from null may - // be OK. We do not try to optimize the latter. - bool UsedAssumedInformation = false; - if (!NullPointerIsDefined(L.getFunction(), - Ptr.getType()->getPointerAddressSpace()) && - A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation) == Obj) - continue; - return false; - } - Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType(), TLI); - if (!InitialVal || !Union(*InitialVal)) - return false; - - LLVM_DEBUG(dbgs() << "Underlying object amenable to load-store " - "propagation, checking accesses next.\n"); - - auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { - LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n"); - if (Acc.isWrittenValueYetUndetermined()) - return true; - Value *Content = Acc.getWrittenValue(); - if (!Content) - return false; - Value *CastedContent = - AA::getWithType(*Content, *AA.getAssociatedType()); - if (!CastedContent) - return false; - if (IsExact) - return UnionWrapper(*CastedContent, *Obj); - if (auto *C = dyn_cast<Constant>(CastedContent)) - if (C->isNullValue() || C->isAllOnesValue() || isa<UndefValue>(C)) - return UnionWrapper(*CastedContent, *Obj); - return false; - }; - - auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj), - DepClassTy::REQUIRED); - if (!PI.forallInterferingWrites(A, AA, L, CheckAccess)) - return false; - } - return true; - } }; struct AAValueSimplifyArgument final : AAValueSimplifyImpl { @@ -5425,15 +5638,6 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { Attribute::StructRet, Attribute::Nest, Attribute::ByVal}, /* IgnoreSubsumingPositions */ true)) indicatePessimisticFixpoint(); - - // FIXME: This is a hack to prevent us from propagating function poiner in - // the new pass manager CGSCC pass as it creates call edges the - // CallGraphUpdater cannot handle yet. 
- Value &V = getAssociatedValue(); - if (V.getType()->isPointerTy() && - V.getType()->getPointerElementType()->isFunctionTy() && - !A.isModulePass()) - indicatePessimisticFixpoint(); } /// See AbstractAttribute::updateImpl(...). @@ -5466,7 +5670,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { bool UsedAssumedInformation = false; Optional<Constant *> SimpleArgOp = A.getAssumedConstant(ACSArgPos, *this, UsedAssumedInformation); - if (!SimpleArgOp.hasValue()) + if (!SimpleArgOp) return true; if (!SimpleArgOp.getValue()) return false; @@ -5477,14 +5681,14 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { // Generate a answer specific to a call site context. bool Success; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (hasCallBaseContext() && getCallBaseContext()->getCalledFunction() == Arg->getParent()) Success = PredForCallSite( AbstractCallSite(&getCallBaseContext()->getCalledOperandUse())); else Success = A.checkForAllCallSites(PredForCallSite, *this, true, - AllCallSitesKnown); + UsedAssumedInformation); if (!Success) if (!askSimplifiedValueForOtherAAs(A)) @@ -5516,12 +5720,16 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl { ChangeStatus updateImpl(Attributor &A) override { auto Before = SimplifiedAssociatedValue; - auto PredForReturned = [&](Value &V) { - return checkAndUpdate(A, *this, - IRPosition::value(V, getCallBaseContext())); + auto ReturnInstCB = [&](Instruction &I) { + auto &RI = cast<ReturnInst>(I); + return checkAndUpdate( + A, *this, + IRPosition::value(*RI.getReturnValue(), getCallBaseContext())); }; - if (!A.checkForAllReturnedValues(PredForReturned, *this)) + bool UsedAssumedInformation = false; + if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret}, + UsedAssumedInformation)) if (!askSimplifiedValueForOtherAAs(A)) return indicatePessimisticFixpoint(); @@ -5531,29 +5739,9 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl { } ChangeStatus manifest(Attributor &A) 
override { - ChangeStatus Changed = ChangeStatus::UNCHANGED; - - if (auto *NewV = getReplacementValue(A)) { - auto PredForReturned = - [&](Value &, const SmallSetVector<ReturnInst *, 4> &RetInsts) { - for (ReturnInst *RI : RetInsts) { - Value *ReturnedVal = RI->getReturnValue(); - if (ReturnedVal == NewV || isa<UndefValue>(ReturnedVal)) - return true; - assert(RI->getFunction() == getAnchorScope() && - "ReturnInst in wrong function!"); - LLVM_DEBUG(dbgs() - << "[ValueSimplify] " << *ReturnedVal << " -> " - << *NewV << " in " << *RI << " :: " << *this << "\n"); - if (A.changeUseAfterManifest(RI->getOperandUse(0), *NewV)) - Changed = ChangeStatus::CHANGED; - } - return true; - }; - A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this); - } - - return Changed | AAValueSimplify::manifest(A); + // We queried AAValueSimplify for the returned values so they will be + // replaced if a simplified form was found. Nothing to do here. + return ChangeStatus::UNCHANGED; } /// See AbstractAttribute::trackStatistics() @@ -5597,7 +5785,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return true; if (!SimplifiedLHS.getValue()) return false; @@ -5606,7 +5794,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return true; if (!SimplifiedRHS.getValue()) return false; @@ -5662,15 +5850,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { return true; } - bool updateWithLoad(Attributor &A, LoadInst &L) { - auto Union = [&](Value &V) { - SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( - SimplifiedAssociatedValue, &V, L.getType()); - return SimplifiedAssociatedValue 
!= Optional<Value *>(nullptr); - }; - return handleLoad(A, *this, L, Union); - } - /// Use the generic, non-optimistic InstSimplfy functionality if we managed to /// simplify any operand of the instruction \p I. Return true if successful, /// in that case SimplifiedAssociatedValue will be updated. @@ -5686,7 +5865,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { *this, UsedAssumedInformation); // If we are not sure about any operand we are not sure about the entire // instruction, we'll wait. - if (!SimplifiedOp.hasValue()) + if (!SimplifiedOp) return true; if (SimplifiedOp.getValue()) @@ -5714,7 +5893,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { const DataLayout &DL = I.getModule()->getDataLayout(); SimplifyQuery Q(DL, TLI, DT, AC, &I); if (Value *SimplifiedI = - SimplifyInstructionWithOperands(&I, NewOps, Q, ORE)) { + simplifyInstructionWithOperands(&I, NewOps, Q, ORE)) { SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( SimplifiedAssociatedValue, SimplifiedI, I.getType()); return SimplifiedAssociatedValue != Optional<Value *>(nullptr); @@ -5726,6 +5905,36 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { ChangeStatus updateImpl(Attributor &A) override { auto Before = SimplifiedAssociatedValue; + // Do not simplify loads that are only used in llvm.assume if we cannot also + // remove all stores that may feed into the load. The reason is that the + // assume is probably worth something as long as the stores are around. 
+ if (auto *LI = dyn_cast<LoadInst>(&getAssociatedValue())) { + InformationCache &InfoCache = A.getInfoCache(); + if (InfoCache.isOnlyUsedByAssume(*LI)) { + SmallSetVector<Value *, 4> PotentialCopies; + SmallSetVector<Instruction *, 4> PotentialValueOrigins; + bool UsedAssumedInformation = false; + if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, + PotentialValueOrigins, *this, + UsedAssumedInformation, + /* OnlyExact */ true)) { + if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) { + if (!I) + return true; + if (auto *SI = dyn_cast<StoreInst>(I)) + return A.isAssumedDead(SI->getOperandUse(0), this, + /* LivenessAA */ nullptr, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ false); + return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ false); + })) + return indicatePessimisticFixpoint(); + } + } + } + auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &, bool Stripped) -> bool { auto &AA = A.getAAFor<AAValueSimplify>( @@ -5734,9 +5943,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { if (!Stripped && this == &AA) { if (auto *I = dyn_cast<Instruction>(&V)) { - if (auto *LI = dyn_cast<LoadInst>(&V)) - if (updateWithLoad(A, *LI)) - return true; if (auto *Cmp = dyn_cast<CmpInst>(&V)) if (handleCmp(A, *Cmp)) return true; @@ -5754,8 +5960,10 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { }; bool Dummy = false; + bool UsedAssumedInformation = false; if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy, VisitValueCB, getCtxI(), + UsedAssumedInformation, /* UseValueSimplify */ false)) if (!askSimplifiedValueForOtherAAs(A)) return indicatePessimisticFixpoint(); @@ -5806,8 +6014,23 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl { void initialize(Attributor &A) override { AAValueSimplifyImpl::initialize(A); - if (!getAssociatedFunction()) + Function *Fn = getAssociatedFunction(); + if (!Fn) { indicatePessimisticFixpoint(); + 
return; + } + for (Argument &Arg : Fn->args()) { + if (Arg.hasReturnedAttr()) { + auto IRP = IRPosition::callsite_argument(*cast<CallBase>(getCtxI()), + Arg.getArgNo()); + if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_ARGUMENT && + checkAndUpdate(A, *this, IRP)) + indicateOptimisticFixpoint(); + else + indicatePessimisticFixpoint(); + return; + } + } } /// See AbstractAttribute::updateImpl(...). @@ -5845,8 +6068,13 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; + // TODO: We should avoid simplification duplication to begin with. + auto *FloatAA = A.lookupAAFor<AAValueSimplify>( + IRPosition::value(getAssociatedValue()), this, DepClassTy::NONE); + if (FloatAA && FloatAA->getState().isValidState()) + return Changed; - if (auto *NewV = getReplacementValue(A)) { + if (auto *NewV = manifestReplacementValue(A, getCtxI())) { Use &U = cast<CallBase>(&getAnchorValue()) ->getArgOperandUse(getCallSiteArgNo()); if (A.changeUseAfterManifest(U, *NewV)) @@ -5860,8 +6088,10 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { STATS_DECLTRACK_CSARG_ATTR(value_simplify) } }; +} // namespace /// ----------------------- Heap-To-Stack Conversion --------------------------- +namespace { struct AAHeapToStackFunction final : public AAHeapToStack { struct AllocationInfo { @@ -5883,7 +6113,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { bool HasPotentiallyFreeingUnknownUses = false; /// The set of free calls that use this allocation. - SmallPtrSet<CallBase *, 1> PotentialFreeCalls{}; + SmallSetVector<CallBase *, 1> PotentialFreeCalls{}; }; struct DeallocationInfo { @@ -5895,7 +6125,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { bool MightFreeUnknownObjects = false; /// The set of allocation calls that are potentially freed. 
- SmallPtrSet<CallBase *, 1> PotentialAllocationCalls{}; + SmallSetVector<CallBase *, 1> PotentialAllocationCalls{}; }; AAHeapToStackFunction(const IRPosition &IRP, Attributor &A) @@ -5905,9 +6135,9 @@ struct AAHeapToStackFunction final : public AAHeapToStack { // Ensure we call the destructor so we release any memory allocated in the // sets. for (auto &It : AllocationInfos) - It.getSecond()->~AllocationInfo(); + It.second->~AllocationInfo(); for (auto &It : DeallocationInfos) - It.getSecond()->~DeallocationInfo(); + It.second->~DeallocationInfo(); } void initialize(Attributor &A) override { @@ -5932,7 +6162,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack { if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) { AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB}; AllocationInfos[CB] = AI; - TLI->getLibFunc(*CB, AI->LibraryFunctionId); + if (TLI) + TLI->getLibFunc(*CB, AI->LibraryFunctionId); } } return true; @@ -5945,6 +6176,16 @@ struct AAHeapToStackFunction final : public AAHeapToStack { /* CheckPotentiallyDead */ true); (void)Success; assert(Success && "Did not expect the call base visit callback to fail!"); + + Attributor::SimplifictionCallbackTy SCB = + [](const IRPosition &, const AbstractAttribute *, + bool &) -> Optional<Value *> { return nullptr; }; + for (const auto &It : AllocationInfos) + A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first), + SCB); + for (const auto &It : DeallocationInfos) + A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first), + SCB); } const std::string getAsStr() const override { @@ -5971,7 +6212,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack { bool isAssumedHeapToStack(const CallBase &CB) const override { if (isValidState()) - if (AllocationInfo *AI = AllocationInfos.lookup(&CB)) + if (AllocationInfo *AI = + AllocationInfos.lookup(const_cast<CallBase *>(&CB))) return AI->Status != AllocationInfo::INVALID; return false; } @@ -6000,6 +6242,17 @@ 
struct AAHeapToStackFunction final : public AAHeapToStack { Function *F = getAnchorScope(); const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + LoopInfo *LI = + A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F); + Optional<bool> MayContainIrreducibleControl; + auto IsInLoop = [&](BasicBlock &BB) { + if (!MayContainIrreducibleControl.has_value()) + MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI); + if (MayContainIrreducibleControl.value()) + return true; + return LI->getLoopFor(&BB) != nullptr; + }; + for (auto &It : AllocationInfos) { AllocationInfo &AI = *It.second; if (AI.Status == AllocationInfo::INVALID) @@ -6026,13 +6279,13 @@ struct AAHeapToStackFunction final : public AAHeapToStack { else A.emitRemark<OptimizationRemark>(AI.CB, "HeapToStack", Remark); + const DataLayout &DL = A.getInfoCache().getDL(); Value *Size; Optional<APInt> SizeAPI = getSize(A, *this, AI); - if (SizeAPI.hasValue()) { + if (SizeAPI) { Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI); } else { LLVMContext &Ctx = AI.CB->getContext(); - auto &DL = A.getInfoCache().getDL(); ObjectSizeOpts Opts; ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, Opts); SizeOffsetEvalType SizeOffsetPair = Eval.compute(AI.CB); @@ -6041,32 +6294,36 @@ struct AAHeapToStackFunction final : public AAHeapToStack { Size = SizeOffsetPair.first; } + Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent())) + ? 
AI.CB + : &F->getEntryBlock().front(); + Align Alignment(1); if (MaybeAlign RetAlign = AI.CB->getRetAlign()) - Alignment = max(Alignment, RetAlign); + Alignment = std::max(Alignment, *RetAlign); if (Value *Align = getAllocAlignment(AI.CB, TLI)) { Optional<APInt> AlignmentAPI = getAPInt(A, *this, *Align); - assert(AlignmentAPI.hasValue() && + assert(AlignmentAPI && AlignmentAPI.getValue().getZExtValue() > 0 && "Expected an alignment during manifest!"); - Alignment = - max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue())); + Alignment = std::max( + Alignment, assumeAligned(AlignmentAPI.getValue().getZExtValue())); } - unsigned AS = cast<PointerType>(AI.CB->getType())->getAddressSpace(); - Instruction *Alloca = - new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment, - "", AI.CB->getNextNode()); + // TODO: Hoist the alloca towards the function entry. + unsigned AS = DL.getAllocaAddrSpace(); + Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, + Size, Alignment, "", IP); if (Alloca->getType() != AI.CB->getType()) - Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc", - Alloca->getNextNode()); + Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( + Alloca, AI.CB->getType(), "malloc_cast", AI.CB); auto *I8Ty = Type::getInt8Ty(F->getContext()); auto *InitVal = getInitialValueOfAllocation(AI.CB, TLI, I8Ty); assert(InitVal && "Must be able to materialize initial memory state of allocation"); - A.changeValueAfterManifest(*AI.CB, *Alloca); + A.changeAfterManifest(IRPosition::inst(*AI.CB), *Alloca); if (auto *II = dyn_cast<InvokeInst>(AI.CB)) { auto *NBB = II->getNormalDest(); @@ -6095,7 +6352,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack { bool UsedAssumedInformation = false; Optional<Constant *> SimpleV = A.getAssumedConstant(V, AA, UsedAssumedInformation); - if (!SimpleV.hasValue()) + if (!SimpleV) return APInt(64, 0); if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue())) 
return CI->getValue(); @@ -6120,11 +6377,11 @@ struct AAHeapToStackFunction final : public AAHeapToStack { /// Collection of all malloc-like calls in a function with associated /// information. - DenseMap<CallBase *, AllocationInfo *> AllocationInfos; + MapVector<CallBase *, AllocationInfo *> AllocationInfos; /// Collection of all free-like calls in a function with associated /// information. - DenseMap<CallBase *, DeallocationInfo *> DeallocationInfos; + MapVector<CallBase *, DeallocationInfo *> DeallocationInfos; ChangeStatus updateImpl(Attributor &A) override; }; @@ -6167,7 +6424,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { // branches etc. SmallVector<Value *, 8> Objects; if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects, - *this, DI.CB)) { + *this, DI.CB, + UsedAssumedInformation)) { LLVM_DEBUG( dbgs() << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n"); @@ -6239,6 +6497,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { dbgs() << "[H2S] unique free call might free unknown allocations\n"); return false; } + if (DI->PotentialAllocationCalls.empty()) + return true; if (DI->PotentialAllocationCalls.size() > 1) { LLVM_DEBUG(dbgs() << "[H2S] unique free call might free " << DI->PotentialAllocationCalls.size() @@ -6316,7 +6576,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { if (ValidUsesOnly && AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared) - A.emitRemark<OptimizationRemarkMissed>(AI.CB, "OMP113", Remark); + A.emitRemark<OptimizationRemarkMissed>(CB, "OMP113", Remark); LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n"); ValidUsesOnly = false; @@ -6348,7 +6608,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { continue; if (Value *Align = getAllocAlignment(AI.CB, TLI)) { - if (!getAPInt(A, *this, *Align)) { + Optional<APInt> APAlign = getAPInt(A, *this, *Align); + if (!APAlign) { // Can't generate an alloca which respects the required 
alignment // on the allocation. LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB @@ -6356,14 +6617,23 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { AI.Status = AllocationInfo::INVALID; Changed = ChangeStatus::CHANGED; continue; + } else { + if (APAlign->ugt(llvm::Value::MaximumAlignment) || + !APAlign->isPowerOf2()) { + LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign + << "\n"); + AI.Status = AllocationInfo::INVALID; + Changed = ChangeStatus::CHANGED; + continue; + } } } if (MaxHeapToStackSize != -1) { Optional<APInt> Size = getSize(A, *this, AI); - if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) { + if (!Size || Size.getValue().ugt(MaxHeapToStackSize)) { LLVM_DEBUG({ - if (!Size.hasValue()) + if (!Size) dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n"; else dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. " @@ -6395,8 +6665,10 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { return Changed; } +} // namespace /// ----------------------- Privatizable Pointers ------------------------------ +namespace { struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A) : AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {} @@ -6414,9 +6686,9 @@ struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { /// Return a privatizable type that encloses both T0 and T1. /// TODO: This is merely a stub for now as we should manage a mapping as well. 
Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) { - if (!T0.hasValue()) + if (!T0) return T1; - if (!T1.hasValue()) + if (!T1) return T0; if (T0 == T1) return T0; @@ -6445,11 +6717,13 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { Optional<Type *> identifyPrivatizableType(Attributor &A) override { // If this is a byval argument and we know all the call sites (so we can // rewrite them), there is no need to check them explicitly. - bool AllCallSitesKnown; - if (getIRPosition().hasAttr(Attribute::ByVal) && + bool UsedAssumedInformation = false; + SmallVector<Attribute, 1> Attrs; + getAttrs({Attribute::ByVal}, Attrs, /* IgnoreSubsumingPositions */ true); + if (!Attrs.empty() && A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this, - true, AllCallSitesKnown)) - return getAssociatedValue().getType()->getPointerElementType(); + true, UsedAssumedInformation)) + return Attrs[0].getValueAsType(); Optional<Type *> Ty; unsigned ArgNo = getIRPosition().getCallSiteArgNo(); @@ -6474,9 +6748,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { LLVM_DEBUG({ dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: "; - if (CSTy.hasValue() && CSTy.getValue()) + if (CSTy && CSTy.getValue()) CSTy.getValue()->print(dbgs()); - else if (CSTy.hasValue()) + else if (CSTy) dbgs() << "<nullptr>"; else dbgs() << "<none>"; @@ -6486,19 +6760,20 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { LLVM_DEBUG({ dbgs() << " : New Type: "; - if (Ty.hasValue() && Ty.getValue()) + if (Ty && Ty.getValue()) Ty.getValue()->print(dbgs()); - else if (Ty.hasValue()) + else if (Ty) dbgs() << "<nullptr>"; else dbgs() << "<none>"; dbgs() << "\n"; }); - return !Ty.hasValue() || Ty.getValue(); + return !Ty || Ty.getValue(); }; - if (!A.checkForAllCallSites(CallSiteCheck, *this, true, AllCallSitesKnown)) + if (!A.checkForAllCallSites(CallSiteCheck, *this, true, + UsedAssumedInformation)) 
return nullptr; return Ty; } @@ -6506,7 +6781,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { PrivatizableType = identifyPrivatizableType(A); - if (!PrivatizableType.hasValue()) + if (!PrivatizableType) return ChangeStatus::UNCHANGED; if (!PrivatizableType.getValue()) return indicatePessimisticFixpoint(); @@ -6518,8 +6793,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Avoid arguments with padding for now. if (!getIRPosition().hasAttr(Attribute::ByVal) && - !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(), - A.getInfoCache().getDL())) { + !isDenselyPacked(*PrivatizableType, A.getInfoCache().getDL())) { LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n"); return indicatePessimisticFixpoint(); } @@ -6527,7 +6801,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Collect the types that will replace the privatizable type in the function // signature. SmallVector<Type *, 16> ReplacementTypes; - identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + identifyReplacementTypes(*PrivatizableType, ReplacementTypes); // Verify callee and caller agree on how the promoted argument would be // passed. 
@@ -6545,9 +6819,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { return TTI->areTypesABICompatible( CB->getCaller(), CB->getCalledFunction(), ReplacementTypes); }; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; if (!A.checkForAllCallSites(CallSiteCheck, *this, true, - AllCallSitesKnown)) { + UsedAssumedInformation)) { LLVM_DEBUG( dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for " << Fn.getName() << "\n"); @@ -6595,7 +6869,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { *this, IRPosition::argument(CBArg), DepClassTy::REQUIRED); if (CBArgPrivAA.isValidState()) { auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType(); - if (!CBArgPrivTy.hasValue()) + if (!CBArgPrivTy) continue; if (CBArgPrivTy.getValue() == PrivatizableType) continue; @@ -6642,7 +6916,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { DepClassTy::REQUIRED); if (DCArgPrivAA.isValidState()) { auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType(); - if (!DCArgPrivTy.hasValue()) + if (!DCArgPrivTy) return true; if (DCArgPrivTy.getValue() == PrivatizableType) return true; @@ -6674,7 +6948,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { }; if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true, - AllCallSitesKnown)) + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; @@ -6749,8 +7023,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { Type *PrivPtrType = PrivType->getPointerTo(); if (Base->getType() != PrivPtrType) - Base = BitCastInst::CreateBitOrPointerCast(Base, PrivPtrType, "", - ACS.getInstruction()); + Base = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( + Base, PrivPtrType, "", ACS.getInstruction()); // Traverse the type, build GEPs and loads. 
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) { @@ -6784,7 +7058,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { /// See AbstractAttribute::manifest(...) ChangeStatus manifest(Attributor &A) override { - if (!PrivatizableType.hasValue()) + if (!PrivatizableType) return ChangeStatus::UNCHANGED; assert(PrivatizableType.getValue() && "Expected privatizable type!"); @@ -6817,14 +7091,16 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { Function &ReplacementFn, Function::arg_iterator ArgIt) { BasicBlock &EntryBB = ReplacementFn.getEntryBlock(); Instruction *IP = &*EntryBB.getFirstInsertionPt(); - Instruction *AI = new AllocaInst(PrivatizableType.getValue(), 0, + const DataLayout &DL = IP->getModule()->getDataLayout(); + unsigned AS = DL.getAllocaAddrSpace(); + Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS, Arg->getName() + ".priv", IP); createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn, ArgIt->getArgNo(), *IP); if (AI->getType() != Arg->getType()) - AI = - BitCastInst::CreateBitOrPointerCast(AI, Arg->getType(), "", IP); + AI = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( + AI, Arg->getType(), "", IP); Arg->replaceAllUsesWith(AI); for (CallInst *CI : TailCalls) @@ -6841,8 +7117,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // When no alignment is specified for the load instruction, // natural alignment is assumed. createReplacementValues( - assumeAligned(AlignAA.getAssumedAlign()), - PrivatizableType.getValue(), ACS, + AlignAA.getAssumedAlign(), *PrivatizableType, ACS, ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()), NewArgOperands); }; @@ -6850,7 +7125,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Collect the types that will replace the privatizable type in the function // signature. 
SmallVector<Type *, 16> ReplacementTypes; - identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + identifyReplacementTypes(*PrivatizableType, ReplacementTypes); // Register a rewrite of the argument. if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes, @@ -6897,7 +7172,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl { auto &PrivArgAA = A.getAAFor<AAPrivatizablePtr>( *this, IRPosition::argument(*Arg), DepClassTy::REQUIRED); if (PrivArgAA.isAssumedPrivatizablePtr()) - return Obj->getType()->getPointerElementType(); + return PrivArgAA.getPrivatizableType(); } LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid " @@ -6926,7 +7201,7 @@ struct AAPrivatizablePtrCallSiteArgument final /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { PrivatizableType = identifyPrivatizableType(A); - if (!PrivatizableType.hasValue()) + if (!PrivatizableType) return ChangeStatus::UNCHANGED; if (!PrivatizableType.getValue()) return indicatePessimisticFixpoint(); @@ -6992,10 +7267,12 @@ struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating { STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr); } }; +} // namespace /// -------------------- Memory Behavior Attributes ---------------------------- /// Includes read-none, read-only, and write-only. 
/// ---------------------------------------------------------------------------- +namespace { struct AAMemoryBehaviorImpl : public AAMemoryBehavior { AAMemoryBehaviorImpl(const IRPosition &IRP, Attributor &A) : AAMemoryBehavior(IRP, A) {} @@ -7495,6 +7772,7 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U, if (UserI->mayWriteToMemory()) removeAssumedBits(NO_WRITES); } +} // namespace /// -------------------- Memory Locations Attributes --------------------------- /// Includes read-none, argmemonly, inaccessiblememonly, @@ -7528,6 +7806,7 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr( return S; } +namespace { struct AAMemoryLocationImpl : public AAMemoryLocation { AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A) @@ -7772,8 +8051,10 @@ void AAMemoryLocationImpl::categorizePtrValue( << getMemoryLocationsAsStr(State.getAssumed()) << "]\n"); SmallVector<Value *, 8> Objects; + bool UsedAssumedInformation = false; if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I, - /* Intraprocedural */ true)) { + UsedAssumedInformation, + AA::Intraprocedural)) { LLVM_DEBUG( dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n"); updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed, @@ -8042,9 +8323,11 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl { STATS_DECLTRACK_CS_ATTR(readnone) } }; +} // namespace /// ------------------ Value Constant Range Attribute ------------------------- +namespace { struct AAValueConstantRangeImpl : AAValueConstantRange { using StateType = IntegerRangeState; AAValueConstantRangeImpl(const IRPosition &IRP, Attributor &A) @@ -8379,7 +8662,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return true; if (!SimplifiedLHS.getValue()) return false; @@ 
-8388,7 +8671,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return true; if (!SimplifiedRHS.getValue()) return false; @@ -8432,7 +8715,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedOpV = A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedOpV.hasValue()) + if (!SimplifiedOpV) return true; if (!SimplifiedOpV.getValue()) return false; @@ -8462,7 +8745,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return true; if (!SimplifiedLHS.getValue()) return false; @@ -8471,7 +8754,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return true; if (!SimplifiedRHS.getValue()) return false; @@ -8536,7 +8819,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { const auto &SimplifiedOpV = A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedOpV.hasValue()) + if (!SimplifiedOpV) return true; if (!SimplifiedOpV.getValue()) return false; @@ -8588,8 +8871,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { IntegerRangeState T(getBitWidth()); + bool UsedAssumedInformation = false; if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T, VisitValueCB, getCtxI(), + UsedAssumedInformation, /* UseValueSimplify */ false)) return 
indicatePessimisticFixpoint(); @@ -8683,21 +8968,23 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating { STATS_DECLTRACK_CSARG_ATTR(value_range) } }; +} // namespace /// ------------------ Potential Values Attribute ------------------------- -struct AAPotentialValuesImpl : AAPotentialValues { +namespace { +struct AAPotentialConstantValuesImpl : AAPotentialConstantValues { using StateType = PotentialConstantIntValuesState; - AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A) - : AAPotentialValues(IRP, A) {} + AAPotentialConstantValuesImpl(const IRPosition &IRP, Attributor &A) + : AAPotentialConstantValues(IRP, A) {} /// See AbstractAttribute::initialize(..). void initialize(Attributor &A) override { if (A.hasSimplificationCallback(getIRPosition())) indicatePessimisticFixpoint(); else - AAPotentialValues::initialize(A); + AAPotentialConstantValues::initialize(A); } /// See AbstractAttribute::getAsStr(). @@ -8714,13 +9001,14 @@ struct AAPotentialValuesImpl : AAPotentialValues { } }; -struct AAPotentialValuesArgument final - : AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl, +struct AAPotentialConstantValuesArgument final + : AAArgumentFromCallSiteArguments<AAPotentialConstantValues, + AAPotentialConstantValuesImpl, PotentialConstantIntValuesState> { - using Base = - AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl, - PotentialConstantIntValuesState>; - AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A) + using Base = AAArgumentFromCallSiteArguments<AAPotentialConstantValues, + AAPotentialConstantValuesImpl, + PotentialConstantIntValuesState>; + AAPotentialConstantValuesArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} /// See AbstractAttribute::initialize(..). 
@@ -8738,11 +9026,12 @@ struct AAPotentialValuesArgument final } }; -struct AAPotentialValuesReturned - : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> { - using Base = - AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>; - AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A) +struct AAPotentialConstantValuesReturned + : AAReturnedFromReturnedValues<AAPotentialConstantValues, + AAPotentialConstantValuesImpl> { + using Base = AAReturnedFromReturnedValues<AAPotentialConstantValues, + AAPotentialConstantValuesImpl>; + AAPotentialConstantValuesReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} /// See AbstractAttribute::trackStatistics() @@ -8751,13 +9040,13 @@ struct AAPotentialValuesReturned } }; -struct AAPotentialValuesFloating : AAPotentialValuesImpl { - AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesImpl(IRP, A) {} +struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { + AAPotentialConstantValuesFloating(const IRPosition &IRP, Attributor &A) + : AAPotentialConstantValuesImpl(IRP, A) {} /// See AbstractAttribute::initialize(..). 
void initialize(Attributor &A) override { - AAPotentialValuesImpl::initialize(A); + AAPotentialConstantValuesImpl::initialize(A); if (isAtFixpoint()) return; @@ -8783,7 +9072,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { indicatePessimisticFixpoint(); - LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: " + LLVM_DEBUG(dbgs() << "[AAPotentialConstantValues] We give up: " << getAssociatedValue() << "\n"); } @@ -8891,7 +9180,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; if (!SimplifiedLHS.getValue()) return indicatePessimisticFixpoint(); @@ -8900,7 +9189,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; if (!SimplifiedRHS.getValue()) return indicatePessimisticFixpoint(); @@ -8909,18 +9198,18 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return indicatePessimisticFixpoint(); - auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS), - DepClassTy::REQUIRED); + auto &LHSAA = A.getAAFor<AAPotentialConstantValues>( + *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); if (!LHSAA.isValidState()) return indicatePessimisticFixpoint(); - auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS), - DepClassTy::REQUIRED); + auto &RHSAA = A.getAAFor<AAPotentialConstantValues>( + *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); if (!RHSAA.isValidState()) return indicatePessimisticFixpoint(); - const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet(); - const DenseSet<APInt> 
&RHSAAPVS = RHSAA.getAssumedSet(); + const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); + const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); // TODO: make use of undef flag to limit potential values aggressively. bool MaybeTrue = false, MaybeFalse = false; @@ -8974,7 +9263,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; if (!SimplifiedLHS.getValue()) return indicatePessimisticFixpoint(); @@ -8983,7 +9272,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; if (!SimplifiedRHS.getValue()) return indicatePessimisticFixpoint(); @@ -8997,21 +9286,21 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { // Check if we only need one operand. 
bool OnlyLeft = false, OnlyRight = false; - if (C.hasValue() && *C && (*C)->isOneValue()) + if (C && *C && (*C)->isOneValue()) OnlyLeft = true; - else if (C.hasValue() && *C && (*C)->isZeroValue()) + else if (C && *C && (*C)->isZeroValue()) OnlyRight = true; - const AAPotentialValues *LHSAA = nullptr, *RHSAA = nullptr; + const AAPotentialConstantValues *LHSAA = nullptr, *RHSAA = nullptr; if (!OnlyRight) { - LHSAA = &A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS), - DepClassTy::REQUIRED); + LHSAA = &A.getAAFor<AAPotentialConstantValues>( + *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); if (!LHSAA->isValidState()) return indicatePessimisticFixpoint(); } if (!OnlyLeft) { - RHSAA = &A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS), - DepClassTy::REQUIRED); + RHSAA = &A.getAAFor<AAPotentialConstantValues>( + *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); if (!RHSAA->isValidState()) return indicatePessimisticFixpoint(); } @@ -9049,17 +9338,17 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedSrc = A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedSrc.hasValue()) + if (!SimplifiedSrc) return ChangeStatus::UNCHANGED; if (!SimplifiedSrc.getValue()) return indicatePessimisticFixpoint(); Src = *SimplifiedSrc; - auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src), - DepClassTy::REQUIRED); + auto &SrcAA = A.getAAFor<AAPotentialConstantValues>( + *this, IRPosition::value(*Src), DepClassTy::REQUIRED); if (!SrcAA.isValidState()) return indicatePessimisticFixpoint(); - const DenseSet<APInt> &SrcAAPVS = SrcAA.getAssumedSet(); + const SetTy &SrcAAPVS = SrcAA.getAssumedSet(); if (SrcAA.undefIsContained()) unionAssumedWithUndef(); else { @@ -9082,7 +9371,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedLHS = A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), *this, 
UsedAssumedInformation); - if (!SimplifiedLHS.hasValue()) + if (!SimplifiedLHS) return ChangeStatus::UNCHANGED; if (!SimplifiedLHS.getValue()) return indicatePessimisticFixpoint(); @@ -9091,7 +9380,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedRHS = A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedRHS.hasValue()) + if (!SimplifiedRHS) return ChangeStatus::UNCHANGED; if (!SimplifiedRHS.getValue()) return indicatePessimisticFixpoint(); @@ -9100,18 +9389,18 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return indicatePessimisticFixpoint(); - auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS), - DepClassTy::REQUIRED); + auto &LHSAA = A.getAAFor<AAPotentialConstantValues>( + *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); if (!LHSAA.isValidState()) return indicatePessimisticFixpoint(); - auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS), - DepClassTy::REQUIRED); + auto &RHSAA = A.getAAFor<AAPotentialConstantValues>( + *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); if (!RHSAA.isValidState()) return indicatePessimisticFixpoint(); - const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet(); - const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet(); + const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); + const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0); // TODO: make use of undef flag to limit potential values aggressively. 
@@ -9150,13 +9439,13 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { const auto &SimplifiedIncomingValue = A.getAssumedSimplified( IRPosition::value(*IncomingValue, getCallBaseContext()), *this, UsedAssumedInformation); - if (!SimplifiedIncomingValue.hasValue()) + if (!SimplifiedIncomingValue) continue; if (!SimplifiedIncomingValue.getValue()) return indicatePessimisticFixpoint(); IncomingValue = *SimplifiedIncomingValue; - auto &PotentialValuesAA = A.getAAFor<AAPotentialValues>( + auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>( *this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED); if (!PotentialValuesAA.isValidState()) return indicatePessimisticFixpoint(); @@ -9169,30 +9458,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { : ChangeStatus::CHANGED; } - ChangeStatus updateWithLoad(Attributor &A, LoadInst &L) { - if (!L.getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - - auto Union = [&](Value &V) { - if (isa<UndefValue>(V)) { - unionAssumedWithUndef(); - return true; - } - if (ConstantInt *CI = dyn_cast<ConstantInt>(&V)) { - unionAssumed(CI->getValue()); - return true; - } - return false; - }; - auto AssumedBefore = getAssumed(); - - if (!AAValueSimplifyImpl::handleLoad(A, *this, L, Union)) - return indicatePessimisticFixpoint(); - - return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED - : ChangeStatus::CHANGED; - } - /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { Value &V = getAssociatedValue(); @@ -9213,9 +9478,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { if (auto *PHI = dyn_cast<PHINode>(I)) return updateWithPHINode(A, PHI); - if (auto *L = dyn_cast<LoadInst>(I)) - return updateWithLoad(A, *L); - return indicatePessimisticFixpoint(); } @@ -9225,14 +9487,15 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { } }; -struct AAPotentialValuesFunction : AAPotentialValuesImpl { - AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesImpl(IRP, A) {} +struct AAPotentialConstantValuesFunction : AAPotentialConstantValuesImpl { + AAPotentialConstantValuesFunction(const IRPosition &IRP, Attributor &A) + : AAPotentialConstantValuesImpl(IRP, A) {} /// See AbstractAttribute::initialize(...). ChangeStatus updateImpl(Attributor &A) override { - llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will " - "not be called"); + llvm_unreachable( + "AAPotentialConstantValues(Function|CallSite)::updateImpl will " + "not be called"); } /// See AbstractAttribute::trackStatistics() @@ -9241,9 +9504,9 @@ struct AAPotentialValuesFunction : AAPotentialValuesImpl { } }; -struct AAPotentialValuesCallSite : AAPotentialValuesFunction { - AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesFunction(IRP, A) {} +struct AAPotentialConstantValuesCallSite : AAPotentialConstantValuesFunction { + AAPotentialConstantValuesCallSite(const IRPosition &IRP, Attributor &A) + : AAPotentialConstantValuesFunction(IRP, A) {} /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { @@ -9251,11 +9514,13 @@ struct AAPotentialValuesCallSite : AAPotentialValuesFunction { } }; -struct AAPotentialValuesCallSiteReturned - : AACallSiteReturnedFromReturned<AAPotentialValues, AAPotentialValuesImpl> { - AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A) - : 
AACallSiteReturnedFromReturned<AAPotentialValues, - AAPotentialValuesImpl>(IRP, A) {} +struct AAPotentialConstantValuesCallSiteReturned + : AACallSiteReturnedFromReturned<AAPotentialConstantValues, + AAPotentialConstantValuesImpl> { + AAPotentialConstantValuesCallSiteReturned(const IRPosition &IRP, + Attributor &A) + : AACallSiteReturnedFromReturned<AAPotentialConstantValues, + AAPotentialConstantValuesImpl>(IRP, A) {} /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { @@ -9263,13 +9528,15 @@ struct AAPotentialValuesCallSiteReturned } }; -struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { - AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesFloating(IRP, A) {} +struct AAPotentialConstantValuesCallSiteArgument + : AAPotentialConstantValuesFloating { + AAPotentialConstantValuesCallSiteArgument(const IRPosition &IRP, + Attributor &A) + : AAPotentialConstantValuesFloating(IRP, A) {} /// See AbstractAttribute::initialize(..). void initialize(Attributor &A) override { - AAPotentialValuesImpl::initialize(A); + AAPotentialConstantValuesImpl::initialize(A); if (isAtFixpoint()) return; @@ -9292,8 +9559,8 @@ struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { ChangeStatus updateImpl(Attributor &A) override { Value &V = getAssociatedValue(); auto AssumedBefore = getAssumed(); - auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V), - DepClassTy::REQUIRED); + auto &AA = A.getAAFor<AAPotentialConstantValues>( + *this, IRPosition::value(V), DepClassTy::REQUIRED); const auto &S = AA.getAssumed(); unionAssumed(S); return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED @@ -9365,7 +9632,7 @@ struct AANoUndefImpl : AANoUndef { // considered to be dead. We don't manifest noundef in such positions for // the same reason above. 
if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation) - .hasValue()) + .has_value()) return ChangeStatus::UNCHANGED; return AANoUndef::manifest(A); } @@ -9400,8 +9667,10 @@ struct AANoUndefFloating : public AANoUndefImpl { }; StateType T; + bool UsedAssumedInformation = false; if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI())) + VisitValueCB, getCtxI(), + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); @@ -9518,9 +9787,10 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { // Process any value that we might call. auto ProcessCalledOperand = [&](Value *V) { bool DummyValue = false; + bool UsedAssumedInformation = false; if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this, DummyValue, VisitValue, nullptr, - false)) { + UsedAssumedInformation, false)) { // If we haven't gone through all values, assume that there are unknown // callees. setHasUnknownCallee(true, Change); @@ -9530,7 +9800,9 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { CallBase *CB = cast<CallBase>(getCtxI()); if (CB->isInlineAsm()) { - setHasUnknownCallee(false, Change); + if (!hasAssumption(*CB->getCaller(), "ompx_no_call_asm") && + !hasAssumption(*CB, "ompx_no_call_asm")) + setHasUnknownCallee(false, Change); return Change; } @@ -9584,7 +9856,8 @@ struct AACallEdgesFunction : public AACallEdgesImpl { // Visit all callable instructions. bool UsedAssumedInformation = false; if (!A.checkForAllCallLikeInstructions(ProcessCallInst, *this, - UsedAssumedInformation)) { + UsedAssumedInformation, + /* CheckBBLivenessOnly */ true)) { // If we haven't looked at all call like instructions, assume that there // are unknown callees. 
setHasUnknownCallee(true, Change); @@ -9656,7 +9929,7 @@ private: ArrayRef<const AACallEdges *> AAEdgesList, const Function &Fn) { Optional<bool> Cached = isCachedReachable(Fn); - if (Cached.hasValue()) + if (Cached) return Cached.getValue(); // The query was not cached, thus it is new. We need to request an update @@ -9691,6 +9964,10 @@ private: const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges(); for (Function *Edge : Edges) { + // Functions that do not call back into the module can be ignored. + if (Edge->hasFnAttribute(Attribute::NoCallback)) + continue; + // We don't need a dependency if the result is reachable. const AAFunctionReachability &EdgeReachability = A.getAAFor<AAFunctionReachability>( @@ -9820,22 +10097,21 @@ public: } // Update the Instruction queries. - const AAReachability *Reachability; if (!InstQueries.empty()) { - Reachability = &A.getAAFor<AAReachability>( + const AAReachability *Reachability = &A.getAAFor<AAReachability>( *this, IRPosition::function(*getAssociatedFunction()), DepClassTy::REQUIRED); - } - // Check for local callbases first. - for (auto &InstPair : InstQueries) { - SmallVector<const AACallEdges *> CallEdges; - bool AllKnown = - getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges); - // Update will return change if we this effects any queries. - if (!AllKnown) - InstPair.second.CanReachUnknownCallee = true; - Change |= InstPair.second.update(A, *this, CallEdges); + // Check for local callbases first. + for (auto &InstPair : InstQueries) { + SmallVector<const AACallEdges *> CallEdges; + bool AllKnown = + getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges); + // Update will return change if we this effects any queries. + if (!AllKnown) + InstPair.second.CanReachUnknownCallee = true; + Change |= InstPair.second.update(A, *this, CallEdges); + } } return Change; @@ -9862,13 +10138,15 @@ private: /// Used to answer if a call base inside this function can reach a specific /// function. 
- DenseMap<const CallBase *, QueryResolver> CBQueries; + MapVector<const CallBase *, QueryResolver> CBQueries; /// This is for instruction queries than scan "forward". - DenseMap<const Instruction *, QueryResolver> InstQueries; + MapVector<const Instruction *, QueryResolver> InstQueries; }; +} // namespace /// ---------------------- Assumption Propagation ------------------------------ +namespace { struct AAAssumptionInfoImpl : public AAAssumptionInfo { AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A, const DenseSet<StringRef> &Known) @@ -9938,12 +10216,13 @@ struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl { return !getAssumed().empty() || !getKnown().empty(); }; - bool AllCallSitesKnown; + bool UsedAssumedInformation = false; // Get the intersection of all assumptions held by this node's predecessors. // If we don't know all the call sites then this is either an entry into the // call graph or an empty node. This node is known to only contain its own // assumptions and can be propagated to its successors. - if (!A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) + if (!A.checkForAllCallSites(CallSitePred, *this, true, + UsedAssumedInformation)) return indicatePessimisticFixpoint(); return Changed ? 
ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; @@ -10001,6 +10280,7 @@ private: return Assumptions; } }; +} // namespace AACallGraphNode *AACallEdgeIterator::operator*() const { return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>( @@ -10023,6 +10303,7 @@ const char AANoReturn::ID = 0; const char AAIsDead::ID = 0; const char AADereferenceable::ID = 0; const char AAAlign::ID = 0; +const char AAInstanceInfo::ID = 0; const char AANoCapture::ID = 0; const char AAValueSimplify::ID = 0; const char AAHeapToStack::ID = 0; @@ -10030,7 +10311,7 @@ const char AAPrivatizablePtr::ID = 0; const char AAMemoryBehavior::ID = 0; const char AAMemoryLocation::ID = 0; const char AAValueConstantRange::ID = 0; -const char AAPotentialValues::ID = 0; +const char AAPotentialConstantValues::ID = 0; const char AANoUndef::ID = 0; const char AACallEdges::ID = 0; const char AAFunctionReachability::ID = 0; @@ -10145,9 +10426,10 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo) diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 7c178f9a9834..9e27ae49a901 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -135,7 +135,8 @@ void BlockExtractor::loadFile() { if (LineSplit.empty()) continue; if (LineSplit.size()!=2) - report_fatal_error("Invalid line format, 
expecting lines like: 'funcname bb1[;bb2..]'"); + report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'", + /*GenCrashDiag=*/false); SmallVector<StringRef, 4> BBNames; LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false); @@ -194,13 +195,15 @@ bool BlockExtractor::runOnModule(Module &M) { for (const auto &BInfo : BlocksByName) { Function *F = M.getFunction(BInfo.first); if (!F) - report_fatal_error("Invalid function name specified in the input file"); + report_fatal_error("Invalid function name specified in the input file", + /*GenCrashDiag=*/false); for (const auto &BBInfo : BInfo.second) { auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { return BB.getName().equals(BBInfo); }); if (Res == F->end()) - report_fatal_error("Invalid block name specified in the input file"); + report_fatal_error("Invalid block name specified in the input file", + /*GenCrashDiag=*/false); GroupsOfBlocks[NextGroupIdx].push_back(&*Res); } ++NextGroupIdx; @@ -212,7 +215,7 @@ bool BlockExtractor::runOnModule(Module &M) { for (BasicBlock *BB : BBs) { // Check if the module contains BB. 
if (BB->getParent()->getParent() != &M) - report_fatal_error("Invalid basic block"); + report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false); LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting " << BB->getParent()->getName() << ":" << BB->getName() << "\n"); diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index 927dceec8865..64bfcb2a9a9f 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -19,11 +19,13 @@ #include "llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Analysis/SparsePropagation.h" #include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/MDBuilder.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" + using namespace llvm; #define DEBUG_TYPE "called-value-propagation" @@ -68,7 +70,7 @@ public: } }; - CVPLatticeVal() : LatticeState(Undefined) {} + CVPLatticeVal() = default; CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {} CVPLatticeVal(std::vector<Function *> &&Functions) : LatticeState(FunctionSet), Functions(std::move(Functions)) { @@ -94,7 +96,7 @@ public: private: /// Holds the state this lattice value is in. - CVPLatticeStateTy LatticeState; + CVPLatticeStateTy LatticeState = Undefined; /// Holds functions indicating the possible targets of call sites. 
This set /// is empty for lattice values in the undefined, overdefined, and untracked diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp index 178d3f41963e..73af30ece47c 100644 --- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -85,7 +85,7 @@ static void copyDebugLocMetadata(const GlobalVariable *From, } static Align getAlign(GlobalVariable *GV) { - return GV->getAlign().getValueOr( + return GV->getAlign().value_or( GV->getParent()->getDataLayout().getPreferredAlign(GV)); } diff --git a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp index 2fe9a59ad210..dfe33ac9da0d 100644 --- a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -15,21 +15,16 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalObject.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" using namespace llvm; diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 2a6e38b0437f..99fa4baf355d 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -16,18 +16,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Argument.h" #include 
"llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -44,9 +43,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <cassert> -#include <cstdint> #include <utility> #include <vector> @@ -55,36 +54,36 @@ using namespace llvm; #define DEBUG_TYPE "deadargelim" STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); -STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); -STATISTIC(NumArgumentsReplacedWithUndef, - "Number of unread args replaced with undef"); +STATISTIC(NumRetValsEliminated, "Number of unused return values removed"); +STATISTIC(NumArgumentsReplacedWithPoison, + "Number of unread args replaced with poison"); namespace { - /// DAE - The dead argument elimination pass. - class DAE : public ModulePass { - protected: - // DAH uses this to specify a different ID. - explicit DAE(char &ID) : ModulePass(ID) {} +/// The dead argument elimination pass. +class DAE : public ModulePass { +protected: + // DAH uses this to specify a different ID. 
+ explicit DAE(char &ID) : ModulePass(ID) {} - public: - static char ID; // Pass identification, replacement for typeid +public: + static char ID; // Pass identification, replacement for typeid - DAE() : ModulePass(ID) { - initializeDAEPass(*PassRegistry::getPassRegistry()); - } + DAE() : ModulePass(ID) { + initializeDAEPass(*PassRegistry::getPassRegistry()); + } - bool runOnModule(Module &M) override { - if (skipModule(M)) - return false; - DeadArgumentEliminationPass DAEP(ShouldHackArguments()); - ModuleAnalysisManager DummyMAM; - PreservedAnalyses PA = DAEP.run(M, DummyMAM); - return !PA.areAllPreserved(); - } + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + DeadArgumentEliminationPass DAEP(shouldHackArguments()); + ModuleAnalysisManager DummyMAM; + PreservedAnalyses PA = DAEP.run(M, DummyMAM); + return !PA.areAllPreserved(); + } - virtual bool ShouldHackArguments() const { return false; } - }; + virtual bool shouldHackArguments() const { return false; } +}; } // end anonymous namespace @@ -94,51 +93,51 @@ INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false) namespace { - /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but - /// deletes arguments to functions which are external. This is only for use - /// by bugpoint. - struct DAH : public DAE { - static char ID; +/// The DeadArgumentHacking pass, same as dead argument elimination, but deletes +/// arguments to functions which are external. This is only for use by bugpoint. 
+struct DAH : public DAE { + static char ID; - DAH() : DAE(ID) {} + DAH() : DAE(ID) {} - bool ShouldHackArguments() const override { return true; } - }; + bool shouldHackArguments() const override { return true; } +}; } // end anonymous namespace char DAH::ID = 0; INITIALIZE_PASS(DAH, "deadarghaX0r", - "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", - false, false) + "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", false, + false) -/// createDeadArgEliminationPass - This pass removes arguments from functions -/// which are not used by the body of the function. +/// This pass removes arguments from functions which are not used by the body of +/// the function. ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); } ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); } -/// DeleteDeadVarargs - If this is an function that takes a ... list, and if -/// llvm.vastart is never called, the varargs list is dead for the function. -bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { - assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!"); - if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false; +/// If this is an function that takes a ... list, and if llvm.vastart is never +/// called, the varargs list is dead for the function. +bool DeadArgumentEliminationPass::deleteDeadVarargs(Function &F) { + assert(F.getFunctionType()->isVarArg() && "Function isn't varargs!"); + if (F.isDeclaration() || !F.hasLocalLinkage()) + return false; // Ensure that the function is only directly called. - if (Fn.hasAddressTaken()) + if (F.hasAddressTaken()) return false; // Don't touch naked functions. The assembly might be using an argument, or // otherwise rely on the frame layout in a way that this analysis will not // see. - if (Fn.hasFnAttribute(Attribute::Naked)) { + if (F.hasFnAttribute(Attribute::Naked)) { return false; } // Okay, we know we can transform this function if safe. 
Scan its body // looking for calls marked musttail or calls to llvm.vastart. - for (BasicBlock &BB : Fn) { + for (BasicBlock &BB : F) { for (Instruction &I : BB) { CallInst *CI = dyn_cast<CallInst>(&I); if (!CI) @@ -157,25 +156,24 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. - FunctionType *FTy = Fn.getFunctionType(); + FunctionType *FTy = F.getFunctionType(); std::vector<Type *> Params(FTy->param_begin(), FTy->param_end()); - FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), - Params, false); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); // Create the new function body and insert it into the module... - Function *NF = Function::Create(NFTy, Fn.getLinkage(), Fn.getAddressSpace()); - NF->copyAttributesFrom(&Fn); - NF->setComdat(Fn.getComdat()); - Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF); - NF->takeName(&Fn); + Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace()); + NF->copyAttributesFrom(&F); + NF->setComdat(F.getComdat()); + F.getParent()->getFunctionList().insert(F.getIterator(), NF); + NF->takeName(&F); - // Loop over all of the callers of the function, transforming the call sites + // Loop over all the callers of the function, transforming the call sites // to pass in a smaller number of arguments into the new function. 
// std::vector<Value *> Args; - for (User *U : llvm::make_early_inc_range(Fn.users())) { + for (User *U : llvm::make_early_inc_range(F.users())) { CallBase *CB = dyn_cast<CallBase>(U); if (!CB) continue; @@ -189,7 +187,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { SmallVector<AttributeSet, 8> ArgAttrs; for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) ArgAttrs.push_back(PAL.getParamAttrs(ArgNo)); - PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttrs(), + PAL = AttributeList::get(F.getContext(), PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs); } @@ -224,64 +222,67 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. - NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); + NF->getBasicBlockList().splice(NF->begin(), F.getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to - // the new arguments, also transferring over the names as well. While we're at - // it, remove the dead arguments from the DeadArguments list. - for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), - I2 = NF->arg_begin(); I != E; ++I, ++I2) { + // the new arguments, also transferring over the names as well. While we're + // at it, remove the dead arguments from the DeadArguments list. + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(), + I2 = NF->arg_begin(); + I != E; ++I, ++I2) { // Move the name and users over to the new version. I->replaceAllUsesWith(&*I2); I2->takeName(&*I); } - // Clone metadatas from the old function, including debug info descriptor. + // Clone metadata from the old function, including debug info descriptor. 
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - Fn.getAllMetadata(MDs); + F.getAllMetadata(MDs); for (auto MD : MDs) NF->addMetadata(MD.first, *MD.second); // Fix up any BlockAddresses that refer to the function. - Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType())); + F.replaceAllUsesWith(ConstantExpr::getBitCast(NF, F.getType())); // Delete the bitcast that we just created, so that NF does not // appear to be address-taken. NF->removeDeadConstantUsers(); // Finally, nuke the old function. - Fn.eraseFromParent(); + F.eraseFromParent(); return true; } -/// RemoveDeadArgumentsFromCallers - Checks if the given function has any -/// arguments that are unused, and changes the caller parameters to be undefined -/// instead. -bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { +/// Checks if the given function has any arguments that are unused, and changes +/// the caller parameters to be poison instead. +bool DeadArgumentEliminationPass::removeDeadArgumentsFromCallers(Function &F) { // We cannot change the arguments if this TU does not define the function or // if the linker may choose a function body from another TU, even if the // nominal linkage indicates that other copies of the function have the same // semantics. In the below example, the dead load from %p may not have been - // eliminated from the linker-chosen copy of f, so replacing %p with undef + // eliminated from the linker-chosen copy of f, so replacing %p with poison // in callers may introduce undefined behavior. // // define linkonce_odr void @f(i32* %p) { // %v = load i32 %p // ret void // } - if (!Fn.hasExactDefinition()) + if (!F.hasExactDefinition()) return false; - // Functions with local linkage should already have been handled, except the - // fragile (variadic) ones which we can improve here. 
- if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg()) + // Functions with local linkage should already have been handled, except if + // they are fully alive (e.g., called indirectly) and except for the fragile + // (variadic) ones. In these cases, we may still be able to improve their + // statically known call sites. + if ((F.hasLocalLinkage() && !LiveFunctions.count(&F)) && + !F.getFunctionType()->isVarArg()) return false; // Don't touch naked functions. The assembly might be using an argument, or // otherwise rely on the frame layout in a way that this analysis will not // see. - if (Fn.hasFnAttribute(Attribute::Naked)) + if (F.hasFnAttribute(Attribute::Naked)) return false; - if (Fn.use_empty()) + if (F.use_empty()) return false; SmallVector<unsigned, 8> UnusedArgs; @@ -289,35 +290,36 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { AttributeMask UBImplyingAttributes = AttributeFuncs::getUBImplyingAttributes(); - for (Argument &Arg : Fn.args()) { + for (Argument &Arg : F.args()) { if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasPassPointeeByValueCopyAttr()) { if (Arg.isUsedByMetadata()) { - Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); + Arg.replaceAllUsesWith(PoisonValue::get(Arg.getType())); Changed = true; } UnusedArgs.push_back(Arg.getArgNo()); - Fn.removeParamAttrs(Arg.getArgNo(), UBImplyingAttributes); + F.removeParamAttrs(Arg.getArgNo(), UBImplyingAttributes); } } if (UnusedArgs.empty()) return false; - for (Use &U : Fn.uses()) { + for (Use &U : F.uses()) { CallBase *CB = dyn_cast<CallBase>(U.getUser()); - if (!CB || !CB->isCallee(&U)) + if (!CB || !CB->isCallee(&U) || + CB->getFunctionType() != F.getFunctionType()) continue; - // Now go through all unused args and replace them with "undef". + // Now go through all unused args and replace them with poison. 
for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) { unsigned ArgNo = UnusedArgs[I]; Value *Arg = CB->getArgOperand(ArgNo); - CB->setArgOperand(ArgNo, UndefValue::get(Arg->getType())); + CB->setArgOperand(ArgNo, PoisonValue::get(Arg->getType())); CB->removeParamAttrs(ArgNo, UBImplyingAttributes); - ++NumArgumentsReplacedWithUndef; + ++NumArgumentsReplacedWithPoison; Changed = true; } } @@ -328,16 +330,15 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { /// Convenience function that returns the number of return values. It returns 0 /// for void functions and 1 for functions not returning a struct. It returns /// the number of struct elements for functions returning a struct. -static unsigned NumRetVals(const Function *F) { +static unsigned numRetVals(const Function *F) { Type *RetTy = F->getReturnType(); if (RetTy->isVoidTy()) return 0; - else if (StructType *STy = dyn_cast<StructType>(RetTy)) + if (StructType *STy = dyn_cast<StructType>(RetTy)) return STy->getNumElements(); - else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy)) + if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy)) return ATy->getNumElements(); - else - return 1; + return 1; } /// Returns the sub-type a function will return at a given Idx. Should @@ -349,20 +350,18 @@ static Type *getRetComponentType(const Function *F, unsigned Idx) { if (StructType *STy = dyn_cast<StructType>(RetTy)) return STy->getElementType(Idx); - else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy)) + if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy)) return ATy->getElementType(); - else - return RetTy; + return RetTy; } -/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not -/// live, it adds Use to the MaybeLiveUses argument. Returns the determined -/// liveness of Use. +/// Checks Use for liveness in LiveValues. If Use is not live, it adds Use to +/// the MaybeLiveUses argument. Returns the determined liveness of Use. 
DeadArgumentEliminationPass::Liveness -DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use, +DeadArgumentEliminationPass::markIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) { // We're live if our use or its Function is already marked as live. - if (IsLive(Use)) + if (isLive(Use)) return Live; // We're maybe live otherwise, but remember that we must become live if @@ -371,127 +370,127 @@ DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use, return MaybeLive; } -/// SurveyUse - This looks at a single use of an argument or return value -/// and determines if it should be alive or not. Adds this use to MaybeLiveUses -/// if it causes the used value to become MaybeLive. +/// Looks at a single use of an argument or return value and determines if it +/// should be alive or not. Adds this use to MaybeLiveUses if it causes the +/// used value to become MaybeLive. /// /// RetValNum is the return value number to use when this use is used in a /// return instruction. This is used in the recursion, you should always leave /// it at 0. DeadArgumentEliminationPass::Liveness -DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses, +DeadArgumentEliminationPass::surveyUse(const Use *U, UseVector &MaybeLiveUses, unsigned RetValNum) { - const User *V = U->getUser(); - if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) { - // The value is returned from a function. It's only live when the - // function's return value is live. We use RetValNum here, for the case - // that U is really a use of an insertvalue instruction that uses the - // original Use. - const Function *F = RI->getParent()->getParent(); - if (RetValNum != -1U) { - RetOrArg Use = CreateRet(F, RetValNum); - // We might be live, depending on the liveness of Use. 
- return MarkIfNotLive(Use, MaybeLiveUses); - } else { - DeadArgumentEliminationPass::Liveness Result = MaybeLive; - for (unsigned Ri = 0; Ri < NumRetVals(F); ++Ri) { - RetOrArg Use = CreateRet(F, Ri); - // We might be live, depending on the liveness of Use. If any - // sub-value is live, then the entire value is considered live. This - // is a conservative choice, and better tracking is possible. - DeadArgumentEliminationPass::Liveness SubResult = - MarkIfNotLive(Use, MaybeLiveUses); - if (Result != Live) - Result = SubResult; - } - return Result; - } + const User *V = U->getUser(); + if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) { + // The value is returned from a function. It's only live when the + // function's return value is live. We use RetValNum here, for the case + // that U is really a use of an insertvalue instruction that uses the + // original Use. + const Function *F = RI->getParent()->getParent(); + if (RetValNum != -1U) { + RetOrArg Use = createRet(F, RetValNum); + // We might be live, depending on the liveness of Use. + return markIfNotLive(Use, MaybeLiveUses); + } + + DeadArgumentEliminationPass::Liveness Result = MaybeLive; + for (unsigned Ri = 0; Ri < numRetVals(F); ++Ri) { + RetOrArg Use = createRet(F, Ri); + // We might be live, depending on the liveness of Use. If any + // sub-value is live, then the entire value is considered live. This + // is a conservative choice, and better tracking is possible. + DeadArgumentEliminationPass::Liveness SubResult = + markIfNotLive(Use, MaybeLiveUses); + if (Result != Live) + Result = SubResult; } - if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) { - if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex() - && IV->hasIndices()) - // The use we are examining is inserted into an aggregate. Our liveness - // depends on all uses of that aggregate, but if it is used as a return - // value, only index at which we were inserted counts. 
- RetValNum = *IV->idx_begin(); + return Result; + } - // Note that if we are used as the aggregate operand to the insertvalue, - // we don't change RetValNum, but do survey all our uses. + if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) { + if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex() && + IV->hasIndices()) + // The use we are examining is inserted into an aggregate. Our liveness + // depends on all uses of that aggregate, but if it is used as a return + // value, only index at which we were inserted counts. + RetValNum = *IV->idx_begin(); - Liveness Result = MaybeLive; - for (const Use &UU : IV->uses()) { - Result = SurveyUse(&UU, MaybeLiveUses, RetValNum); - if (Result == Live) - break; - } - return Result; + // Note that if we are used as the aggregate operand to the insertvalue, + // we don't change RetValNum, but do survey all our uses. + + Liveness Result = MaybeLive; + for (const Use &UU : IV->uses()) { + Result = surveyUse(&UU, MaybeLiveUses, RetValNum); + if (Result == Live) + break; } + return Result; + } - if (const auto *CB = dyn_cast<CallBase>(V)) { - const Function *F = CB->getCalledFunction(); - if (F) { - // Used in a direct call. + if (const auto *CB = dyn_cast<CallBase>(V)) { + const Function *F = CB->getCalledFunction(); + if (F) { + // Used in a direct call. - // The function argument is live if it is used as a bundle operand. - if (CB->isBundleOperand(U)) - return Live; + // The function argument is live if it is used as a bundle operand. + if (CB->isBundleOperand(U)) + return Live; - // Find the argument number. We know for sure that this use is an - // argument, since if it was the function argument this would be an - // indirect call and the we know can't be looking at a value of the - // label type (for the invoke instruction). - unsigned ArgNo = CB->getArgOperandNo(U); + // Find the argument number. 
We know for sure that this use is an + // argument, since if it was the function argument this would be an + // indirect call and that we know can't be looking at a value of the + // label type (for the invoke instruction). + unsigned ArgNo = CB->getArgOperandNo(U); - if (ArgNo >= F->getFunctionType()->getNumParams()) - // The value is passed in through a vararg! Must be live. - return Live; + if (ArgNo >= F->getFunctionType()->getNumParams()) + // The value is passed in through a vararg! Must be live. + return Live; - assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) && - "Argument is not where we expected it"); + assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) && + "Argument is not where we expected it"); - // Value passed to a normal call. It's only live when the corresponding - // argument to the called function turns out live. - RetOrArg Use = CreateArg(F, ArgNo); - return MarkIfNotLive(Use, MaybeLiveUses); - } + // Value passed to a normal call. It's only live when the corresponding + // argument to the called function turns out live. + RetOrArg Use = createArg(F, ArgNo); + return markIfNotLive(Use, MaybeLiveUses); } - // Used in any other way? Value must be live. - return Live; + } + // Used in any other way? Value must be live. + return Live; } -/// SurveyUses - This looks at all the uses of the given value +/// Looks at all the uses of the given value /// Returns the Liveness deduced from the uses of this value. /// /// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If /// the result is Live, MaybeLiveUses might be modified but its content should /// be ignored (since it might not be complete). DeadArgumentEliminationPass::Liveness -DeadArgumentEliminationPass::SurveyUses(const Value *V, +DeadArgumentEliminationPass::surveyUses(const Value *V, UseVector &MaybeLiveUses) { // Assume it's dead (which will only hold if there are no uses at all..). Liveness Result = MaybeLive; // Check each use. 
for (const Use &U : V->uses()) { - Result = SurveyUse(&U, MaybeLiveUses); + Result = surveyUse(&U, MaybeLiveUses); if (Result == Live) break; } return Result; } -// SurveyFunction - This performs the initial survey of the specified function, -// checking out whether or not it uses any of its incoming arguments or whether -// any callers use the return value. This fills in the LiveValues set and Uses -// map. -// -// We consider arguments of non-internal functions to be intrinsically alive as -// well as arguments to functions which have their "address taken". -void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { +/// Performs the initial survey of the specified function, checking out whether +/// it uses any of its incoming arguments or whether any callers use the return +/// value. This fills in the LiveValues set and Uses map. +/// +/// We consider arguments of non-internal functions to be intrinsically alive as +/// well as arguments to functions which have their "address taken". +void DeadArgumentEliminationPass::surveyFunction(const Function &F) { // Functions with inalloca/preallocated parameters are expecting args in a // particular register and memory layout. if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) { - MarkLive(F); + markLive(F); return; } @@ -499,11 +498,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // otherwise rely on the frame layout in a way that this analysis will not // see. 
if (F.hasFnAttribute(Attribute::Naked)) { - MarkLive(F); + markLive(F); return; } - unsigned RetCount = NumRetVals(&F); + unsigned RetCount = numRetVals(&F); // Assume all return values are dead using RetVals = SmallVector<Liveness, 5>; @@ -518,20 +517,10 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { RetUses MaybeLiveRetUses(RetCount); bool HasMustTailCalls = false; - - for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { - if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType() - != F.getFunctionType()->getReturnType()) { - // We don't support old style multiple return values. - MarkLive(F); - return; - } - } - + for (const BasicBlock &BB : F) { // If we have any returns of `musttail` results - the signature can't // change - if (BB->getTerminatingMustTailCall() != nullptr) + if (BB.getTerminatingMustTailCall() != nullptr) HasMustTailCalls = true; } @@ -541,7 +530,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) { - MarkLive(F); + markLive(F); return; } @@ -559,8 +548,9 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // If the function is PASSED IN as an argument, its address has been // taken. const auto *CB = dyn_cast<CallBase>(U.getUser()); - if (!CB || !CB->isCallee(&U)) { - MarkLive(F); + if (!CB || !CB->isCallee(&U) || + CB->getFunctionType() != F.getFunctionType()) { + markLive(F); return; } @@ -577,13 +567,13 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { continue; // Check all uses of the return value. 
- for (const Use &U : CB->uses()) { - if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) { + for (const Use &UU : CB->uses()) { + if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(UU.getUser())) { // This use uses a part of our return value, survey the uses of // that part and store the results for this index only. unsigned Idx = *Ext->idx_begin(); if (RetValLiveness[Idx] != Live) { - RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]); + RetValLiveness[Idx] = surveyUses(Ext, MaybeLiveRetUses[Idx]); if (RetValLiveness[Idx] == Live) NumLiveRetVals++; } @@ -591,16 +581,16 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // Used by something else than extractvalue. Survey, but assume that the // result applies to all sub-values. UseVector MaybeLiveAggregateUses; - if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) { + if (surveyUse(&UU, MaybeLiveAggregateUses) == Live) { NumLiveRetVals = RetCount; RetValLiveness.assign(RetCount, Live); break; - } else { - for (unsigned Ri = 0; Ri != RetCount; ++Ri) { - if (RetValLiveness[Ri] != Live) - MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(), - MaybeLiveAggregateUses.end()); - } + } + + for (unsigned Ri = 0; Ri != RetCount; ++Ri) { + if (RetValLiveness[Ri] != Live) + MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(), + MaybeLiveAggregateUses.end()); } } } @@ -613,7 +603,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // Now we've inspected all callers, record the liveness of our return values. 
for (unsigned Ri = 0; Ri != RetCount; ++Ri) - MarkValue(CreateRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]); + markValue(createRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]); LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting args for fn: " << F.getName() << "\n"); @@ -641,81 +631,77 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } else { // See what the effect of this use is (recording any uses that cause // MaybeLive in MaybeLiveArgUses). - Result = SurveyUses(&*AI, MaybeLiveArgUses); + Result = surveyUses(&*AI, MaybeLiveArgUses); } // Mark the result. - MarkValue(CreateArg(&F, ArgI), Result, MaybeLiveArgUses); + markValue(createArg(&F, ArgI), Result, MaybeLiveArgUses); // Clear the vector again for the next iteration. MaybeLiveArgUses.clear(); } } -/// MarkValue - This function marks the liveness of RA depending on L. If L is -/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses, -/// such that RA will be marked live if any use in MaybeLiveUses gets marked -/// live later on. -void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L, +/// Marks the liveness of RA depending on L. If L is MaybeLive, it also takes +/// all uses in MaybeLiveUses and records them in Uses, such that RA will be +/// marked live if any use in MaybeLiveUses gets marked live later on. +void DeadArgumentEliminationPass::markValue(const RetOrArg &RA, Liveness L, const UseVector &MaybeLiveUses) { switch (L) { - case Live: - MarkLive(RA); - break; - case MaybeLive: - assert(!IsLive(RA) && "Use is already live!"); - for (const auto &MaybeLiveUse : MaybeLiveUses) { - if (IsLive(MaybeLiveUse)) { - // A use is live, so this value is live. - MarkLive(RA); - break; - } else { - // Note any uses of this value, so this value can be - // marked live whenever one of the uses becomes live. 
- Uses.insert(std::make_pair(MaybeLiveUse, RA)); - } + case Live: + markLive(RA); + break; + case MaybeLive: + assert(!isLive(RA) && "Use is already live!"); + for (const auto &MaybeLiveUse : MaybeLiveUses) { + if (isLive(MaybeLiveUse)) { + // A use is live, so this value is live. + markLive(RA); + break; } - break; + // Note any uses of this value, so this value can be + // marked live whenever one of the uses becomes live. + Uses.emplace(MaybeLiveUse, RA); + } + break; } } -/// MarkLive - Mark the given Function as alive, meaning that it cannot be -/// changed in any way. Additionally, -/// mark any values that are used as this function's parameters or by its return -/// values (according to Uses) live as well. -void DeadArgumentEliminationPass::MarkLive(const Function &F) { +/// Mark the given Function as alive, meaning that it cannot be changed in any +/// way. Additionally, mark any values that are used as this function's +/// parameters or by its return values (according to Uses) live as well. +void DeadArgumentEliminationPass::markLive(const Function &F) { LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Intrinsically live fn: " << F.getName() << "\n"); // Mark the function as live. LiveFunctions.insert(&F); // Mark all arguments as live. for (unsigned ArgI = 0, E = F.arg_size(); ArgI != E; ++ArgI) - PropagateLiveness(CreateArg(&F, ArgI)); + propagateLiveness(createArg(&F, ArgI)); // Mark all return values as live. - for (unsigned Ri = 0, E = NumRetVals(&F); Ri != E; ++Ri) - PropagateLiveness(CreateRet(&F, Ri)); + for (unsigned Ri = 0, E = numRetVals(&F); Ri != E; ++Ri) + propagateLiveness(createRet(&F, Ri)); } -/// MarkLive - Mark the given return value or argument as live. Additionally, -/// mark any values that are used by this value (according to Uses) live as -/// well. -void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) { - if (IsLive(RA)) +/// Mark the given return value or argument as live. 
Additionally, mark any +/// values that are used by this value (according to Uses) live as well. +void DeadArgumentEliminationPass::markLive(const RetOrArg &RA) { + if (isLive(RA)) return; // Already marked Live. LiveValues.insert(RA); LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking " << RA.getDescription() << " live\n"); - PropagateLiveness(RA); + propagateLiveness(RA); } -bool DeadArgumentEliminationPass::IsLive(const RetOrArg &RA) { +bool DeadArgumentEliminationPass::isLive(const RetOrArg &RA) { return LiveFunctions.count(RA.F) || LiveValues.count(RA); } -/// PropagateLiveness - Given that RA is a live value, propagate it's liveness -/// to any other values it uses (according to Uses). -void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) { +/// Given that RA is a live value, propagate it's liveness to any other values +/// it uses (according to Uses). +void DeadArgumentEliminationPass::propagateLiveness(const RetOrArg &RA) { // We don't use upper_bound (or equal_range) here, because our recursive call // to ourselves is likely to cause the upper_bound (which is the first value // not belonging to RA) to become erased and the iterator invalidated. @@ -723,18 +709,17 @@ void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) { UseMap::iterator E = Uses.end(); UseMap::iterator I; for (I = Begin; I != E && I->first == RA; ++I) - MarkLive(I->second); + markLive(I->second); // Erase RA from the Uses map (from the lower bound to wherever we ended up // after the loop). Uses.erase(Begin, I); } -// RemoveDeadStuffFromFunction - Remove any arguments and return values from F -// that are not in LiveValues. Transform the function and all of the callees of -// the function to not have these arguments and return values. -// -bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { +/// Remove any arguments and return values from F that are not in LiveValues. 
+/// Transform the function and all the callees of the function to not have these +/// arguments and return values. +bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) { // Don't modify fully live functions if (LiveFunctions.count(F)) return false; @@ -742,7 +727,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Start by computing a new prototype for the function, which is the same as // the old function, but has fewer arguments and a different return type. FunctionType *FTy = F->getFunctionType(); - std::vector<Type*> Params; + std::vector<Type *> Params; // Keep track of if we have a live 'returned' argument bool HasLiveReturnedArg = false; @@ -759,7 +744,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { unsigned ArgI = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgI) { - RetOrArg Arg = CreateArg(F, ArgI); + RetOrArg Arg = createArg(F, ArgI); if (LiveValues.erase(Arg)) { Params.push_back(I->getType()); ArgAlive[ArgI] = true; @@ -776,11 +761,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Find out the new return value. Type *RetTy = FTy->getReturnType(); Type *NRetTy = nullptr; - unsigned RetCount = NumRetVals(F); + unsigned RetCount = numRetVals(F); // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); - std::vector<Type*> RetTypes; + std::vector<Type *> RetTypes; // If there is a function with a live 'returned' argument but a dead return // value, then there are two possible actions: @@ -792,9 +777,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // It's not clear in the general case which option is more profitable because, // even in the absence of explicit uses of the return value, code generation // is free to use the 'returned' attribute to do things like eliding - // save/restores of registers across calls. 
Whether or not this happens is - // target and ABI-specific as well as depending on the amount of register - // pressure, so there's no good way for an IR-level pass to figure this out. + // save/restores of registers across calls. Whether this happens is target and + // ABI-specific as well as depending on the amount of register pressure, so + // there's no good way for an IR-level pass to figure this out. // // Fortunately, the only places where 'returned' is currently generated by // the FE are places where 'returned' is basically free and almost always a @@ -806,7 +791,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { } else { // Look at each of the original return values individually. for (unsigned Ri = 0; Ri != RetCount; ++Ri) { - RetOrArg Ret = CreateRet(F, Ri); + RetOrArg Ret = createRet(F, Ri); if (LiveValues.erase(Ret)) { RetTypes.push_back(getRetComponentType(F, Ri)); NewRetIdxs[Ri] = RetTypes.size() - 1; @@ -879,9 +864,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Loop over all of the callers of the function, transforming the call sites - // to pass in a smaller number of arguments into the new function. - std::vector<Value*> Args; + // Loop over all the callers of the function, transforming the call sites to + // pass in a smaller number of arguments into the new function. + std::vector<Value *> Args; while (!F->use_empty()) { CallBase &CB = cast<CallBase>(*F->user_back()); @@ -896,7 +881,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Declare these outside of the loops, so we can reuse them for the second // loop, which loops the varargs. - auto I = CB.arg_begin(); + auto *I = CB.arg_begin(); unsigned Pi = 0; // Loop over those operands, corresponding to the normal arguments to the // original function, and add those that are still alive. 
@@ -909,11 +894,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // If the return type has changed, then get rid of 'returned' on the // call site. The alternative is to make all 'returned' attributes on // call sites keep the return value alive just like 'returned' - // attributes on function declaration but it's less clearly a win and + // attributes on function declaration, but it's less clearly a win and // this is not an expected case anyway ArgAttrVec.push_back(AttributeSet::get( - F->getContext(), - AttrBuilder(F->getContext(), Attrs).removeAttribute(Attribute::Returned))); + F->getContext(), AttrBuilder(F->getContext(), Attrs) + .removeAttribute(Attribute::Returned))); } else { // Otherwise, use the original attributes. ArgAttrVec.push_back(Attrs); @@ -921,7 +906,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { } // Push any varargs arguments on the list. Don't forget their attributes. - for (auto E = CB.arg_end(); I != E; ++I, ++Pi) { + for (auto *E = CB.arg_end(); I != E; ++I, ++Pi) { Args.push_back(*I); ArgAttrVec.push_back(CallPAL.getParamAttrs(Pi)); } @@ -934,8 +919,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { AttributeSet FnAttrs = CallPAL.getFnAttrs().removeAttribute( F->getContext(), Attribute::AllocSize); - AttributeList NewCallPAL = AttributeList::get( - F->getContext(), FnAttrs, RetAttrs, ArgAttrVec); + AttributeList NewCallPAL = + AttributeList::get(F->getContext(), FnAttrs, RetAttrs, ArgAttrVec); SmallVector<OperandBundleDef, 1> OpBundles; CB.getOperandBundlesAsDefs(OpBundles); @@ -961,10 +946,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { CB.replaceAllUsesWith(NewCB); NewCB->takeName(&CB); } else if (NewCB->getType()->isVoidTy()) { - // If the return value is dead, replace any uses of it with undef + // If the return value is dead, replace any uses of it with poison // (any non-debug value uses will get 
removed later on). if (!CB.getType()->isX86_MMXTy()) - CB.replaceAllUsesWith(UndefValue::get(CB.getType())); + CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); } else { assert((RetTy->isStructTy() || RetTy->isArrayTy()) && "Return type changed, but not into a void. The old return type" @@ -980,8 +965,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // with all the uses, we will just rebuild it using extract/insertvalue // chaining and let instcombine clean that up. // - // Start out building up our return value from undef - Value *RetVal = UndefValue::get(RetTy); + // Start out building up our return value from poison + Value *RetVal = PoisonValue::get(RetTy); for (unsigned Ri = 0; Ri != RetCount; ++Ri) if (NewRetIdxs[Ri] != -1) { Value *V; @@ -1026,10 +1011,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { I2->takeName(&*I); ++I2; } else { - // If this argument is dead, replace any uses of it with undef + // If this argument is dead, replace any uses of it with poison // (any non-debug value uses will get removed later on). if (!I->getType()->isX86_MMXTy()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); } // If we change the return value of the function we must rewrite any return @@ -1048,8 +1033,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // This does generate messy code, but we'll let it to instcombine to // clean that up. 
Value *OldRet = RI->getOperand(0); - // Start out building up our return value from undef - RetVal = UndefValue::get(NRetTy); + // Start out building up our return value from poison + RetVal = PoisonValue::get(NRetTy); for (unsigned RetI = 0; RetI != RetCount; ++RetI) if (NewRetIdxs[RetI] != -1) { Value *EV = IRB.CreateExtractValue(OldRet, RetI, "oldret"); @@ -1074,12 +1059,22 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { BB.getInstList().erase(RI); } - // Clone metadatas from the old function, including debug info descriptor. + // Clone metadata from the old function, including debug info descriptor. SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; F->getAllMetadata(MDs); for (auto MD : MDs) NF->addMetadata(MD.first, *MD.second); + // If either the return value(s) or argument(s) are removed, then probably the + // function does not follow standard calling conventions anymore. Hence, add + // DW_CC_nocall to DISubroutineType to inform debugger that it may not be safe + // to call this function or try to interpret the return value. + if (NFTy != FTy && NF->getSubprogram()) { + DISubprogram *SP = NF->getSubprogram(); + auto Temp = SP->getType()->cloneWithCC(llvm::dwarf::DW_CC_nocall); + SP->replaceType(MDNode::replaceWithPermanent(std::move(Temp))); + } + // Now that the old function is dead, delete it. F->eraseFromParent(); @@ -1097,26 +1092,25 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M, LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n"); for (Function &F : llvm::make_early_inc_range(M)) if (F.getFunctionType()->isVarArg()) - Changed |= DeleteDeadVarargs(F); + Changed |= deleteDeadVarargs(F); - // Second phase:loop through the module, determining which arguments are live. - // We assume all arguments are dead unless proven otherwise (allowing us to - // determine that dead arguments passed into recursive functions are dead). 
- // + // Second phase: Loop through the module, determining which arguments are + // live. We assume all arguments are dead unless proven otherwise (allowing us + // to determine that dead arguments passed into recursive functions are dead). LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Determining liveness\n"); for (auto &F : M) - SurveyFunction(F); + surveyFunction(F); // Now, remove all dead arguments and return values from each function in // turn. We use make_early_inc_range here because functions will probably get // removed (i.e. replaced by new ones). for (Function &F : llvm::make_early_inc_range(M)) - Changed |= RemoveDeadStuffFromFunction(&F); + Changed |= removeDeadStuffFromFunction(&F); // Finally, look for any unused parameters in functions with non-local - // linkage and replace the passed in parameters with undef. + // linkage and replace the passed in parameters with poison. for (auto &F : M) - Changed |= RemoveDeadArgumentsFromCallers(F); + Changed |= removeDeadArgumentsFromCallers(F); if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp index 387f114f6ffa..84280781ee70 100644 --- a/llvm/lib/Transforms/IPO/ExtractGV.cpp +++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SetVector.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp index 16d00a0c89e1..b10c2ea13469 100644 --- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -8,9 +8,9 @@ #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" +#include 
"llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 213a998d5bba..49077f92884f 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -30,7 +30,6 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -45,6 +44,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -69,6 +69,7 @@ using namespace llvm; #define DEBUG_TYPE "function-attrs" +STATISTIC(NumArgMemOnly, "Number of functions marked argmemonly"); STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumWriteOnly, "Number of functions marked writeonly"); @@ -121,28 +122,28 @@ using SCCNodeSet = SmallSetVector<Function *, 8>; /// result will be based only on AA results for the function declaration; it /// will be assumed that some other (perhaps less optimized) version of the /// function may be selected at link time. -static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, - AAResults &AAR, - const SCCNodeSet &SCCNodes) { +static FunctionModRefBehavior +checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, + const SCCNodeSet &SCCNodes) { FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); if (MRB == FMRB_DoesNotAccessMemory) // Already perfect! 
- return MAK_ReadNone; + return MRB; - if (!ThisBody) { - if (AliasAnalysis::onlyReadsMemory(MRB)) - return MAK_ReadOnly; - - if (AliasAnalysis::onlyWritesMemory(MRB)) - return MAK_WriteOnly; - - // Conservatively assume it reads and writes to memory. - return MAK_MayWrite; - } + if (!ThisBody) + return MRB; // Scan the function body for instructions that may read or write memory. bool ReadsMemory = false; bool WritesMemory = false; + // Track if the function accesses memory not based on pointer arguments or + // allocas. + bool AccessesNonArgsOrAlloca = false; + // Returns true if Ptr is not based on a function argument. + auto IsArgumentOrAlloca = [](const Value *Ptr) { + const Value *UO = getUnderlyingObject(Ptr); + return isa<Argument>(UO) || isa<AllocaInst>(UO); + }; for (Instruction &I : instructions(F)) { // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. @@ -175,6 +176,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, // If it reads, note it. if (isRefSet(MRI)) ReadsMemory = true; + AccessesNonArgsOrAlloca = true; continue; } @@ -187,12 +189,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata()); - // Skip accesses to local or constant memory as they don't impact the // externally visible mod/ref behavior. if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; + AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr); + if (isModSet(MRI)) // Writes non-local memory. WritesMemory = true; @@ -202,24 +205,29 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, } continue; } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { + MemoryLocation Loc = MemoryLocation::get(LI); // Ignore non-volatile loads from local memory. (Atomic is okay here.) 
- if (!LI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(LI); - if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } + if (!LI->isVolatile() && + AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr); } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { + MemoryLocation Loc = MemoryLocation::get(SI); // Ignore non-volatile stores to local memory. (Atomic is okay here.) - if (!SI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(SI); - if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } + if (!SI->isVolatile() && + AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr); } else if (VAArgInst *VI = dyn_cast<VAArgInst>(&I)) { // Ignore vaargs on local memory. MemoryLocation Loc = MemoryLocation::get(VI); if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; + AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr); + } else { + // If AccessesNonArgsOrAlloca has not been updated above, set it + // conservatively. + AccessesNonArgsOrAlloca |= I.mayReadOrWriteMemory(); } // Any remaining instructions need to be taken seriously! Check if they @@ -232,61 +240,74 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, ReadsMemory |= I.mayReadFromMemory(); } - if (WritesMemory) { - if (!ReadsMemory) - return MAK_WriteOnly; - else - return MAK_MayWrite; - } + if (!WritesMemory && !ReadsMemory) + return FMRB_DoesNotAccessMemory; - return ReadsMemory ? 
MAK_ReadOnly : MAK_ReadNone; + FunctionModRefBehavior Result = FunctionModRefBehavior(FMRL_Anywhere); + if (!AccessesNonArgsOrAlloca) + Result = FunctionModRefBehavior(FMRL_ArgumentPointees); + if (WritesMemory) + Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Mod)); + if (ReadsMemory) + Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Ref)); + return Result; } -MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F, - AAResults &AAR) { +FunctionModRefBehavior llvm::computeFunctionBodyMemoryAccess(Function &F, + AAResults &AAR) { return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}); } -/// Deduce readonly/readnone attributes for the SCC. +/// Deduce readonly/readnone/writeonly attributes for the SCC. template <typename AARGetterT> -static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, - SmallSet<Function *, 8> &Changed) { +static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, + SmallSet<Function *, 8> &Changed) { // Check if any of the functions in the SCC read or write memory. If they // write memory then they can't be marked readnone or readonly. bool ReadsMemory = false; bool WritesMemory = false; + // Check if all functions only access memory through their arguments. + bool ArgMemOnly = true; for (Function *F : SCCNodes) { // Call the callable parameter to look up AA results for this function. AAResults &AAR = AARGetter(*F); - // Non-exact function definitions may not be selected at link time, and an // alternative version that writes to memory may be selected. See the // comment on GlobalValue::isDefinitionExact for more details. 
- switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(), - AAR, SCCNodes)) { - case MAK_MayWrite: + FunctionModRefBehavior FMRB = + checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes); + if (FMRB == FMRB_DoesNotAccessMemory) + continue; + ModRefInfo MR = createModRefInfo(FMRB); + ReadsMemory |= isRefSet(MR); + WritesMemory |= isModSet(MR); + ArgMemOnly &= AliasAnalysis::onlyAccessesArgPointees(FMRB); + // Reached neither readnone, readonly, writeonly nor argmemonly can be + // inferred. Exit. + if (ReadsMemory && WritesMemory && !ArgMemOnly) return; - case MAK_ReadOnly: - ReadsMemory = true; - break; - case MAK_WriteOnly: - WritesMemory = true; - break; - case MAK_ReadNone: - // Nothing to do! - break; - } } - // If the SCC contains both functions that read and functions that write, then - // we cannot add readonly attributes. - if (ReadsMemory && WritesMemory) - return; - - // Success! Functions in this SCC do not access memory, or only read memory. - // Give them the appropriate attribute. + assert((!ReadsMemory || !WritesMemory || ArgMemOnly) && + "no memory attributes can be added for this SCC, should have exited " + "earlier"); + // Success! Functions in this SCC do not access memory, only read memory, + // only write memory, or only access memory through its arguments. Give them + // the appropriate attribute. for (Function *F : SCCNodes) { + // If possible add argmemonly attribute to F, if it accesses memory. + if (ArgMemOnly && !F->onlyAccessesArgMemory() && + (ReadsMemory || WritesMemory)) { + NumArgMemOnly++; + F->addFnAttr(Attribute::ArgMemOnly); + Changed.insert(F); + } + + // The SCC contains functions both writing and reading from memory. We + // cannot add readonly or writeonline attributes. + if (ReadsMemory && WritesMemory) + continue; if (F->doesNotAccessMemory()) // Already perfect! 
continue; @@ -1614,6 +1635,26 @@ static bool basicBlockCanReturn(BasicBlock &BB) { return none_of(BB, instructionDoesNotReturn); } +// FIXME: this doesn't handle recursion. +static bool canReturn(Function &F) { + SmallVector<BasicBlock *, 16> Worklist; + SmallPtrSet<BasicBlock *, 16> Visited; + + Visited.insert(&F.front()); + Worklist.push_back(&F.front()); + + do { + BasicBlock *BB = Worklist.pop_back_val(); + if (basicBlockCanReturn(*BB)) + return true; + for (BasicBlock *Succ : successors(BB)) + if (Visited.insert(Succ).second) + Worklist.push_back(Succ); + } while (!Worklist.empty()); + + return false; +} + // Set the noreturn function attribute if possible. static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed) { @@ -1622,9 +1663,7 @@ static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, F->doesNotReturn()) continue; - // The function can return if any basic blocks can return. - // FIXME: this doesn't handle recursion or unreachable blocks. - if (none_of(*F, basicBlockCanReturn)) { + if (!canReturn(*F)) { F->setDoesNotReturn(); Changed.insert(F); } @@ -1792,7 +1831,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) { SmallSet<Function *, 8> Changed; addArgumentReturnedAttrs(Nodes.SCCNodes, Changed); - addReadAttrs(Nodes.SCCNodes, AARGetter, Changed); + addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed); addArgumentAttrs(Nodes.SCCNodes, Changed); inferConvergent(Nodes.SCCNodes, Changed); addNoReturnAttrs(Nodes.SCCNodes, Changed); @@ -1896,6 +1935,7 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { char PostOrderFunctionAttrsLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs", "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, 
"function-attrs", @@ -1975,12 +2015,13 @@ static bool addNoRecurseAttrsTopDown(Function &F) { // this function could be recursively (indirectly) called. Note that this // also detects if F is directly recursive as F is not yet marked as // a norecurse function. - for (auto *U : F.users()) { - auto *I = dyn_cast<Instruction>(U); + for (auto &U : F.uses()) { + auto *I = dyn_cast<Instruction>(U.getUser()); if (!I) return false; CallBase *CB = dyn_cast<CallBase>(I); - if (!CB || !CB->getParent()->getParent()->doesNotRecurse()) + if (!CB || !CB->isCallee(&U) || + !CB->getParent()->getParent()->doesNotRecurse()) return false; } F.setDoesNotRecurse(); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index d9b43109f629..56e2df14ff38 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/Constants.h" @@ -33,8 +32,6 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/InitializePasses.h" #include "llvm/Linker/IRMover.h" -#include "llvm/Object/ModuleSymbolTable.h" -#include "llvm/Object/SymbolicFile.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -1112,12 +1109,13 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule, llvm_unreachable("Expected GV to be converted"); } else { // If all copies of the original symbol had global unnamed addr and - // linkonce_odr linkage, it should be an auto hide symbol. In that case - // the thin link would have marked it as CanAutoHide. Add hidden visibility - // to the symbol to preserve the property. + // linkonce_odr linkage, or if all of them had local unnamed addr linkage + // and are constants, then it should be an auto hide symbol. 
In that case + // the thin link would have marked it as CanAutoHide. Add hidden + // visibility to the symbol to preserve the property. if (NewLinkage == GlobalValue::WeakODRLinkage && GS->second->canAutoHide()) { - assert(GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr()); + assert(GV.canBeOmittedFromSymbolTable()); GV.setVisibility(GlobalValue::HiddenVisibility); } @@ -1330,10 +1328,9 @@ Expected<bool> FunctionImporter::importFunctions( << " from " << SrcModule->getSourceFileName() << "\n"; } - if (Error Err = Mover.move( - std::move(SrcModule), GlobalsToImport.getArrayRef(), - [](GlobalValue &, IRMover::ValueAdder) {}, - /*IsPerformingImport=*/true)) + if (Error Err = Mover.move(std::move(SrcModule), + GlobalsToImport.getArrayRef(), nullptr, + /*IsPerformingImport=*/true)) report_fatal_error(Twine("Function Import: link error: ") + toString(std::move(Err))); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 6c3cc3914337..dafd0dc865a2 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -19,11 +19,8 @@ // Current limitations: // - It does not yet handle integer ranges. We do support "literal constants", // but that's off by default under an option. -// - Only 1 argument per function is specialised, // - The cost-model could be further looked into (it mainly focuses on inlining // benefits), -// - We are not yet caching analysis results, but profiling and checking where -// extra compile time is spent didn't suggest this to be a problem. 
// // Ideas: // - With a function specialization attribute for arguments, we could have @@ -49,15 +46,16 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueLattice.h" +#include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/SCCPSolver.h" #include "llvm/Transforms/Utils/SizeOpts.h" #include <cmath> @@ -98,8 +96,13 @@ static cl::opt<bool> SpecializeOnAddresses( "func-specialization-on-address", cl::init(false), cl::Hidden, cl::desc("Enable function specialization on the address of global values")); -// TODO: This needs checking to see the impact on compile-times, which is why -// this is off by default for now. +// Disabled by default as it can significantly increase compilation times. +// Running nikic's compile time tracker on x86 with instruction count as the +// metric shows 3-4% regression for SPASS while being neutral for all other +// benchmarks of the llvm test suite. +// +// https://llvm-compile-time-tracker.com +// https://github.com/nikic/llvm-compile-time-tracker static cl::opt<bool> EnableSpecializationForLiteralConstant( "function-specialization-for-literal-constant", cl::init(false), cl::Hidden, cl::desc("Enable specialization of functions that take a literal constant " @@ -108,24 +111,18 @@ static cl::opt<bool> EnableSpecializationForLiteralConstant( namespace { // Bookkeeping struct to pass data from the analysis and profitability phase // to the actual transform helper functions. 
-struct ArgInfo { - Function *Fn; // The function to perform specialisation on. - Argument *Arg; // The Formal argument being analysed. - Constant *Const; // A corresponding actual constant argument. - InstructionCost Gain; // Profitability: Gain = Bonus - Cost. - - // Flag if this will be a partial specialization, in which case we will need - // to keep the original function around in addition to the added - // specializations. - bool Partial = false; - - ArgInfo(Function *F, Argument *A, Constant *C, InstructionCost G) - : Fn(F), Arg(A), Const(C), Gain(G){}; +struct SpecializationInfo { + SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs. + InstructionCost Gain; // Profitability: Gain = Bonus - Cost. }; } // Anonymous namespace using FuncList = SmallVectorImpl<Function *>; -using ConstList = SmallVectorImpl<Constant *>; +using CallArgBinding = std::pair<CallBase *, Constant *>; +using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>; +// We are using MapVector because it guarantees deterministic iteration +// order across executions. +using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>; // Helper to check if \p LV is either a constant or a constant // range with a single element. This should cover exactly the same cases as the @@ -204,41 +201,45 @@ static Constant *getConstantStackValue(CallInst *Call, Value *Val, // ret void // } // -static void constantArgPropagation(FuncList &WorkList, - Module &M, SCCPSolver &Solver) { +static void constantArgPropagation(FuncList &WorkList, Module &M, + SCCPSolver &Solver) { // Iterate over the argument tracked functions see if there // are any new constant values for the call instruction via // stack variables. for (auto *F : WorkList) { - // TODO: Generalize for any read only arguments. 
- if (F->arg_size() != 1) - continue; - - auto &Arg = *F->arg_begin(); - if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy()) - continue; for (auto *User : F->users()) { + auto *Call = dyn_cast<CallInst>(User); if (!Call) - break; - auto *ArgOp = Call->getArgOperand(0); - auto *ArgOpType = ArgOp->getType(); - auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver); - if (!ConstVal) - break; + continue; - Value *GV = new GlobalVariable(M, ConstVal->getType(), true, - GlobalValue::InternalLinkage, ConstVal, - "funcspec.arg"); + bool Changed = false; + for (const Use &U : Call->args()) { + unsigned Idx = Call->getArgOperandNo(&U); + Value *ArgOp = Call->getArgOperand(Idx); + Type *ArgOpType = ArgOp->getType(); - if (ArgOpType != ConstVal->getType()) - GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOp->getType()); + if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy()) + continue; + + auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver); + if (!ConstVal) + continue; + + Value *GV = new GlobalVariable(M, ConstVal->getType(), true, + GlobalValue::InternalLinkage, ConstVal, + "funcspec.arg"); + if (ArgOpType != ConstVal->getType()) + GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType); - Call->setArgOperand(0, GV); + Call->setArgOperand(Idx, GV); + Changed = true; + } // Add the changed CallInst to Solver Worklist - Solver.visitCall(*Call); + if (Changed) + Solver.visitCall(*Call); } } } @@ -275,7 +276,10 @@ class FunctionSpecializer { std::function<TargetTransformInfo &(Function &)> GetTTI; std::function<TargetLibraryInfo &(Function &)> GetTLI; - SmallPtrSet<Function *, 2> SpecializedFuncs; + SmallPtrSet<Function *, 4> SpecializedFuncs; + SmallPtrSet<Function *, 4> FullySpecialized; + SmallVector<Instruction *> ReplacedWithConstant; + DenseMap<Function *, CodeMetrics> FunctionMetrics; public: FunctionSpecializer(SCCPSolver &Solver, @@ -284,42 +288,66 @@ public: std::function<TargetLibraryInfo &(Function &)> GetTLI) : 
Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {} + ~FunctionSpecializer() { + // Eliminate dead code. + removeDeadInstructions(); + removeDeadFunctions(); + } + /// Attempt to specialize functions in the module to enable constant /// propagation across function boundaries. /// /// \returns true if at least one function is specialized. - bool - specializeFunctions(FuncList &FuncDecls, - FuncList &CurrentSpecializations) { + bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) { bool Changed = false; - for (auto *F : FuncDecls) { - if (!isCandidateFunction(F, CurrentSpecializations)) + for (auto *F : Candidates) { + if (!isCandidateFunction(F)) continue; auto Cost = getSpecializationCost(F); if (!Cost.isValid()) { LLVM_DEBUG( - dbgs() << "FnSpecialization: Invalid specialisation cost.\n"); + dbgs() << "FnSpecialization: Invalid specialization cost.\n"); continue; } - auto ConstArgs = calculateGains(F, Cost); - if (ConstArgs.empty()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " + << F->getName() << " is " << Cost << "\n"); + + SmallVector<CallSpecBinding, 8> Specializations; + if (!calculateGains(F, Cost, Specializations)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n"); continue; } - for (auto &CA : ConstArgs) { - specializeFunction(CA, CurrentSpecializations); - Changed = true; - } + Changed = true; + for (auto &Entry : Specializations) + specializeFunction(F, Entry.second, WorkList); } - updateSpecializedFuncs(FuncDecls, CurrentSpecializations); + updateSpecializedFuncs(Candidates, WorkList); NumFuncSpecialized += NbFunctionsSpecialized; return Changed; } + void removeDeadInstructions() { + for (auto *I : ReplacedWithConstant) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " << *I + << "\n"); + I->eraseFromParent(); + } + ReplacedWithConstant.clear(); + } + + void removeDeadFunctions() { 
+ for (auto *F : FullySpecialized) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " + << F->getName() << "\n"); + F->eraseFromParent(); + } + FullySpecialized.clear(); + } + bool tryToReplaceWithConstant(Value *V) { if (!V->getType()->isSingleValueType() || isa<CallBase>(V) || V->user_empty()) @@ -330,17 +358,26 @@ public: return false; auto *Const = isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType()); - V->replaceAllUsesWith(Const); - for (auto *U : Const->users()) + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V + << "\nFnSpecialization: with " << *Const << "\n"); + + // Record uses of V to avoid visiting irrelevant uses of const later. + SmallVector<Instruction *> UseInsts; + for (auto *U : V->users()) if (auto *I = dyn_cast<Instruction>(U)) if (Solver.isBlockExecutable(I->getParent())) - Solver.visit(I); + UseInsts.push_back(I); + + V->replaceAllUsesWith(Const); + + for (auto *I : UseInsts) + Solver.visit(I); // Remove the instruction from Block and Solver. if (auto *I = dyn_cast<Instruction>(V)) { if (I->isSafeToRemove()) { - I->eraseFromParent(); + ReplacedWithConstant.push_back(I); Solver.removeLatticeValueFor(I); } } @@ -352,92 +389,108 @@ private: // also in the cost model. unsigned NbFunctionsSpecialized = 0; + // Compute the code metrics for function \p F. + CodeMetrics &analyzeFunction(Function *F) { + auto I = FunctionMetrics.insert({F, CodeMetrics()}); + CodeMetrics &Metrics = I.first->second; + if (I.second) { + // The code metrics were not cached. 
+ SmallPtrSet<const Value *, 32> EphValues; + CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); + for (BasicBlock &BB : *F) + Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " + << F->getName() << " is " << Metrics.NumInsts + << " instructions\n"); + } + return Metrics; + } + /// Clone the function \p F and remove the ssa_copy intrinsics added by /// the SCCPSolver in the cloned version. - Function *cloneCandidateFunction(Function *F) { - ValueToValueMapTy EmptyMap; - Function *Clone = CloneFunction(F, EmptyMap); + Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) { + Function *Clone = CloneFunction(F, Mappings); removeSSACopy(*Clone); return Clone; } - /// This function decides whether it's worthwhile to specialize function \p F - /// based on the known constant values its arguments can take on, i.e. it - /// calculates a gain and returns a list of actual arguments that are deemed - /// profitable to specialize. Specialization is performed on the first - /// interesting argument. Specializations based on additional arguments will - /// be evaluated on following iterations of the main IPSCCP solve loop. - SmallVector<ArgInfo> calculateGains(Function *F, InstructionCost Cost) { - SmallVector<ArgInfo> Worklist; + /// This function decides whether it's worthwhile to specialize function + /// \p F based on the known constant values its arguments can take on. It + /// only discovers potential specialization opportunities without actually + /// applying them. + /// + /// \returns true if any specializations have been found. + bool calculateGains(Function *F, InstructionCost Cost, + SmallVectorImpl<CallSpecBinding> &WorkList) { + SpecializationMap Specializations; // Determine if we should specialize the function based on the values the // argument can take on. If specialization is not profitable, we continue // on to the next argument. 
for (Argument &FormalArg : F->args()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing arg: " - << FormalArg.getName() << "\n"); // Determine if this argument is interesting. If we know the argument can - // take on any constant values, they are collected in Constants. If the - // argument can only ever equal a constant value in Constants, the - // function will be completely specialized, and the IsPartial flag will - // be set to false by isArgumentInteresting (that function only adds - // values to the Constants list that are deemed profitable). - bool IsPartial = true; - SmallVector<Constant *> ActualConstArg; - if (!isArgumentInteresting(&FormalArg, ActualConstArg, IsPartial)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n"); + // take on any constant values, they are collected in Constants. + SmallVector<CallArgBinding, 8> ActualArgs; + if (!isArgumentInteresting(&FormalArg, ActualArgs)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Argument " + << FormalArg.getNameOrAsOperand() + << " is not interesting\n"); continue; } - for (auto *ActualArg : ActualConstArg) { - InstructionCost Gain = - ForceFunctionSpecialization - ? 1 - : getSpecializationBonus(&FormalArg, ActualArg) - Cost; + for (const auto &Entry : ActualArgs) { + CallBase *Call = Entry.first; + Constant *ActualArg = Entry.second; - if (Gain <= 0) - continue; - Worklist.push_back({F, &FormalArg, ActualArg, Gain}); + auto I = Specializations.insert({Call, SpecializationInfo()}); + SpecializationInfo &S = I.first->second; + + if (I.second) + S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost; + if (!ForceFunctionSpecialization) + S.Gain += getSpecializationBonus(&FormalArg, ActualArg); + S.Args.push_back({&FormalArg, ActualArg}); } + } - if (Worklist.empty()) - continue; + // Remove unprofitable specializations. + Specializations.remove_if( + [](const auto &Entry) { return Entry.second.Gain <= 0; }); - // Sort the candidates in descending order. 
- llvm::stable_sort(Worklist, [](const ArgInfo &L, const ArgInfo &R) { - return L.Gain > R.Gain; - }); + // Clear the MapVector and return the underlying vector. + WorkList = Specializations.takeVector(); - // Truncate the worklist to 'MaxClonesThreshold' candidates if - // necessary. - if (Worklist.size() > MaxClonesThreshold) { - LLVM_DEBUG(dbgs() << "FnSpecialization: number of candidates exceed " - << "the maximum number of clones threshold.\n" - << "Truncating worklist to " << MaxClonesThreshold - << " candidates.\n"); - Worklist.erase(Worklist.begin() + MaxClonesThreshold, - Worklist.end()); - } + // Sort the candidates in descending order. + llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { + return L.second.Gain > R.second.Gain; + }); - if (IsPartial || Worklist.size() < ActualConstArg.size()) - for (auto &ActualArg : Worklist) - ActualArg.Partial = true; + // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. + if (WorkList.size() > MaxClonesThreshold) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " + << "the maximum number of clones threshold.\n" + << "FnSpecialization: Truncating worklist to " + << MaxClonesThreshold << " candidates.\n"); + WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); + } - LLVM_DEBUG(dbgs() << "Sorted list of candidates by gain:\n"; - for (auto &C - : Worklist) { - dbgs() << "- Function = " << C.Fn->getName() << ", "; - dbgs() << "FormalArg = " << C.Arg->getName() << ", "; - dbgs() << "ActualArg = " << C.Const->getName() << ", "; - dbgs() << "Gain = " << C.Gain << "\n"; - }); + LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " + << F->getName() << "\n"; + for (const auto &Entry + : WorkList) { + dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain + << "\n"; + for (const ArgInfo &Arg : Entry.second.Args) + dbgs() << "FnSpecialization: FormalArg = " + << Arg.Formal->getNameOrAsOperand() + << ", ActualArg = " + << 
Arg.Actual->getNameOrAsOperand() << "\n"; + }); - // FIXME: Only one argument per function. - break; - } - return Worklist; + return !WorkList.empty(); } - bool isCandidateFunction(Function *F, FuncList &Specializations) { + bool isCandidateFunction(Function *F) { // Do not specialize the cloned function again. if (SpecializedFuncs.contains(F)) return false; @@ -461,44 +514,45 @@ private: return true; } - void specializeFunction(ArgInfo &AI, FuncList &Specializations) { - Function *Clone = cloneCandidateFunction(AI.Fn); - Argument *ClonedArg = Clone->getArg(AI.Arg->getArgNo()); + void specializeFunction(Function *F, SpecializationInfo &S, + FuncList &WorkList) { + ValueToValueMapTy Mappings; + Function *Clone = cloneCandidateFunction(F, Mappings); // Rewrite calls to the function so that they call the clone instead. - rewriteCallSites(AI.Fn, Clone, *ClonedArg, AI.Const); + rewriteCallSites(Clone, S.Args, Mappings); // Initialize the lattice state of the arguments of the function clone, // marking the argument on which we specialized the function constant // with the given value. - Solver.markArgInFuncSpecialization(AI.Fn, ClonedArg, AI.Const); + Solver.markArgInFuncSpecialization(Clone, S.Args); // Mark all the specialized functions - Specializations.push_back(Clone); + WorkList.push_back(Clone); NbFunctionsSpecialized++; // If the function has been completely specialized, the original function // is no longer needed. Mark it unreachable. - if (!AI.Partial) - Solver.markFunctionUnreachable(AI.Fn); + if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) { + if (auto *CS = dyn_cast<CallBase>(U)) + return CS->getFunction() == F; + return false; + })) { + Solver.markFunctionUnreachable(F); + FullySpecialized.insert(F); + } } /// Compute and return the cost of specializing function \p F. InstructionCost getSpecializationCost(Function *F) { - // Compute the code metrics for the function. 
- SmallPtrSet<const Value *, 32> EphValues; - CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); - CodeMetrics Metrics; - for (BasicBlock &BB : *F) - Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); - + CodeMetrics &Metrics = analyzeFunction(F); // If the code metrics reveal that we shouldn't duplicate the function, we // shouldn't specialize it. Set the specialization cost to Invalid. // Or if the lines of codes implies that this function is easy to get // inlined so that we shouldn't specialize it. - if (Metrics.notDuplicatable || + if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || (!ForceFunctionSpecialization && - Metrics.NumInsts < SmallFunctionThreshold)) { + *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) { InstructionCost C{}; C.setInvalid(); return C; @@ -539,31 +593,20 @@ private: DominatorTree DT(*F); LoopInfo LI(DT); auto &TTI = (GetTTI)(*F); - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for: " << *A - << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " + << C->getNameOrAsOperand() << "\n"); InstructionCost TotalCost = 0; for (auto *U : A->users()) { TotalCost += getUserBonus(U, TTI, LI); - LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; + LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); } // The below heuristic is only concerned with exposing inlining // opportunities via indirect call promotion. If the argument is not a - // function pointer, give up. - if (!isa<PointerType>(A->getType()) || - !isa<FunctionType>(A->getType()->getPointerElementType())) - return TotalCost; - - // Since the argument is a function pointer, its incoming constant values - // should be functions or constant expressions. The code below attempts to - // look through cast expressions to find the function that will be called. 
- Value *CalledValue = C; - while (isa<ConstantExpr>(CalledValue) && - cast<ConstantExpr>(CalledValue)->isCast()) - CalledValue = cast<User>(CalledValue)->getOperand(0); - Function *CalledFunction = dyn_cast<Function>(CalledValue); + // (potentially casted) function pointer, give up. + Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts()); if (!CalledFunction) return TotalCost; @@ -603,6 +646,9 @@ private: Bonus += Params.DefaultThreshold; else if (IC.isVariable() && IC.getCostDelta() > 0) Bonus += IC.getCostDelta(); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus + << " for user " << *U << "\n"); } return TotalCost + Bonus; @@ -615,15 +661,12 @@ private: /// specializing the function based on the incoming values of argument \p A /// would result in any significant optimization opportunities. If /// optimization opportunities exist, the constant values of \p A on which to - /// specialize the function are collected in \p Constants. If the values in - /// \p Constants represent the complete set of values that \p A can take on, - /// the function will be completely specialized, and the \p IsPartial flag is - /// set to false. + /// specialize the function are collected in \p Constants. /// /// \returns true if the function should be specialized on the given /// argument. - bool isArgumentInteresting(Argument *A, ConstList &Constants, - bool &IsPartial) { + bool isArgumentInteresting(Argument *A, + SmallVectorImpl<CallArgBinding> &Constants) { // For now, don't attempt to specialize functions based on the values of // composite types. if (!A->getType()->isSingleValueType() || A->user_empty()) @@ -632,8 +675,9 @@ private: // If the argument isn't overdefined, there's nothing to do. It should // already be constant. 
if (!Solver.getLatticeValueFor(A).isOverdefined()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: nothing to do, arg is already " - << "constant?\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " + << A->getNameOrAsOperand() + << " is already constant?\n"); return false; } @@ -650,20 +694,26 @@ private: // // TODO 2: this currently does not support constants, i.e. integer ranges. // - IsPartial = !getPossibleConstants(A, Constants); - LLVM_DEBUG(dbgs() << "FnSpecialization: interesting arg: " << *A << "\n"); + getPossibleConstants(A, Constants); + + if (Constants.empty()) + return false; + + LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " + << A->getNameOrAsOperand() << "\n"); return true; } /// Collect in \p Constants all the constant values that argument \p A can /// take on. - /// - /// \returns true if all of the values the argument can take on are constant - /// (e.g., the argument's parent function cannot be called with an - /// overdefined value). - bool getPossibleConstants(Argument *A, ConstList &Constants) { + void getPossibleConstants(Argument *A, + SmallVectorImpl<CallArgBinding> &Constants) { Function *F = A->getParent(); - bool AllConstant = true; + + // SCCP solver does not record an argument that will be constructed on + // stack. + if (A->hasByValAttr() && !F->onlyReadsMemory()) + return; // Iterate over all the call sites of the argument's parent function. for (User *U : F->users()) { @@ -672,10 +722,8 @@ private: auto &CS = *cast<CallBase>(U); // If the call site has attribute minsize set, that callsite won't be // specialized. - if (CS.hasFnAttr(Attribute::MinSize)) { - AllConstant = false; + if (CS.hasFnAttr(Attribute::MinSize)) continue; - } // If the parent of the call site will never be executed, we don't need // to worry about the passed value. 
@@ -684,13 +732,7 @@ private: auto *V = CS.getArgOperand(A->getArgNo()); if (isa<PoisonValue>(V)) - return false; - - // For now, constant expressions are fine but only if they are function - // calls. - if (auto *CE = dyn_cast<ConstantExpr>(V)) - if (!isa<Function>(CE->getOperand(0))) - return false; + return; // TrackValueOfGlobalVariable only tracks scalar global variables. if (auto *GV = dyn_cast<GlobalVariable>(V)) { @@ -698,36 +740,32 @@ private: // global values. if (!GV->isConstant()) if (!SpecializeOnAddresses) - return false; + return; if (!GV->getValueType()->isSingleValueType()) - return false; + return; } if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() || EnableSpecializationForLiteralConstant)) - Constants.push_back(cast<Constant>(V)); - else - AllConstant = false; + Constants.push_back({&CS, cast<Constant>(V)}); } - - // If the argument can only take on constant values, AllConstant will be - // true. - return AllConstant; } /// Rewrite calls to function \p F to call function \p Clone instead. /// - /// This function modifies calls to function \p F whose argument at index \p - /// ArgNo is equal to constant \p C. The calls are rewritten to call function - /// \p Clone instead. + /// This function modifies calls to function \p F as long as the actual + /// arguments match those in \p Args. Note that for recursive calls we + /// need to compare against the cloned formal arguments. /// /// Callsites that have been marked with the MinSize function attribute won't /// be specialized and rewritten. 
- void rewriteCallSites(Function *F, Function *Clone, Argument &Arg, - Constant *C) { - unsigned ArgNo = Arg.getArgNo(); - SmallVector<CallBase *, 4> CallSitesToRewrite; + void rewriteCallSites(Function *Clone, const SmallVectorImpl<ArgInfo> &Args, + ValueToValueMapTy &Mappings) { + assert(!Args.empty() && "Specialization without arguments"); + Function *F = Args[0].Formal->getParent(); + + SmallVector<CallBase *, 8> CallSitesToRewrite; for (auto *U : F->users()) { if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) continue; @@ -736,35 +774,50 @@ private: continue; CallSitesToRewrite.push_back(&CS); } + + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of " + << F->getName() << " with " << Clone->getName() << "\n"); + for (auto *CS : CallSitesToRewrite) { - if ((CS->getFunction() == Clone && CS->getArgOperand(ArgNo) == &Arg) || - CS->getArgOperand(ArgNo) == C) { + LLVM_DEBUG(dbgs() << "FnSpecialization: " + << CS->getFunction()->getName() << " ->" << *CS + << "\n"); + if (/* recursive call */ + (CS->getFunction() == Clone && + all_of(Args, + [CS, &Mappings](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; + })) || + /* normal call */ + all_of(Args, [CS](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Arg.Actual; + })) { CS->setCalledFunction(Clone); Solver.markOverdefined(CS); } } } - void updateSpecializedFuncs(FuncList &FuncDecls, - FuncList &CurrentSpecializations) { - for (auto *SpecializedFunc : CurrentSpecializations) { - SpecializedFuncs.insert(SpecializedFunc); + void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) { + for (auto *F : WorkList) { + SpecializedFuncs.insert(F); // Initialize the state of the newly created functions, marking them // argument-tracked and executable. 
- if (SpecializedFunc->hasExactDefinition() && - !SpecializedFunc->hasFnAttribute(Attribute::Naked)) - Solver.addTrackedFunction(SpecializedFunc); + if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked)) + Solver.addTrackedFunction(F); - Solver.addArgumentTrackedFunction(SpecializedFunc); - FuncDecls.push_back(SpecializedFunc); - Solver.markBlockExecutable(&SpecializedFunc->front()); + Solver.addArgumentTrackedFunction(F); + Candidates.push_back(F); + Solver.markBlockExecutable(&F->front()); // Replace the function arguments for the specialized functions. - for (Argument &Arg : SpecializedFunc->args()) + for (Argument &Arg : F->args()) if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg)) LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: " - << Arg.getName() << "\n"); + << Arg.getNameOrAsOperand() << "\n"); } } }; @@ -871,22 +924,26 @@ bool llvm::runFunctionSpecialization( // Initially resolve the constants in all the argument tracked functions. RunSCCPSolver(FuncDecls); - SmallVector<Function *, 2> CurrentSpecializations; + SmallVector<Function *, 8> WorkList; unsigned I = 0; while (FuncSpecializationMaxIters != I++ && - FS.specializeFunctions(FuncDecls, CurrentSpecializations)) { + FS.specializeFunctions(FuncDecls, WorkList)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n"); // Run the solver for the specialized functions. - RunSCCPSolver(CurrentSpecializations); + RunSCCPSolver(WorkList); // Replace some unresolved constant arguments. constantArgPropagation(FuncDecls, M, Solver); - CurrentSpecializations.clear(); + WorkList.clear(); Changed = true; } - // Clean up the IR by removing ssa_copy intrinsics. + LLVM_DEBUG(dbgs() << "FnSpecialization: Number of specializations = " + << NumFuncSpecialized << "\n"); + + // Remove any ssa_copy intrinsics that may have been introduced. 
removeSSACopy(M); return Changed; } diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp index 5e5d2086adc2..f35827220bb6 100644 --- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -21,7 +21,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -34,7 +33,7 @@ using namespace llvm; #define DEBUG_TYPE "globaldce" static cl::opt<bool> - ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore, + ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::desc("Enable virtual function elimination")); STATISTIC(NumAliases , "Number of global aliases removed"); @@ -86,6 +85,9 @@ ModulePass *llvm::createGlobalDCEPass() { /// Returns true if F is effectively empty. static bool isEmptyFunction(Function *F) { + // Skip external functions. + if (F->isDeclaration()) + return false; BasicBlock &Entry = F->getEntryBlock(); for (auto &I : Entry) { if (I.isDebugOrPseudoInst()) @@ -214,14 +216,14 @@ void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId, if (!Ptr) { LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n"); VFESafeVTables.erase(VTable); - return; + continue; } auto Callee = dyn_cast<Function>(Ptr->stripPointerCasts()); if (!Callee) { LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n"); VFESafeVTables.erase(VTable); - return; + continue; } LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> " @@ -298,7 +300,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // marked as alive are discarded. // Remove empty functions from the global ctors list. 
- Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + Changed |= optimizeGlobalCtorsList( + M, [](uint32_t, Function *F) { return isEmptyFunction(F); }); // Collect the set of members for each comdat. for (Function &F : M) @@ -317,7 +320,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // Loop over the module, adding globals which are obviously necessary. for (GlobalObject &GO : M.global_objects()) { - Changed |= RemoveUnusedGlobalValue(GO); + GO.removeDeadConstantUsers(); // Functions with external linkage are needed if they have a body. // Externally visible & appending globals are needed, if they have an // initializer. @@ -330,7 +333,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // Compute direct dependencies of aliases. for (GlobalAlias &GA : M.aliases()) { - Changed |= RemoveUnusedGlobalValue(GA); + GA.removeDeadConstantUsers(); // Externally visible aliases are needed. if (!GA.isDiscardableIfUnused()) MarkLive(GA); @@ -340,7 +343,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // Compute direct dependencies of ifuncs. for (GlobalIFunc &GIF : M.ifuncs()) { - Changed |= RemoveUnusedGlobalValue(GIF); + GIF.removeDeadConstantUsers(); // Externally visible ifuncs are needed. if (!GIF.isDiscardableIfUnused()) MarkLive(GIF); @@ -403,7 +406,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { // Now that all interferences have been dropped, delete the actual objects // themselves. 
auto EraseUnusedGlobalValue = [&](GlobalValue *GV) { - RemoveUnusedGlobalValue(*GV); + GV->removeDeadConstantUsers(); GV->eraseFromParent(); Changed = true; }; @@ -455,16 +458,3 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { return PreservedAnalyses::none(); return PreservedAnalyses::all(); } - -// RemoveUnusedGlobalValue - Loop over all of the uses of the specified -// GlobalValue, looking for the constant pointer ref that may be pointing to it. -// If found, check to see if the constant pointer ref is safe to destroy, and if -// so, nuke it. This will reduce the reference count on the global value, which -// might make it deader. -// -bool GlobalDCEPass::RemoveUnusedGlobalValue(GlobalValue &GV) { - if (GV.use_empty()) - return false; - GV.removeDeadConstantUsers(); - return GV.use_empty(); -} diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1cb32e32c895..1a1bde4f0668 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" @@ -37,7 +38,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -60,7 +60,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CtorUtils.h" @@ -100,7 +99,7 @@ static cl::opt<bool> cl::init(false), cl::Hidden); static cl::opt<int> ColdCCRelFreq( - "coldcc-rel-freq", cl::Hidden, 
cl::init(2), cl::ZeroOrMore, + "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::desc( "Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a call site to be considered cold for enabling" @@ -232,7 +231,7 @@ CleanupPointerRootUsers(GlobalVariable *GV, if (MemSrc && MemSrc->isConstant()) { Changed = true; MTI->eraseFromParent(); - } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) { + } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) { if (I->hasOneUse()) Dead.push_back(std::make_pair(I, MTI)); } @@ -405,9 +404,37 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, for (auto *GVE : GVs) { DIVariable *Var = GVE->getVariable(); DIExpression *Expr = GVE->getExpression(); + int64_t CurVarOffsetInBytes = 0; + uint64_t CurVarOffsetInBits = 0; + + // Calculate the offset (Bytes), Continue if unknown. + if (!Expr->extractIfOffset(CurVarOffsetInBytes)) + continue; + + // Ignore negative offset. + if (CurVarOffsetInBytes < 0) + continue; + + // Convert offset to bits. + CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes; + + // Current var starts after the fragment, ignore. + if (CurVarOffsetInBits >= (FragmentOffsetInBits + FragmentSizeInBits)) + continue; + + uint64_t CurVarSize = Var->getType()->getSizeInBits(); + // Current variable ends before start of fragment, ignore. + if (CurVarSize != 0 && + (CurVarOffsetInBits + CurVarSize) <= FragmentOffsetInBits) + continue; + + // Current variable fits in the fragment. + if (CurVarOffsetInBits == FragmentOffsetInBits && + CurVarSize == FragmentSizeInBits) + Expr = DIExpression::get(Expr->getContext(), {}); // If the FragmentSize is smaller than the variable, // emit a fragment expression. 
- if (FragmentSizeInBits < VarSize) { + else if (FragmentSizeInBits < VarSize) { if (auto E = DIExpression::createFragmentExpression( Expr, FragmentOffsetInBits, FragmentSizeInBits)) Expr = *E; @@ -581,17 +608,14 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, // Will trap. } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == V) { - //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. } } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { if (CI->getCalledOperand() != V) { - //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { if (II->getCalledOperand() != V) { - //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { @@ -615,7 +639,6 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, // the comparing of the value of the created global init bool later in // optimizeGlobalAddressOfAllocation for the global variable. } else { - //cerr << "NONTRAPPING USE: " << *U; return false; } } @@ -878,7 +901,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI, } } - SmallPtrSet<Constant *, 1> RepValues; + SmallSetVector<Constant *, 1> RepValues; RepValues.insert(NewGV); // If there is a comparison against null, we will insert a global bool to @@ -1015,7 +1038,6 @@ valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI, /// accessing the data, and exposes the resultant global to further GlobalOpt. static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV, CallInst *CI, - AtomicOrdering Ordering, const DataLayout &DL, TargetLibraryInfo *TLI) { if (!isAllocRemovable(CI, TLI)) @@ -1062,7 +1084,7 @@ static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV, // its initializer) is ever stored to the global. 
static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, - AtomicOrdering Ordering, const DataLayout &DL, + const DataLayout &DL, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1087,7 +1109,7 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, } else if (isAllocationFn(StoredOnceVal, GetTLI)) { if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) { auto *TLI = &GetTLI(*CI->getFunction()); - if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, Ordering, DL, TLI)) + if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI)) return true; } } @@ -1257,8 +1279,10 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { return true; } -static bool deleteIfDead( - GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { +static bool +deleteIfDead(GlobalValue &GV, + SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats, + function_ref<void(Function &)> DeleteFnCallback = nullptr) { GV.removeDeadConstantUsers(); if (!GV.isDiscardableIfUnused() && !GV.isDeclaration()) @@ -1277,6 +1301,10 @@ static bool deleteIfDead( return false; LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); + if (auto *F = dyn_cast<Function>(&GV)) { + if (DeleteFnCallback) + DeleteFnCallback(*F); + } GV.eraseFromParent(); ++NumDeleted; return true; @@ -1416,6 +1444,42 @@ static void makeAllConstantUsesInstructions(Constant *C) { } } +// For a global variable with one store, if the store dominates any loads, +// those loads will always load the stored value (as opposed to the +// initializer), even in the presence of recursion. 
+static bool forwardStoredOnceStore( + GlobalVariable *GV, const StoreInst *StoredOnceStore, + function_ref<DominatorTree &(Function &)> LookupDomTree) { + const Value *StoredOnceValue = StoredOnceStore->getValueOperand(); + // We can do this optimization for non-constants in nosync + norecurse + // functions, but globals used in exactly one norecurse functions are already + // promoted to an alloca. + if (!isa<Constant>(StoredOnceValue)) + return false; + const Function *F = StoredOnceStore->getFunction(); + SmallVector<LoadInst *> Loads; + for (User *U : GV->users()) { + if (auto *LI = dyn_cast<LoadInst>(U)) { + if (LI->getFunction() == F && + LI->getType() == StoredOnceValue->getType() && LI->isSimple()) + Loads.push_back(LI); + } + } + // Only compute DT if we have any loads to examine. + bool MadeChange = false; + if (!Loads.empty()) { + auto &DT = LookupDomTree(*const_cast<Function *>(F)); + for (auto *LI : Loads) { + if (DT.dominates(StoredOnceStore, LI)) { + LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue)); + LI->eraseFromParent(); + MadeChange = true; + } + } + } + return MadeChange; +} + /// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. static bool @@ -1572,9 +1636,15 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI)) + if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI)) return true; + // Try to forward the store to any loads. If we have more than one store, we + // may have a store of the initializer between StoredOnceStore and a load. + if (GS.NumStores == 1) + if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree)) + return true; + // Otherwise, if the global was not a boolean, we can shrink it to be a // boolean. 
Skip this optimization for AS that doesn't allow an initializer. if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic && @@ -1755,7 +1825,7 @@ hasOnlyColdCalls(Function &F, return false; if (!CalledFn->hasLocalLinkage()) return false; - // Skip over instrinsics since they won't remain as function calls. + // Skip over intrinsics since they won't remain as function calls. if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic) continue; // Check if it's valid to use coldcc calling convention. @@ -1884,7 +1954,9 @@ OptimizeFunctions(Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, function_ref<DominatorTree &(Function &)> LookupDomTree, - SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { + SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats, + function_ref<void(Function &F)> ChangedCFGCallback, + function_ref<void(Function &F)> DeleteFnCallback) { bool Changed = false; @@ -1904,7 +1976,7 @@ OptimizeFunctions(Module &M, if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage()) F.setLinkage(GlobalValue::InternalLinkage); - if (deleteIfDead(F, NotDiscardableComdats)) { + if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) { Changed = true; continue; } @@ -1917,13 +1989,11 @@ OptimizeFunctions(Module &M, // So, remove unreachable blocks from the function, because a) there's // no point in analyzing them and b) GlobalOpt should otherwise grow // some more complicated logic to break these cycles. - // Removing unreachable blocks might invalidate the dominator so we - // recalculate it. + // Notify the analysis manager that we've modified the function's CFG. if (!F.isDeclaration()) { if (removeUnreachableBlocks(F)) { - auto &DT = LookupDomTree(F); - DT.recalculate(F); Changed = true; + ChangedCFGCallback(F); } } @@ -2031,6 +2101,9 @@ OptimizeGlobalVars(Module &M, /// can, false otherwise. 
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, TargetLibraryInfo *TLI) { + // Skip external functions. + if (F->isDeclaration()) + return false; // Call the function. Evaluator Eval(DL, TLI); Constant *RetValDummy; @@ -2383,15 +2456,19 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { return Changed; } -static bool optimizeGlobalsInModule( - Module &M, const DataLayout &DL, - function_ref<TargetLibraryInfo &(Function &)> GetTLI, - function_ref<TargetTransformInfo &(Function &)> GetTTI, - function_ref<BlockFrequencyInfo &(Function &)> GetBFI, - function_ref<DominatorTree &(Function &)> LookupDomTree) { +static bool +optimizeGlobalsInModule(Module &M, const DataLayout &DL, + function_ref<TargetLibraryInfo &(Function &)> GetTLI, + function_ref<TargetTransformInfo &(Function &)> GetTTI, + function_ref<BlockFrequencyInfo &(Function &)> GetBFI, + function_ref<DominatorTree &(Function &)> LookupDomTree, + function_ref<void(Function &F)> ChangedCFGCallback, + function_ref<void(Function &F)> DeleteFnCallback) { SmallPtrSet<const Comdat *, 8> NotDiscardableComdats; bool Changed = false; bool LocalChange = true; + Optional<uint32_t> FirstNotFullyEvaluatedPriority; + while (LocalChange) { LocalChange = false; @@ -2411,12 +2488,20 @@ static bool optimizeGlobalsInModule( // Delete functions that are trivially dead, ccc -> fastcc LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, - NotDiscardableComdats); + NotDiscardableComdats, ChangedCFGCallback, + DeleteFnCallback); // Optimize global_ctors list. 
- LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { - return EvaluateStaticConstructor(F, DL, &GetTLI(*F)); - }); + LocalChange |= + optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) { + if (FirstNotFullyEvaluatedPriority && + *FirstNotFullyEvaluatedPriority != Priority) + return false; + bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F)); + if (!Evaluated) + FirstNotFullyEvaluatedPriority = Priority; + return Evaluated; + }); // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree, @@ -2457,10 +2542,23 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { return FAM.getResult<BlockFrequencyAnalysis>(F); }; + auto ChangedCFGCallback = [&FAM](Function &F) { + FAM.invalidate(F, PreservedAnalyses::none()); + }; + auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); }; - if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree)) + if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree, + ChangedCFGCallback, DeleteFnCallback)) return PreservedAnalyses::all(); - return PreservedAnalyses::none(); + + PreservedAnalyses PA = PreservedAnalyses::none(); + // We made sure to clear analyses for deleted functions. + PA.preserve<FunctionAnalysisManagerModuleProxy>(); + // The only place we modify the CFG is when calling + // removeUnreachableBlocks(), but there we make sure to invalidate analyses + // for modified functions. 
+ PA.preserveSet<CFGAnalyses>(); + return PA; } namespace { @@ -2491,8 +2589,13 @@ struct GlobalOptLegacyPass : public ModulePass { return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); }; - return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, - LookupDomTree); + auto ChangedCFGCallback = [&LookupDomTree](Function &F) { + auto &DT = LookupDomTree(F); + DT.recalculate(F); + }; + + return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree, + ChangedCFGCallback, nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp index e7d698c42fcf..7d9e6135b2eb 100644 --- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp +++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp @@ -134,9 +134,9 @@ static bool splitGlobal(GlobalVariable &GV) { } // Finally, remove the original global. Any remaining uses refer to invalid - // elements of the global, so replace with undef. + // elements of the global, so replace with poison. 
if (!GV.use_empty()) - GV.replaceAllUsesWith(UndefValue::get(GV.getType())); + GV.replaceAllUsesWith(PoisonValue::get(GV.getType())); GV.eraseFromParent(); return true; } diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index a964fcde0396..95e8ae0fd22f 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -29,46 +29,33 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/BlockFrequency.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> -#include <limits> #include 
<cassert> +#include <limits> #include <string> #define DEBUG_TYPE "hotcoldsplit" @@ -126,7 +113,8 @@ bool unlikelyExecuted(BasicBlock &BB) { // mark sanitizer traps as cold. for (Instruction &I : BB) if (auto *CB = dyn_cast<CallBase>(&I)) - if (CB->hasFnAttr(Attribute::Cold) && !CB->getMetadata("nosanitize")) + if (CB->hasFnAttr(Attribute::Cold) && + !CB->getMetadata(LLVMContext::MD_nosanitize)) return true; // The block is cold if it has an unreachable terminator, unless it's @@ -352,7 +340,7 @@ Function *HotColdSplitting::extractColdRegion( // TODO: Pass BFI and BPI to update profile information. CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr, /* BPI */ nullptr, AC, /* AllowVarArgs */ false, - /* AllowAlloca */ false, + /* AllowAlloca */ false, /* AllocaBlock */ nullptr, /* Suffix */ "cold." + std::to_string(Count)); // Perform a simple cost/benefit analysis to decide whether or not to permit @@ -740,7 +728,7 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) { std::function<OptimizationRemarkEmitter &(Function &)> GetORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); - return *ORE.get(); + return *ORE; }; auto LookupAC = [this](Function &F) -> AssumptionCache * { if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>()) @@ -772,7 +760,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) { std::function<OptimizationRemarkEmitter &(Function &)> GetORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); - return *ORE.get(); + return *ORE; }; ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index de1c1d379502..ec2b80012ed6 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -24,7 +24,6 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { 
initializeOpenMPOptCGSCCLegacyPassPass(Registry); - initializeArgPromotionPass(Registry); initializeAnnotation2MetadataLegacyPass(Registry); initializeCalledValuePropagationLegacyPassPass(Registry); initializeConstantMergeLegacyPassPass(Registry); @@ -70,10 +69,6 @@ void LLVMInitializeIPO(LLVMPassRegistryRef R) { initializeIPO(*unwrap(R)); } -void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createArgumentPromotionPass()); -} - void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCalledValuePropagationPass()); } diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index faf7cb7d566a..d75d99e307fd 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -16,8 +16,9 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/PassManager.h" @@ -25,8 +26,6 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" -#include <map> -#include <set> #include <vector> #define DEBUG_TYPE "iroutliner" @@ -183,11 +182,24 @@ static void getSortedConstantKeys(std::vector<Value *> &SortedKeys, Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other, Value *V) { Optional<unsigned> GVN = Candidate->getGVN(V); - assert(GVN.hasValue() && "No GVN for incoming value"); + assert(GVN && "No GVN for incoming value"); Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN); Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum); Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN); - return FoundValueOpt.getValueOr(nullptr); + return 
FoundValueOpt.value_or(nullptr); +} + +BasicBlock * +OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other, + BasicBlock *BB) { + Instruction *FirstNonPHI = BB->getFirstNonPHI(); + assert(FirstNonPHI && "block is empty?"); + Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI); + if (!CorrespondingVal) + return nullptr; + BasicBlock *CorrespondingBlock = + cast<Instruction>(CorrespondingVal)->getParent(); + return CorrespondingBlock; } /// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found @@ -264,13 +276,33 @@ void OutlinableRegion::splitCandidate() { // We iterate over the instructions in the region, if we find a PHINode, we // check if there are predecessors outside of the region, if there are, // we ignore this region since we are unable to handle the severing of the - // phi node right now. + // phi node right now. + + // TODO: Handle extraneous inputs for PHINodes through variable number of + // inputs, similar to how outputs are handled. BasicBlock::iterator It = StartInst->getIterator(); + EndBB = BackInst->getParent(); + BasicBlock *IBlock; + BasicBlock *PHIPredBlock = nullptr; + bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst; while (PHINode *PN = dyn_cast<PHINode>(&*It)) { unsigned NumPredsOutsideRegion = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!BBSet.contains(PN->getIncomingBlock(i))) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (!BBSet.contains(PN->getIncomingBlock(i))) { + PHIPredBlock = PN->getIncomingBlock(i); ++NumPredsOutsideRegion; + continue; + } + + // We must consider the case there the incoming block to the PHINode is + // the same as the final block of the OutlinableRegion. If this is the + // case, the branch from this block must also be outlined to be valid. 
+ IBlock = PN->getIncomingBlock(i); + if (IBlock == EndBB && EndBBTermAndBackInstDifferent) { + PHIPredBlock = PN->getIncomingBlock(i); + ++NumPredsOutsideRegion; + } + } if (NumPredsOutsideRegion > 1) return; @@ -285,11 +317,9 @@ void OutlinableRegion::splitCandidate() { // If the region ends with a PHINode, but does not contain all of the phi node // instructions of the region, we ignore it for now. - if (isa<PHINode>(BackInst)) { - EndBB = BackInst->getParent(); - if (BackInst != &*std::prev(EndBB->getFirstInsertionPt())) - return; - } + if (isa<PHINode>(BackInst) && + BackInst != &*std::prev(EndBB->getFirstInsertionPt())) + return; // The basic block gets split like so: // block: block: @@ -310,6 +340,10 @@ void OutlinableRegion::splitCandidate() { StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline"); PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB); + // If there was a PHINode with an incoming block outside the region, + // make sure is correctly updated in the newly split block. + if (PHIPredBlock) + PrevBB->replaceSuccessorsPhiUsesWith(PHIPredBlock, PrevBB); CandidateSplit = true; if (!BackInst->isTerminator()) { @@ -353,6 +387,25 @@ void OutlinableRegion::reattachCandidate() { assert(StartBB != nullptr && "StartBB for Candidate is not defined!"); assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); + // Make sure PHINode references to the block we are merging into are + // updated to be incoming blocks from the predecessor to the current block. + + // NOTE: If this is updated such that the outlined block can have more than + // one incoming block to a PHINode, this logic will have to updated + // to handle multiple precessors instead. + + // We only need to update this if the outlined section contains a PHINode, if + // it does not, then the incoming block was never changed in the first place. 
+ // On the other hand, if PrevBB has no predecessors, it means that all + // incoming blocks to the first block are contained in the region, and there + // will be nothing to update. + Instruction *StartInst = (*Candidate->begin()).Inst; + if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) { + assert(!PrevBB->hasNPredecessorsOrMore(2) && + "PrevBB has more than one predecessor. Should be 0 or 1."); + BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor(); + PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, BeforePrevBB); + } PrevBB->getTerminator()->eraseFromParent(); // If we reattaching after outlining, we iterate over the phi nodes to @@ -501,7 +554,7 @@ collectRegionsConstants(OutlinableRegion &Region, // the the number has been found to be not the same value in each instance. for (Value *V : ID.OperVals) { Optional<unsigned> GVNOpt = C.getGVN(V); - assert(GVNOpt.hasValue() && "Expected a GVN for operand?"); + assert(GVNOpt && "Expected a GVN for operand?"); unsigned GVN = GVNOpt.getValue(); // Check if this global value has been found to not be the same already. @@ -516,7 +569,7 @@ collectRegionsConstants(OutlinableRegion &Region, // global value number. If the global value does not map to a Constant, // it is considered to not be the same value. Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant); - if (ConstantMatches.hasValue()) { + if (ConstantMatches) { if (ConstantMatches.getValue()) continue; else @@ -597,7 +650,7 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group, "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M); // Transfer the swifterr attribute to the correct function parameter. 
- if (Group.SwiftErrorArgument.hasValue()) + if (Group.SwiftErrorArgument) Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(), Attribute::SwiftError); @@ -666,6 +719,18 @@ static void moveFunctionData(Function &Old, Function &New, if (!isa<CallInst>(&Val)) { // Remove the debug information for outlined functions. Val.setDebugLoc(DebugLoc()); + + // Loop info metadata may contain line locations. Update them to have no + // value in the new subprogram since the outlined code could be from + // several locations. + auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * { + if (DISubprogram *SP = New.getSubprogram()) + if (auto *Loc = dyn_cast_or_null<DILocation>(MD)) + return DILocation::get(New.getContext(), Loc->getLine(), + Loc->getColumn(), SP, nullptr); + return MD; + }; + updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc); continue; } @@ -691,8 +756,6 @@ static void moveFunctionData(Function &Old, Function &New, for (Instruction *I : DebugInsts) I->eraseFromParent(); } - - assert(NewEnds.size() > 0 && "No return instruction for new function?"); } /// Find the the constants that will need to be lifted into arguments @@ -714,7 +777,7 @@ static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame, for (Value *V : (*IDIt).OperVals) { // Since these are stored before any outlining, they will be in the // global value numbering. 
- unsigned GVN = C.getGVN(V).getValue(); + unsigned GVN = *C.getGVN(V); if (isa<Constant>(V)) if (NotSame.contains(GVN) && !Seen.contains(GVN)) { Inputs.push_back(GVN); @@ -745,8 +808,7 @@ static void mapInputsToGVNs(IRSimilarityCandidate &C, assert(Input && "Have a nullptr as an input"); if (OutputMappings.find(Input) != OutputMappings.end()) Input = OutputMappings.find(Input)->second; - assert(C.getGVN(Input).hasValue() && - "Could not find a numbering for the given input"); + assert(C.getGVN(Input) && "Could not find a numbering for the given input"); EndInputNumbers.push_back(C.getGVN(Input).getValue()); } } @@ -885,11 +947,11 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region, // numbering overrides any discovered location for the extracted code. for (unsigned InputVal : InputGVNs) { Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal); - assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?"); + assert(CanonicalNumberOpt && "Canonical number not found?"); unsigned CanonicalNumber = CanonicalNumberOpt.getValue(); Optional<Value *> InputOpt = C.fromGVN(InputVal); - assert(InputOpt.hasValue() && "Global value number not found?"); + assert(InputOpt && "Global value number not found?"); Value *Input = InputOpt.getValue(); DenseMap<unsigned, unsigned>::iterator AggArgIt = @@ -901,7 +963,7 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region, // argument in the overall function. if (Input->isSwiftError()) { assert( - !Group.SwiftErrorArgument.hasValue() && + !Group.SwiftErrorArgument && "Argument already marked with swifterr for this OutlinableGroup!"); Group.SwiftErrorArgument = TypeIndex; } @@ -969,12 +1031,11 @@ static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, // We check to see if the value is used by the PHINode from some other // predecessor not included in the region. If it is, we make sure // to keep it as an output. 
- SmallVector<unsigned, 2> IncomingNumbers(PN.getNumIncomingValues()); - std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0); - if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) { - return (Idx != PHILoc && V == PN.getIncomingValue(Idx) && - !BlocksInRegion.contains(PN.getIncomingBlock(Idx))); - })) + if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()), + [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) { + return (Idx != PHILoc && V == PN.getIncomingValue(Idx) && + !BlocksInRegion.contains(PN.getIncomingBlock(Idx))); + })) return true; // Check if the value is used by any other instructions outside the region. @@ -1098,30 +1159,72 @@ static hash_code encodePHINodeData(PHINodeData &PND) { /// /// \param Region - The region that \p PN is an output for. /// \param PN - The PHINode we are analyzing. +/// \param Blocks - The blocks for the region we are analyzing. /// \param AggArgIdx - The argument \p PN will be stored into. /// \returns An optional holding the assigned canonical number, or None if /// there is some attribute of the PHINode blocking it from being used. static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region, - PHINode *PN, unsigned AggArgIdx) { + PHINode *PN, + DenseSet<BasicBlock *> &Blocks, + unsigned AggArgIdx) { OutlinableGroup &Group = *Region.Parent; IRSimilarityCandidate &Cand = *Region.Candidate; BasicBlock *PHIBB = PN->getParent(); CanonList PHIGVNs; - for (Value *Incoming : PN->incoming_values()) { - // If we cannot find a GVN, this means that the input to the PHINode is - // not included in the region we are trying to analyze, meaning, that if - // it was outlined, we would be adding an extra input. We ignore this - // case for now, and so ignore the region. 
+ Value *Incoming; + BasicBlock *IncomingBlock; + for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) { + Incoming = PN->getIncomingValue(Idx); + IncomingBlock = PN->getIncomingBlock(Idx); + // If we cannot find a GVN, and the incoming block is included in the region + // this means that the input to the PHINode is not included in the region we + // are trying to analyze, meaning, that if it was outlined, we would be + // adding an extra input. We ignore this case for now, and so ignore the + // region. Optional<unsigned> OGVN = Cand.getGVN(Incoming); - if (!OGVN.hasValue()) { + if (!OGVN && Blocks.contains(IncomingBlock)) { Region.IgnoreRegion = true; return None; } + // If the incoming block isn't in the region, we don't have to worry about + // this incoming value. + if (!Blocks.contains(IncomingBlock)) + continue; + // Collect the canonical numbers of the values in the PHINode. - unsigned GVN = OGVN.getValue(); + unsigned GVN = *OGVN; OGVN = Cand.getCanonicalNum(GVN); - assert(OGVN.hasValue() && "No GVN found for incoming value?"); + assert(OGVN && "No GVN found for incoming value?"); + PHIGVNs.push_back(*OGVN); + + // Find the incoming block and use the canonical numbering as well to define + // the hash for the PHINode. + OGVN = Cand.getGVN(IncomingBlock); + + // If there is no number for the incoming block, it is becaause we have + // split the candidate basic blocks. So we use the previous block that it + // was split from to find the valid global value numbering for the PHINode. + if (!OGVN) { + assert(Cand.getStartBB() == IncomingBlock && + "Unknown basic block used in exit path PHINode."); + + BasicBlock *PrevBlock = nullptr; + // Iterate over the predecessors to the incoming block of the + // PHINode, when we find a block that is not contained in the region + // we know that this is the first block that we split from, and should + // have a valid global value numbering. 
+ for (BasicBlock *Pred : predecessors(IncomingBlock)) + if (!Blocks.contains(Pred)) { + PrevBlock = Pred; + break; + } + assert(PrevBlock && "Expected a predecessor not in the reigon!"); + OGVN = Cand.getGVN(PrevBlock); + } + GVN = *OGVN; + OGVN = Cand.getCanonicalNum(GVN); + assert(OGVN && "No GVN found for incoming block?"); PHIGVNs.push_back(*OGVN); } @@ -1131,11 +1234,10 @@ static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region, DenseMap<hash_code, unsigned>::iterator GVNToPHIIt; DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt; Optional<unsigned> BBGVN = Cand.getGVN(PHIBB); - assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!"); + assert(BBGVN && "Could not find GVN for the incoming block!"); BBGVN = Cand.getCanonicalNum(BBGVN.getValue()); - assert(BBGVN.hasValue() && - "Could not find canonical number for the incoming block!"); + assert(BBGVN && "Could not find canonical number for the incoming block!"); // Create a pair of the exit block canonical value, and the aggregate // argument location, connected to the canonical numbers stored in the // PHINode. @@ -1262,9 +1364,9 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region, // If two PHINodes have the same canonical values, but different aggregate // argument locations, then they will have distinct Canonical Values. - GVN = getGVNForPHINode(Region, PN, AggArgIdx); - if (!GVN.hasValue()) - return; + GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx); + if (!GVN) + return; } else { // If we do not have a PHINode we use the global value numbering for the // output value, to find the canonical number to add to the set of stored @@ -1413,7 +1515,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) { // Make sure that the argument in the new function has the SwiftError // argument. 
- if (Group.SwiftErrorArgument.hasValue()) + if (Group.SwiftErrorArgument) Call->addParamAttr(Group.SwiftErrorArgument.getValue(), Attribute::SwiftError); @@ -1520,17 +1622,18 @@ getPassedArgumentAndAdjustArgumentLocation(const Argument *A, /// \param OutputMappings [in] - The mapping of output values from outlined /// region to their original values. /// \param CanonNums [out] - The canonical numbering for the incoming values to -/// \p PN. +/// \p PN paired with their incoming block. /// \param ReplacedWithOutlinedCall - A flag to use the extracted function call /// of \p Region rather than the overall function's call. -static void -findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, - const DenseMap<Value *, Value *> &OutputMappings, - DenseSet<unsigned> &CanonNums, - bool ReplacedWithOutlinedCall = true) { +static void findCanonNumsForPHI( + PHINode *PN, OutlinableRegion &Region, + const DenseMap<Value *, Value *> &OutputMappings, + SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums, + bool ReplacedWithOutlinedCall = true) { // Iterate over the incoming values. for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) { Value *IVal = PN->getIncomingValue(Idx); + BasicBlock *IBlock = PN->getIncomingBlock(Idx); // If we have an argument as incoming value, we need to grab the passed // value from the call itself. if (Argument *A = dyn_cast<Argument>(IVal)) { @@ -1545,10 +1648,10 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, // Find and add the canonical number for the incoming value. 
Optional<unsigned> GVN = Region.Candidate->getGVN(IVal); - assert(GVN.hasValue() && "No GVN for incoming value"); + assert(GVN && "No GVN for incoming value"); Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN); - assert(CanonNum.hasValue() && "No Canonical Number for GVN"); - CanonNums.insert(*CanonNum); + assert(CanonNum && "No Canonical Number for GVN"); + CanonNums.push_back(std::make_pair(*CanonNum, IBlock)); } } @@ -1557,19 +1660,26 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, /// function. /// /// \param PN [in] - The PHINode that we are finding the canonical numbers for. -/// \param Region [in] - The OutlinableRegion containing \p PN. +/// \param Region [in] - The OutlinableRegion containing \p PN. /// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find /// \p PN in. /// \param OutputMappings [in] - The mapping of output values from outlined /// region to their original values. +/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been +/// matched. /// \return the newly found or created PHINode in \p OverallPhiBlock. static PHINode* findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, - const DenseMap<Value *, Value *> &OutputMappings) { + const DenseMap<Value *, Value *> &OutputMappings, + DenseSet<PHINode *> &UsedPHIs) { OutlinableGroup &Group = *Region.Parent; - DenseSet<unsigned> PNCanonNums; + + // A list of the canonical numbering assigned to each incoming value, paired + // with the incoming block for the PHINode passed into this function. + SmallVector<std::pair<unsigned, BasicBlock *>> PNCanonNums; + // We have to use the extracted function since we have merged this region into // the overall function yet. 
We make sure to reassign the argument numbering // since it is possible that the argument ordering is different between the @@ -1578,18 +1688,61 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, /* ReplacedWithOutlinedCall = */ false); OutlinableRegion *FirstRegion = Group.Regions[0]; - DenseSet<unsigned> CurrentCanonNums; + + // A list of the canonical numbering assigned to each incoming value, paired + // with the incoming block for the PHINode that we are currently comparing + // the passed PHINode to. + SmallVector<std::pair<unsigned, BasicBlock *>> CurrentCanonNums; + // Find the Canonical Numbering for each PHINode, if it matches, we replace // the uses of the PHINode we are searching for, with the found PHINode. for (PHINode &CurrPN : OverallPhiBlock->phis()) { + // If this PHINode has already been matched to another PHINode to be merged, + // we skip it. + if (UsedPHIs.contains(&CurrPN)) + continue; + CurrentCanonNums.clear(); findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums, /* ReplacedWithOutlinedCall = */ true); - if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) { - return CurrentCanonNums.contains(CanonNum); - })) + // If the list of incoming values is not the same length, then they cannot + // match since there is not an analogue for each incoming value. + if (PNCanonNums.size() != CurrentCanonNums.size()) + continue; + + bool FoundMatch = true; + + // We compare the canonical value for each incoming value in the passed + // in PHINode to one already present in the outlined region. If the + // incoming values do not match, then the PHINodes do not match. + + // We also check to make sure that the incoming block matches as well by + // finding the corresponding incoming block in the combined outlined region + // for the current outlined region. 
+ for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) { + std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx]; + std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx]; + if (ToCompareTo.first != ToAdd.first) { + FoundMatch = false; + break; + } + + BasicBlock *CorrespondingBlock = + Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second); + assert(CorrespondingBlock && "Found block is nullptr"); + if (CorrespondingBlock != ToCompareTo.second) { + FoundMatch = false; + break; + } + } + + // If all incoming values and branches matched, then we can merge + // into the found PHINode. + if (FoundMatch) { + UsedPHIs.insert(&CurrPN); return &CurrPN; + } } // If we've made it here, it means we weren't able to replace the PHINode, so @@ -1603,12 +1756,8 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, // Find corresponding basic block in the overall function for the incoming // block. - Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI(); - assert(FirstNonPHI && "Incoming block is empty?"); - Value *CorrespondingVal = - Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI); - assert(CorrespondingVal && "Value is nullptr?"); - BasicBlock *BlockToUse = cast<Instruction>(CorrespondingVal)->getParent(); + BasicBlock *BlockToUse = + Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock); NewPN->setIncomingBlock(Idx, BlockToUse); // If we have an argument we make sure we replace using the argument from @@ -1623,6 +1772,10 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, IncomingVal = findOutputMapping(OutputMappings, IncomingVal); Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal); assert(Val && "Value is nullptr?"); + DenseMap<Value *, Value *>::iterator RemappedIt = + FirstRegion->RemappedArguments.find(Val); + if (RemappedIt != FirstRegion->RemappedArguments.end()) + Val = RemappedIt->second; NewPN->setIncomingValue(Idx, Val); } return NewPN; @@ -1649,6 +1802,7 @@ 
replaceArgumentUses(OutlinableRegion &Region, if (FirstFunction) DominatingFunction = Group.OutlinedFunction; DominatorTree DT(*DominatingFunction); + DenseSet<PHINode *> UsedPHIs; for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size(); ArgIdx++) { @@ -1665,6 +1819,8 @@ replaceArgumentUses(OutlinableRegion &Region, << *Region.ExtractedFunction << " with " << *AggArg << " in function " << *Group.OutlinedFunction << "\n"); Arg->replaceAllUsesWith(AggArg); + Value *V = Region.Call->getArgOperand(ArgIdx); + Region.RemappedArguments.insert(std::make_pair(V, AggArg)); continue; } @@ -1713,7 +1869,7 @@ replaceArgumentUses(OutlinableRegion &Region, // If this is storing a PHINode, we must make sure it is included in the // overall function. if (!isa<PHINode>(ValueOperand) || - Region.Candidate->getGVN(ValueOperand).hasValue()) { + Region.Candidate->getGVN(ValueOperand).has_value()) { if (FirstFunction) continue; Value *CorrVal = @@ -1725,7 +1881,7 @@ replaceArgumentUses(OutlinableRegion &Region, PHINode *PN = cast<PHINode>(SI->getValueOperand()); // If it has a value, it was not split by the code extractor, which // is what we are looking for. - if (Region.Candidate->getGVN(PN).hasValue()) + if (Region.Candidate->getGVN(PN)) continue; // We record the parent block for the PHINode in the Region so that @@ -1748,8 +1904,8 @@ replaceArgumentUses(OutlinableRegion &Region, // For our PHINode, we find the combined canonical numbering, and // attempt to find a matching PHINode in the overall PHIBlock. If we // cannot, we copy the PHINode and move it into this new block. - PHINode *NewPN = - findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings); + PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, + OutputMappings, UsedPHIs); NewI->setOperand(0, NewPN); } @@ -1923,7 +2079,7 @@ static void alignOutputBlockWithAggFunc( // If there is, we remove the new output blocks. If it does not, // we add it to our list of sets of output blocks. 
- if (MatchingBB.hasValue()) { + if (MatchingBB) { LLVM_DEBUG(dbgs() << "Set output block for region in function" << Region.ExtractedFunction << " to " << MatchingBB.getValue()); @@ -2279,6 +2435,9 @@ void IROutliner::pruneIncompatibleRegions( if (BBHasAddressTaken) continue; + if (IRSC.getFunction()->hasOptNone()) + continue; + if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() && !OutlineFromLinkODRs) continue; @@ -2343,9 +2502,9 @@ static Value *findOutputValueInRegion(OutlinableRegion &Region, OutputCanon = *It->second.second.begin(); } Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon); - assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?"); + assert(OGVN && "Could not find GVN for Canonical Number?"); Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN); - assert(OV.hasValue() && "Could not find value for GVN?"); + assert(OV && "Could not find value for GVN?"); return *OV; } @@ -2400,11 +2559,8 @@ static InstructionCost findCostForOutputBlocks(Module &M, for (Value *V : ID.OperVals) { BasicBlock *BB = static_cast<BasicBlock *>(V); - DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB); - if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB)) - continue; - FoundBlocks.insert(BB); - NumOutputBranches++; + if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second) + NumOutputBranches++; } } @@ -2520,7 +2676,7 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region, // If we found an output register, place a mapping of the new value // to the original in the mapping. 
- if (!OutputIdx.hasValue()) + if (!OutputIdx) return; if (OutputMappings.find(Outputs[OutputIdx.getValue()]) == @@ -2680,7 +2836,7 @@ unsigned IROutliner::doOutline(Module &M) { OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, "outlined"); + false, nullptr, "outlined"); findAddInputsOutputs(M, *OS, NotSame); if (!OS->IgnoreRegion) OutlinedRegions.push_back(OS); @@ -2791,7 +2947,7 @@ unsigned IROutliner::doOutline(Module &M) { OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, "outlined"); + false, nullptr, "outlined"); bool FunctionOutlined = extractSection(*OS); if (FunctionOutlined) { unsigned StartIdx = OS->Candidate->getStartIdx(); @@ -2874,7 +3030,7 @@ bool IROutlinerLegacyPass::runOnModule(Module &M) { std::unique_ptr<OptimizationRemarkEmitter> ORE; auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); - return *ORE.get(); + return *ORE; }; auto GTTI = [this](Function &F) -> TargetTransformInfo & { @@ -2905,7 +3061,7 @@ PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) { std::function<OptimizationRemarkEmitter &(Function &)> GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & { ORE.reset(new OptimizationRemarkEmitter(&F)); - return *ORE.get(); + return *ORE; }; if (IROutliner(GTTI, GIRSI, GORE).run(M)) diff --git a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp index c32e09875a12..76f8f1a7a482 100644 --- a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -9,11 +9,8 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" 
#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -32,7 +29,7 @@ static bool inferAllPrototypeAttributes( // explicitly visited by CGSCC passes in the new pass manager.) if (F.isDeclaration() && !F.hasOptNone()) { if (!F.hasFnAttribute(Attribute::NoBuiltin)) - Changed |= inferLibFuncAttributes(F, GetTLI(F)); + Changed |= inferNonMandatoryLibFuncAttrs(F, GetTLI(F)); Changed |= inferAttributesFromOthers(F); } diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp index 76f1d0c54d08..2143e39d488d 100644 --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -12,14 +12,8 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Inliner.h" diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 49babc24cb82..4d32266eb9ea 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -14,21 +14,21 @@ #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include 
"llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineOrder.h" @@ -37,11 +37,9 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -67,8 +65,6 @@ #include <algorithm> #include <cassert> #include <functional> -#include <sstream> -#include <tuple> #include <utility> #include <vector> @@ -92,11 +88,28 @@ static cl::opt<bool> DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden); +static cl::opt<int> IntraSCCCostMultiplier( + "intra-scc-cost-multiplier", cl::init(2), cl::Hidden, + cl::desc( + "Cost multiplier to multiply onto inlined call sites where the " + "new call was previously an intra-SCC call (not relevant when the " + "original call was already intra-SCC). This can accumulate over " + "multiple inlinings (e.g. if a call site already had a cost " + "multiplier and one of its inlined calls was also subject to " + "this, the inlined call would have the original multiplier " + "multiplied by intra-scc-cost-multiplier). This is to prevent tons of " + "inlining through a child SCC which can cause terrible compile times")); + /// A flag for test, so we can print the content of the advisor when running it /// as part of the default (e.g. 
-O3) pipeline. static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing", cl::init(false), cl::Hidden); +/// Allows printing the contents of the advisor after each SCC inliner pass. +static cl::opt<bool> + EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing", + cl::init(false), cl::Hidden); + extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats; static cl::opt<std::string> CGSCCInlineReplayFile( @@ -150,10 +163,6 @@ static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat( "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How cgscc inline replay file is formatted"), cl::Hidden); -static cl::opt<bool> InlineEnablePriorityOrder( - "inline-enable-priority-order", cl::Hidden, cl::init(false), - cl::desc("Enable the priority inline order for the inliner")); - LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) @@ -708,8 +717,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, // duration of the inliner pass, and thus the lifetime of the owned advisor. // The one we would get from the MAM can be invalidated as a result of the // inliner's activity. 
- OwnedAdvisor = - std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams()); + OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>( + M, FAM, getInlineParams(), + InlineContext{LTOPhase, InlinePass::CGSCCInliner}); if (!CGSCCInlineReplayFile.empty()) OwnedAdvisor = getReplayInlineAdvisor( @@ -718,7 +728,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, CGSCCInlineReplayScope, CGSCCInlineReplayFallback, {CGSCCInlineReplayFormat}}, - /*EmitRemarks=*/true); + /*EmitRemarks=*/true, + InlineContext{LTOPhase, + InlinePass::ReplayCGSCCInliner}); return *OwnedAdvisor; } @@ -744,7 +756,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, .getManager(); InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M); - Advisor.onPassEntry(); + Advisor.onPassEntry(&InitialC); auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(&InitialC); }); @@ -773,12 +785,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // this model, but it is uniformly spread across all the functions in the SCC // and eventually they all become too large to inline, rather than // incrementally maknig a single function grow in a super linear fashion. - std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls; - if (InlineEnablePriorityOrder) - Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>(); - else - Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>(); - assert(Calls != nullptr && "Expected an initialized InlineOrder"); + DefaultInlineOrder<std::pair<CallBase *, int>> Calls; // Populate the initial list of calls in this SCC. 
for (auto &N : InitialC) { @@ -793,7 +800,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (auto *CB = dyn_cast<CallBase>(&I)) if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) - Calls->push({CB, -1}); + Calls.push({CB, -1}); else if (!isa<IntrinsicInst>(I)) { using namespace ore; setInlineRemark(*CB, "unavailable definition"); @@ -807,7 +814,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, } } } - if (Calls->empty()) + if (Calls.empty()) return PreservedAnalyses::all(); // Capture updatable variable for the current SCC. @@ -833,15 +840,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, SmallVector<Function *, 4> DeadFunctionsInComdats; // Loop forward over all of the calls. - while (!Calls->empty()) { + while (!Calls.empty()) { // We expect the calls to typically be batched with sequences of calls that // have the same caller, so we first set up some shared infrastructure for // this caller. We also do any pruning we can at this layer on the caller // alone. - Function &F = *Calls->front().first->getCaller(); + Function &F = *Calls.front().first->getCaller(); LazyCallGraph::Node &N = *CG.lookup(F); if (CG.lookupSCC(N) != C) { - Calls->pop(); + Calls.pop(); continue; } @@ -857,8 +864,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. 
bool DidInline = false; - while (!Calls->empty() && Calls->front().first->getCaller() == &F) { - auto P = Calls->pop(); + while (!Calls.empty() && Calls.front().first->getCaller() == &F) { + auto P = Calls.pop(); CallBase *CB = P.first; const int InlineHistoryID = P.second; Function &Callee = *CB->getCalledFunction(); @@ -876,8 +883,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // trigger infinite inlining, much like is prevented within the inliner // itself by the InlineHistory above, but spread across CGSCC iterations // and thus hidden from the full inline history. - if (CG.lookupSCC(*CG.lookup(Callee)) == C && - UR.InlinedInternalEdges.count({&N, C})) { + LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee)); + if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) { LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " "previously split out of this SCC by inlining: " << F.getName() << " -> " << Callee.getName() << "\n"); @@ -897,6 +904,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, continue; } + int CBCostMult = + getStringFnAttrAsInt( + *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName) + .value_or(1); + // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( @@ -935,9 +947,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (tryPromoteCall(*ICB)) NewCallee = ICB->getCalledFunction(); } - if (NewCallee) - if (!NewCallee->isDeclaration()) - Calls->push({ICB, NewHistoryID}); + if (NewCallee) { + if (!NewCallee->isDeclaration()) { + Calls.push({ICB, NewHistoryID}); + // Continually inlining through an SCC can result in huge compile + // times and bloated code since we arbitrarily stop at some point + // when the inliner decides it's not profitable to inline anymore. + // We attempt to mitigate this by making these calls exponentially + // more expensive. 
+ // This doesn't apply to calls in the same SCC since if we do + // inline through the SCC the function will end up being + // self-recursive which the inliner bails out on, and inlining + // within an SCC is necessary for performance. + if (CalleeSCC != C && + CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) { + Attribute NewCBCostMult = Attribute::get( + M.getContext(), + InlineConstants::FunctionInlineCostMultiplierAttributeName, + itostr(CBCostMult * IntraSCCCostMultiplier)); + ICB->addFnAttr(NewCBCostMult); + } + } + } } } @@ -953,7 +984,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() && !CG.isLibFunction(Callee)) { if (Callee.hasLocalLinkage() || !Callee.hasComdat()) { - Calls->erase_if([&](const std::pair<CallBase *, int> &Call) { + Calls.erase_if([&](const std::pair<CallBase *, int> &Call) { return Call.first->getCaller() == &Callee; }); // Clear the body and queue the function itself for deletion when we @@ -1083,17 +1114,24 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, bool MandatoryFirst, + InlineContext IC, InliningAdvisorMode Mode, unsigned MaxDevirtIterations) - : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) { + : Params(Params), IC(IC), Mode(Mode), + MaxDevirtIterations(MaxDevirtIterations) { // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO // because it makes profile annotation in the backend inaccurate. 
- if (MandatoryFirst) + if (MandatoryFirst) { PM.addPass(InlinerPass(/*OnlyMandatory*/ true)); + if (EnablePostSCCAdvisorPrinting) + PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs())); + } PM.addPass(InlinerPass()); + if (EnablePostSCCAdvisorPrinting) + PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs())); } PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, @@ -1103,7 +1141,8 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, {CGSCCInlineReplayFile, CGSCCInlineReplayScope, CGSCCInlineReplayFallback, - {CGSCCInlineReplayFormat}})) { + {CGSCCInlineReplayFormat}}, + IC)) { M.getContext().emitError( "Could not setup Inlining Advisor for the requested " "mode and/or options"); diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp index 692e445cb7cb..5aa5b905f06c 100644 --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -19,7 +19,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/Internalize.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" @@ -33,8 +32,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; #define DEBUG_TYPE "internalize" diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp index d9a59dd35fde..ad1927c09803 100644 --- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -23,14 +23,9 @@ #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" -#include 
"llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" -#include <fstream> -#include <set> using namespace llvm; #define DEBUG_TYPE "loop-extract" diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 8e83d7bcb6c2..d5f1d291f41f 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1223,6 +1223,7 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { static const unsigned kX86JumpTableEntrySize = 8; static const unsigned kARMJumpTableEntrySize = 4; static const unsigned kARMBTIJumpTableEntrySize = 8; +static const unsigned kRISCVJumpTableEntrySize = 8; unsigned LowerTypeTestsModule::getJumpTableEntrySize() { switch (Arch) { @@ -1238,6 +1239,9 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() { if (BTE->getZExtValue()) return kARMBTIJumpTableEntrySize; return kARMJumpTableEntrySize; + case Triple::riscv32: + case Triple::riscv64: + return kRISCVJumpTableEntrySize; default: report_fatal_error("Unsupported architecture for jump tables"); } @@ -1265,6 +1269,9 @@ void LowerTypeTestsModule::createJumpTableEntry( AsmOS << "b $" << ArgIndex << "\n"; } else if (JumpTableArch == Triple::thumb) { AsmOS << "b.w $" << ArgIndex << "\n"; + } else if (JumpTableArch == Triple::riscv32 || + JumpTableArch == Triple::riscv64) { + AsmOS << "tail $" << ArgIndex << "@plt\n"; } else { report_fatal_error("Unsupported architecture for jump tables"); } @@ -1282,7 +1289,8 @@ Type *LowerTypeTestsModule::getJumpTableEntryType() { void LowerTypeTestsModule::buildBitSetsFromFunctions( ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || - Arch == Triple::thumb || Arch == Triple::aarch64) + Arch == Triple::thumb || Arch == Triple::aarch64 || + Arch == Triple::riscv32 || Arch == Triple::riscv64) 
buildBitSetsFromFunctionsNative(TypeIds, Functions); else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) buildBitSetsFromFunctionsWASM(TypeIds, Functions); @@ -1427,6 +1435,11 @@ void LowerTypeTestsModule::createJumpTable( F->addFnAttr("branch-target-enforcement", "false"); F->addFnAttr("sign-return-address", "none"); } + if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) { + // Make sure the jump table assembly is not modified by the assembler or + // the linker. + F->addFnAttr("target-features", "-c,-relax"); + } // Make sure we don't emit .eh_frame for this function. F->addFnAttr(Attribute::NoUnwind); @@ -2187,11 +2200,7 @@ bool LowerTypeTestsModule::lower() { } Sets.emplace_back(I, MaxUniqueId); } - llvm::sort(Sets, - [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1, - const std::pair<GlobalClassesTy::iterator, unsigned> &S2) { - return S1.second < S2.second; - }); + llvm::sort(Sets, llvm::less_second()); // For each disjoint set we found... for (const auto &S : Sets) { diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 97ef872c5499..b850591b4aa6 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -88,12 +88,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -113,7 +112,6 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/IR/ValueMap.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -121,8 +119,8 @@ #include "llvm/Support/Debug.h" #include 
"llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/Utils/FunctionComparator.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> #include <cassert> #include <iterator> @@ -139,10 +137,10 @@ STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); -static cl::opt<unsigned> NumFunctionsForSanityCheck( - "mergefunc-sanity", - cl::desc("How many functions in module could be used for " - "MergeFunctions pass sanity check. " +static cl::opt<unsigned> NumFunctionsForVerificationCheck( + "mergefunc-verify", + cl::desc("How many functions in a module could be used for " + "MergeFunctions to pass a basic correctness check. " "'0' disables this check. Works only with '-debug' key."), cl::init(0), cl::Hidden); @@ -228,10 +226,13 @@ private: /// analyzed again. std::vector<WeakTrackingVH> Deferred; + /// Set of values marked as used in llvm.used and llvm.compiler.used. + SmallPtrSet<GlobalValue *, 4> Used; + #ifndef NDEBUG /// Checks the rules of order relation introduced among functions set. - /// Returns true, if sanity check has been passed, and false if failed. - bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist); + /// Returns true, if check has been passed, and false if failed. 
+ bool doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist); #endif /// Insert a ComparableFunction into the FnTree, or merge it away if it's @@ -330,12 +331,12 @@ PreservedAnalyses MergeFunctionsPass::run(Module &M, } #ifndef NDEBUG -bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { - if (const unsigned Max = NumFunctionsForSanityCheck) { +bool MergeFunctions::doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist) { + if (const unsigned Max = NumFunctionsForVerificationCheck) { unsigned TripleNumber = 0; bool Valid = true; - dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; + dbgs() << "MERGEFUNC-VERIFY: Started for first " << Max << " functions.\n"; unsigned i = 0; for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(), @@ -351,7 +352,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { // If F1 <= F2, then F2 >= F1, otherwise report failure. if (Res1 != -Res2) { - dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber + dbgs() << "MERGEFUNC-VERIFY: Non-symmetric; triple: " << TripleNumber << "\n"; dbgs() << *F1 << '\n' << *F2 << '\n'; Valid = false; @@ -384,7 +385,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { } if (!Transitive) { - dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: " + dbgs() << "MERGEFUNC-VERIFY: Non-transitive; triple: " << TripleNumber << "\n"; dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " << Res4 << "\n"; @@ -395,7 +396,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { } } - dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n"; + dbgs() << "MERGEFUNC-VERIFY: " << (Valid ? "Passed." 
: "Failed.") << "\n"; return Valid; } return true; @@ -410,6 +411,11 @@ static bool isEligibleForMerging(Function &F) { bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; + SmallVector<GlobalValue *, 4> UsedV; + collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false); + collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/true); + Used.insert(UsedV.begin(), UsedV.end()); + // All functions in the module, ordered by hash. Functions with a unique // hash value are easily eliminated. std::vector<std::pair<FunctionComparator::FunctionHash, Function *>> @@ -436,7 +442,7 @@ bool MergeFunctions::runOnModule(Module &M) { std::vector<WeakTrackingVH> Worklist; Deferred.swap(Worklist); - LLVM_DEBUG(doSanityCheck(Worklist)); + LLVM_DEBUG(doFunctionalCheck(Worklist)); LLVM_DEBUG(dbgs() << "size of module: " << M.size() << '\n'); LLVM_DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); @@ -456,6 +462,7 @@ bool MergeFunctions::runOnModule(Module &M) { FnTree.clear(); FNodesInTree.clear(); GlobalNumbers.clear(); + Used.clear(); return Changed; } @@ -484,7 +491,7 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { if (SrcTy->isStructTy()) { assert(DestTy->isStructTy()); assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements()); - Value *Result = UndefValue::get(DestTy); + Value *Result = PoisonValue::get(DestTy); for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) { Value *Element = createCast( Builder, Builder.CreateExtractValue(V, makeArrayRef(I)), @@ -828,7 +835,10 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { // For better debugability, under MergeFunctionsPDI, we do not modify G's // call sites to point to F even when within the same translation unit. 
if (!G->isInterposable() && !MergeFunctionsPDI) { - if (G->hasGlobalUnnamedAddr()) { + // Functions referred to by llvm.used/llvm.compiler.used are special: + // there are uses of the symbol name that are not visible to LLVM, + // usually from inline asm. + if (G->hasGlobalUnnamedAddr() && !Used.contains(G)) { // G might have been a key in our GlobalNumberState, and it's illegal // to replace a key in ValueMap<GlobalValue *> with a non-global. GlobalNumbers.erase(G); diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index d515303e4911..143715006512 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -14,43 +14,33 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/ModuleInliner.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineOrder.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" #include 
"llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include <cassert> -#include <functional> using namespace llvm; @@ -94,7 +84,9 @@ InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM, // inliner pass, and thus the lifetime of the owned advisor. The one we // would get from the MAM can be invalidated as a result of the inliner's // activity. - OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(M, FAM, Params); + OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>( + M, FAM, Params, + InlineContext{LTOPhase, InlinePass::ModuleInliner}); return *OwnedAdvisor; } @@ -119,7 +111,9 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n"); auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M); - if (!IAA.tryCreate(Params, Mode, {})) { + if (!IAA.tryCreate( + Params, Mode, {}, + InlineContext{LTOPhase, InlinePass::ModuleInliner})) { M.getContext().emitError( "Could not setup Inlining Advisor for the requested " "mode and/or options"); @@ -153,7 +147,8 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, // the SCC inliner, which need some refactoring. 
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls; if (InlineEnablePriorityOrder) - Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>(); + Calls = std::make_unique<PriorityInlineOrder>( + std::make_unique<SizePriority>()); else Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>(); assert(Calls != nullptr && "Expected an initialized InlineOrder"); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 2d765fb6ce6d..227ad8501f25 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -49,7 +49,6 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" -#include "llvm/Transforms/Utils/CodeExtractor.h" #include <algorithm> @@ -59,17 +58,16 @@ using namespace omp; #define DEBUG_TYPE "openmp-opt" static cl::opt<bool> DisableOpenMPOptimizations( - "openmp-opt-disable", cl::ZeroOrMore, - cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, - cl::init(false)); + "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."), + cl::Hidden, cl::init(false)); static cl::opt<bool> EnableParallelRegionMerging( - "openmp-opt-enable-merging", cl::ZeroOrMore, + "openmp-opt-enable-merging", cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, cl::init(false)); static cl::opt<bool> - DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore, + DisableInternalization("openmp-opt-disable-internalization", cl::desc("Disable function internalization."), cl::Hidden, cl::init(false)); @@ -85,42 +83,47 @@ static cl::opt<bool> HideMemoryTransferLatency( cl::Hidden, cl::init(false)); static cl::opt<bool> DisableOpenMPOptDeglobalization( - "openmp-opt-disable-deglobalization", cl::ZeroOrMore, + "openmp-opt-disable-deglobalization", cl::desc("Disable OpenMP optimizations involving deglobalization."), cl::Hidden, 
cl::init(false)); static cl::opt<bool> DisableOpenMPOptSPMDization( - "openmp-opt-disable-spmdization", cl::ZeroOrMore, + "openmp-opt-disable-spmdization", cl::desc("Disable OpenMP optimizations involving SPMD-ization."), cl::Hidden, cl::init(false)); static cl::opt<bool> DisableOpenMPOptFolding( - "openmp-opt-disable-folding", cl::ZeroOrMore, + "openmp-opt-disable-folding", cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden, cl::init(false)); static cl::opt<bool> DisableOpenMPOptStateMachineRewrite( - "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore, + "openmp-opt-disable-state-machine-rewrite", cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false)); static cl::opt<bool> DisableOpenMPOptBarrierElimination( - "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore, + "openmp-opt-disable-barrier-elimination", cl::desc("Disable OpenMP optimizations that eliminate barriers."), cl::Hidden, cl::init(false)); static cl::opt<bool> PrintModuleAfterOptimizations( - "openmp-opt-print-module", cl::ZeroOrMore, + "openmp-opt-print-module-after", cl::desc("Print the current module after OpenMP optimizations."), cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintModuleBeforeOptimizations( + "openmp-opt-print-module-before", + cl::desc("Print the current module before OpenMP optimizations."), + cl::Hidden, cl::init(false)); + static cl::opt<bool> AlwaysInlineDeviceFunctions( - "openmp-opt-inline-device", cl::ZeroOrMore, + "openmp-opt-inline-device", cl::desc("Inline all applicible functions on the device."), cl::Hidden, cl::init(false)); static cl::opt<bool> - EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore, + EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::desc("Enables more verbose remarks."), cl::Hidden, cl::init(false)); @@ -129,6 +132,11 @@ static cl::opt<unsigned> cl::desc("Maximal number of attributor iterations."), cl::init(256)); +static cl::opt<unsigned> + 
SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden, + cl::desc("Maximum amount of shared memory to use."), + cl::init(std::numeric_limits<unsigned>::max())); + STATISTIC(NumOpenMPRuntimeCallsDeduplicated, "Number of OpenMP runtime calls deduplicated"); STATISTIC(NumOpenMPParallelRegionsDeleted, @@ -493,11 +501,14 @@ struct OMPInformationCache : public InformationCache { // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` // functions, except if `optnone` is present. - for (Function &F : M) { - for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) - if (F.getName().startswith(Prefix) && - !F.hasFnAttribute(Attribute::OptimizeNone)) - F.removeFnAttr(Attribute::NoInline); + if (isOpenMPDevice(M)) { + for (Function &F : M) { + for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) + if (F.hasFnAttribute(Attribute::NoInline) && + F.getName().startswith(Prefix) && + !F.hasFnAttribute(Attribute::OptimizeNone)) + F.removeFnAttr(Attribute::NoInline); + } } // TODO: We should attach the attributes defined in OMPKinds.def. 
@@ -591,7 +602,7 @@ struct KernelInfoState : AbstractState { /// Abstract State interface ///{ - KernelInfoState() {} + KernelInfoState() = default; KernelInfoState(bool BestState) { if (!BestState) indicatePessimisticFixpoint(); @@ -926,8 +937,7 @@ private: SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap; BasicBlock *StartBB = nullptr, *EndBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -966,8 +976,7 @@ private: const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); ParentBB->getTerminator()->eraseFromParent(); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationIP) { + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -1107,10 +1116,8 @@ private: // callbacks. 
SmallVector<Value *, 8> Args; for (auto *CI : MergableCIs) { - Value *Callee = - CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts(); - FunctionType *FT = - cast<FunctionType>(Callee->getType()->getPointerElementType()); + Value *Callee = CI->getArgOperand(CallbackCalleeOperand); + FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask; Args.clear(); Args.push_back(OutlinedFn->getArg(0)); Args.push_back(OutlinedFn->getArg(1)); @@ -1458,7 +1465,6 @@ private: case Intrinsic::nvvm_barrier0_and: case Intrinsic::nvvm_barrier0_or: case Intrinsic::nvvm_barrier0_popc: - case Intrinsic::amdgcn_s_barrier: return true; default: break; @@ -2120,6 +2126,8 @@ private: OMPRTL___kmpc_barrier_simple_generic); ExternalizationRAII ThreadId(OMPInfoCache, OMPRTL___kmpc_get_hardware_thread_id_in_block); + ExternalizationRAII NumThreads( + OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block); ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size); registerAAs(IsModulePass); @@ -2407,8 +2415,7 @@ struct AAICVTrackerFunction : public AAICVTracker { auto CallCheck = [&](Instruction &I) { Optional<Value *> ReplVal = getValueForCall(A, I, ICV); - if (ReplVal.hasValue() && - ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) + if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) HasChanged = ChangeStatus::CHANGED; return true; @@ -2468,7 +2475,8 @@ struct AAICVTrackerFunction : public AAICVTracker { if (ICVTrackingAA.isAssumedTracked()) { Optional<Value *> URV = ICVTrackingAA.getUniqueReplacementValue(ICV); - if (!URV || (*URV && AA::isValidAtPosition(**URV, I, OMPInfoCache))) + if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I), + OMPInfoCache))) return URV; } @@ -2509,13 +2517,13 @@ struct AAICVTrackerFunction : public AAICVTracker { if (ValuesMap.count(CurrInst)) { Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst); // Unknown value, track new. 
- if (!ReplVal.hasValue()) { + if (!ReplVal) { ReplVal = NewReplVal; break; } // If we found a new value, we can't know the icv value anymore. - if (NewReplVal.hasValue()) + if (NewReplVal) if (ReplVal != NewReplVal) return nullptr; @@ -2523,11 +2531,11 @@ struct AAICVTrackerFunction : public AAICVTracker { } Optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV); - if (!NewReplVal.hasValue()) + if (!NewReplVal) continue; // Unknown value, track new. - if (!ReplVal.hasValue()) { + if (!ReplVal) { ReplVal = NewReplVal; break; } @@ -2539,7 +2547,7 @@ struct AAICVTrackerFunction : public AAICVTracker { } // If we are in the same BB and we have a value, we are done. - if (CurrBB == I->getParent() && ReplVal.hasValue()) + if (CurrBB == I->getParent() && ReplVal) return ReplVal; // Go through all predecessors and add terminators for analysis. @@ -2597,7 +2605,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker { ICVTrackingAA.getReplacementValue(ICV, &I, A); // If we found a second ICV value there is no unique returned value. - if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) + if (UniqueICVValue && UniqueICVValue != NewReplVal) return false; UniqueICVValue = NewReplVal; @@ -2648,10 +2656,10 @@ struct AAICVTrackerCallSite : AAICVTracker { } ChangeStatus manifest(Attributor &A) override { - if (!ReplVal.hasValue() || !ReplVal.getValue()) + if (!ReplVal || !*ReplVal) return ChangeStatus::UNCHANGED; - A.changeValueAfterManifest(*getCtxI(), **ReplVal); + A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal); A.deleteAfterManifest(*getCtxI()); return ChangeStatus::CHANGED; @@ -2789,7 +2797,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs; /// Total number of basic blocks in this function. 
- long unsigned NumBBs; + long unsigned NumBBs = 0; }; ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { @@ -2952,12 +2960,23 @@ struct AAHeapToSharedFunction : public AAHeapToShared { } void initialize(Attributor &A) override { + if (DisableOpenMPOptDeglobalization) { + indicatePessimisticFixpoint(); + return; + } + auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; + Attributor::SimplifictionCallbackTy SCB = + [](const IRPosition &, const AbstractAttribute *, + bool &) -> Optional<Value *> { return nullptr; }; for (User *U : RFI.Declaration->users()) - if (CallBase *CB = dyn_cast<CallBase>(U)) + if (CallBase *CB = dyn_cast<CallBase>(U)) { MallocCalls.insert(CB); + A.registerSimplificationCallback(IRPosition::callsite_returned(*CB), + SCB); + } findPotentialRemovedFreeCalls(A); } @@ -2999,6 +3018,14 @@ struct AAHeapToSharedFunction : public AAHeapToShared { auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0)); + if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) { + LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB + << " with shared memory." 
+ << " Shared memory usage is limited to " + << SharedMemoryLimit << " bytes\n"); + continue; + } + LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue() << " bytes of shared memory\n"); @@ -3029,11 +3056,12 @@ struct AAHeapToSharedFunction : public AAHeapToShared { "HeapToShared on allocation without alignment attribute"); SharedMem->setAlignment(MaybeAlign(Alignment)); - A.changeValueAfterManifest(*CB, *NewBuffer); + A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer); A.deleteAfterManifest(*CB); A.deleteAfterManifest(*FreeCalls.front()); - NumBytesMovedToSharedMemory += AllocSize->getZExtValue(); + SharedMemoryUsed += AllocSize->getZExtValue(); + NumBytesMovedToSharedMemory = SharedMemoryUsed; Changed = ChangeStatus::CHANGED; } @@ -3069,6 +3097,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared { SmallSetVector<CallBase *, 4> MallocCalls; /// Collection of potentially removed free calls in a function. SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls; + /// The total amount of shared memory that has been used for HeapToShared. + unsigned SharedMemoryUsed = 0; }; struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> { @@ -3137,12 +3167,6 @@ struct AAKernelInfoFunction : AAKernelInfo { auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); Function *Fn = getAnchorScope(); - if (!OMPInfoCache.Kernels.count(Fn)) - return; - - // Add itself to the reaching kernel and set IsKernelEntry. - ReachingKernelEntries.insert(Fn); - IsKernelEntry = true; OMPInformationCache::RuntimeFunctionInfo &InitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; @@ -3176,10 +3200,12 @@ struct AAKernelInfoFunction : AAKernelInfo { Fn); // Ignore kernels without initializers such as global constructors. 
- if (!KernelInitCB || !KernelDeinitCB) { - indicateOptimisticFixpoint(); + if (!KernelInitCB || !KernelDeinitCB) return; - } + + // Add itself to the reaching kernel and set IsKernelEntry. + ReachingKernelEntries.insert(Fn); + IsKernelEntry = true; // For kernels we might need to initialize/finalize the IsSPMD state and // we need to register a simplification callback so that the Attributor @@ -3345,8 +3371,17 @@ struct AAKernelInfoFunction : AAKernelInfo { return false; } - // Check if the kernel is already in SPMD mode, if so, return success. + // Get the actual kernel, could be the caller of the anchor scope if we have + // a debug wrapper. Function *Kernel = getAnchorScope(); + if (Kernel->hasLocalLinkage()) { + assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper."); + auto *CB = cast<CallBase>(Kernel->user_back()); + Kernel = CB->getCaller(); + } + assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!"); + + // Check if the kernel is already in SPMD mode, if so, return success. 
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( (Kernel->getName() + "_exec_mode").str()); assert(ExecMode && "Kernel without exec mode?"); @@ -3711,9 +3746,9 @@ struct AAKernelInfoFunction : AAKernelInfo { // __kmpc_get_hardware_num_threads_in_block(); // WarpSize = __kmpc_get_warp_size(); // BlockSize = BlockHwSize - WarpSize; - // if (InitCB >= BlockSize) return; - // IsWorkerCheckBB: bool IsWorker = InitCB >= 0; + // IsWorkerCheckBB: bool IsWorker = InitCB != -1; // if (IsWorker) { + // if (InitCB >= BlockSize) return; // SMBeginBB: __kmpc_barrier_simple_generic(...); // void *WorkFn; // bool Active = __kmpc_kernel_parallel(&WorkFn); @@ -3770,6 +3805,13 @@ struct AAKernelInfoFunction : AAKernelInfo { ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc); InitBB->getTerminator()->eraseFromParent(); + Instruction *IsWorker = + ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, + ConstantInt::get(KernelInitCB->getType(), -1), + "thread.is_worker", InitBB); + IsWorker->setDebugLoc(DLoc); + BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB); + Module &M = *Kernel->getParent(); auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); FunctionCallee BlockHwSizeFn = @@ -3779,29 +3821,22 @@ struct AAKernelInfoFunction : AAKernelInfo { OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_get_warp_size); CallInst *BlockHwSize = - CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB); + CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB); OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize); BlockHwSize->setDebugLoc(DLoc); - CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB); + CallInst *WarpSize = + CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB); OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize); WarpSize->setDebugLoc(DLoc); - Instruction *BlockSize = - BinaryOperator::CreateSub(BlockHwSize, WarpSize, 
"block.size", InitBB); + Instruction *BlockSize = BinaryOperator::CreateSub( + BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB); BlockSize->setDebugLoc(DLoc); - Instruction *IsMainOrWorker = - ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, - BlockSize, "thread.is_main_or_worker", InitBB); + Instruction *IsMainOrWorker = ICmpInst::Create( + ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize, + "thread.is_main_or_worker", IsWorkerCheckBB); IsMainOrWorker->setDebugLoc(DLoc); - BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker, - InitBB); - - Instruction *IsWorker = - ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, - ConstantInt::get(KernelInitCB->getType(), -1), - "thread.is_worker", IsWorkerCheckBB); - IsWorker->setDebugLoc(DLoc); - BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, - IsWorkerCheckBB); + BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB, + IsMainOrWorker, IsWorkerCheckBB); // Create local storage for the work function pointer. const DataLayout &DL = M.getDataLayout(); @@ -4241,10 +4276,10 @@ struct AAKernelInfoCallSite : AAKernelInfo { unsigned ScheduleTypeVal = ScheduleTypeCI ? 
ScheduleTypeCI->getZExtValue() : 0; switch (OMPScheduleType(ScheduleTypeVal)) { - case OMPScheduleType::Static: - case OMPScheduleType::StaticChunked: - case OMPScheduleType::Distribute: - case OMPScheduleType::DistributeChunked: + case OMPScheduleType::UnorderedStatic: + case OMPScheduleType::UnorderedStaticChunked: + case OMPScheduleType::OrderedDistribute: + case OMPScheduleType::OrderedDistributeChunked: break; default: SPMDCompatibilityTracker.indicatePessimisticFixpoint(); @@ -4390,7 +4425,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { std::string Str("simplified value: "); - if (!SimplifiedValue.hasValue()) + if (!SimplifiedValue) return Str + std::string("none"); if (!SimplifiedValue.getValue()) @@ -4420,8 +4455,8 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { IRPosition::callsite_returned(CB), [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> Optional<Value *> { - assert((isValidState() || (SimplifiedValue.hasValue() && - SimplifiedValue.getValue() == nullptr)) && + assert((isValidState() || + (SimplifiedValue && SimplifiedValue.getValue() == nullptr)) && "Unexpected invalid state!"); if (!isAtFixpoint()) { @@ -4461,9 +4496,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { ChangeStatus manifest(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; - if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) { + if (SimplifiedValue && *SimplifiedValue) { Instruction &I = *getCtxI(); - A.changeValueAfterManifest(I, **SimplifiedValue); + A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue); A.deleteAfterManifest(I); CallBase *CB = dyn_cast<CallBase>(&I); @@ -4549,7 +4584,7 @@ private: // We have empty reaching kernels, therefore we cannot tell if the // associated call site can be folded. At this moment, SimplifiedValue // must be none. 
- assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none"); + assert(!SimplifiedValue && "SimplifiedValue should be none"); } return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED @@ -4592,7 +4627,7 @@ private: return indicatePessimisticFixpoint(); if (CallerKernelInfoAA.ReachingKernelEntries.empty()) { - assert(!SimplifiedValue.hasValue() && + assert(!SimplifiedValue && "SimplifiedValue should keep none at this point"); return ChangeStatus::UNCHANGED; } @@ -4700,18 +4735,23 @@ void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) { void OpenMPOpt::registerAAs(bool IsModulePass) { if (SCC.empty()) - return; + if (IsModulePass) { // Ensure we create the AAKernelInfo AAs first and without triggering an // update. This will make sure we register all value simplification // callbacks before any other AA has the chance to create an AAValueSimplify // or similar. - for (Function *Kernel : OMPInfoCache.Kernels) + auto CreateKernelInfoCB = [&](Use &, Function &Kernel) { A.getOrCreateAAFor<AAKernelInfo>( - IRPosition::function(*Kernel), /* QueryingAA */ nullptr, + IRPosition::function(Kernel), /* QueryingAA */ nullptr, DepClassTy::NONE, /* ForceUpdate */ false, /* UpdateAfterInit */ false); + return false; + }; + OMPInformationCache::RuntimeFunctionInfo &InitRFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; + InitRFI.foreachUse(SCC, CreateKernelInfoCB); registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id); registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); @@ -4899,6 +4939,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); KernelSet Kernels = getDeviceKernels(M); + if (PrintModuleBeforeOptimizations) + LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M); + auto IsCalled = [&](Function &F) { if (Kernels.contains(&F)) return true; @@ -4958,8 +5001,15 @@ PreservedAnalyses 
OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; - Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, - MaxFixpointIterations, OREGetter, DEBUG_TYPE); + + AttributorConfig AC(CGUpdater); + AC.DefaultInitializeLiveInternals = false; + AC.RewriteSignatures = false; + AC.MaxFixpointIterations = MaxFixpointIterations; + AC.OREGetter = OREGetter; + AC.PassName = DEBUG_TYPE; + + Attributor A(Functions, InfoCache, AC); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(true); @@ -5001,6 +5051,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, Module &M = *C.begin()->getFunction().getParent(); + if (PrintModuleBeforeOptimizations) + LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M); + KernelSet Kernels = getDeviceKernels(M); FunctionAnalysisManager &FAM = @@ -5022,8 +5075,16 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, - MaxFixpointIterations, OREGetter, DEBUG_TYPE); + + AttributorConfig AC(CGUpdater); + AC.DefaultInitializeLiveInternals = false; + AC.IsModulePass = false; + AC.RewriteSignatures = false; + AC.MaxFixpointIterations = MaxFixpointIterations; + AC.OREGetter = OREGetter; + AC.PassName = DEBUG_TYPE; + + Attributor A(Functions, InfoCache, AC); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(false); @@ -5093,8 +5154,16 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 
SetFixpointIterations : 32; - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, - MaxFixpointIterations, OREGetter, DEBUG_TYPE); + + AttributorConfig AC(CGUpdater); + AC.DefaultInitializeLiveInternals = false; + AC.IsModulePass = false; + AC.RewriteSignatures = false; + AC.MaxFixpointIterations = MaxFixpointIterations; + AC.OREGetter = OREGetter; + AC.PassName = DEBUG_TYPE; + + Attributor A(Functions, InfoCache, AC); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Result = OMPOpt.run(false); diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 5f2223e4047e..54c72bdbb203 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -14,7 +14,6 @@ #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -40,6 +39,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -55,8 +55,6 @@ #include <algorithm> #include <cassert> #include <cstdint> -#include <functional> -#include <iterator> #include <memory> #include <tuple> #include <vector> @@ -99,7 +97,7 @@ static cl::opt<bool> // This is an option used by testing: static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis", - cl::init(false), cl::ZeroOrMore, + cl::ReallyHidden, cl::desc("Skip Cost Analysis")); // Used to determine if a cold region is worth outlining based on @@ -129,7 +127,7 @@ static cl::opt<unsigned> MaxNumInlineBlocks( // Command line option to set the maximum number of partial inlining allowed // for the module. The default value of -1 means no limit. 
static cl::opt<int> MaxNumPartialInlining( - "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, + "max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited")); // Used only when PGO or user annotated branch data is absent. It is @@ -137,7 +135,7 @@ static cl::opt<int> MaxNumPartialInlining( // produces larger value, the BFI value will be used. static cl::opt<int> OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), - cl::Hidden, cl::ZeroOrMore, + cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block")); @@ -169,7 +167,7 @@ struct FunctionOutliningInfo { }; struct FunctionOutliningMultiRegionInfo { - FunctionOutliningMultiRegionInfo() {} + FunctionOutliningMultiRegionInfo() = default; // Container for outline regions struct OutlineRegionInfo { @@ -440,7 +438,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo( }; auto BBProfileCount = [BFI](BasicBlock *BB) { - return BFI->getBlockProfileCount(BB).getValueOr(0); + return BFI->getBlockProfileCount(BB).value_or(0); }; // Use the same computeBBInlineCost function to compute the cost savings of @@ -741,7 +739,7 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( auto OutlineRegionRelFreq = BranchProbability::getBranchProbability( OutliningCallFreq.getFrequency(), EntryFreq.getFrequency()); - if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get())) + if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI)) return OutlineRegionRelFreq; // When profile data is not available, we need to be conservative in diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 74f68531b89a..ae787be40c55 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -15,19 +15,13 @@ #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/STLExtras.h" #include 
"llvm/ADT/SmallVector.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Verifier.h" -#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Target/CGPassBuilderOption.h" @@ -41,22 +35,16 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" -#include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" -#include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Vectorize.h" -#include "llvm/Transforms/Vectorize/LoopVectorize.h" -#include "llvm/Transforms/Vectorize/SLPVectorizer.h" -#include "llvm/Transforms/Vectorize/VectorCombine.h" using namespace llvm; namespace llvm { -cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false), - cl::Hidden, cl::ZeroOrMore, +cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::Hidden, cl::desc("Run Partial inlinining pass")); static cl::opt<bool> @@ -111,8 +99,8 @@ static cl::opt<bool> EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable performing ThinLTO.")); -cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), - cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass")); +cl::opt<bool> EnableHotColdSplit("hot-cold-split", + cl::desc("Enable hot-cold splitting pass")); cl::opt<bool> 
EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass")); @@ -126,12 +114,12 @@ cl::opt<bool> cl::desc("Disable pre-instrumentation inliner")); cl::opt<int> PreInlineThreshold( - "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + "preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); cl::opt<bool> - EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore, + EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt<bool> @@ -139,13 +127,8 @@ static cl::opt<bool> cl::Hidden, cl::desc("Disable shrink-wrap library calls")); -static cl::opt<bool> EnableSimpleLoopUnswitch( - "enable-simple-loop-unswitch", cl::init(false), cl::Hidden, - cl::desc("Enable the simple loop unswitch pass. Also enables independent " - "cleanup passes integrated into the loop pass manager pipeline.")); - cl::opt<bool> - EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore, + EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)")); // This option is used in simplifying testing SampleFDO optimizations for @@ -336,59 +319,6 @@ void PassManagerBuilder::populateFunctionPassManager( FPM.add(createEarlyCSEPass()); } -// Do PGO instrumentation generation or use pass as the option specified. -void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, - bool IsCS = false) { - if (IsCS) { - if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) - return; - } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) - return; - - // Perform the preinline and cleanup passes for O1 and above. - // We will not do this inline for context sensitive PGO (when IsCS is true). - if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) { - // Create preinline pass. 
We construct an InlineParams object and specify - // the threshold here to avoid the command line options of the regular - // inliner to influence pre-inlining. The only fields of InlineParams we - // care about are DefaultThreshold and HintThreshold. - InlineParams IP; - IP.DefaultThreshold = PreInlineThreshold; - // FIXME: The hint threshold has the same value used by the regular inliner - // when not optimzing for size. This should probably be lowered after - // performance testing. - // Use PreInlineThreshold for both -Os and -Oz. Not running preinliner makes - // the instrumented binary unusably large. Even if PreInlineThreshold is not - // correct thresold for -Oz, it is better than not running preinliner. - IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325; - - MPM.add(createFunctionInliningPass(IP)); - MPM.add(createSROAPass()); - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Combine silly seq's - addExtensionsToPM(EP_Peephole, MPM); - } - if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { - MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); - // Add the profile lowering pass. - InstrProfOptions Options; - if (!PGOInstrGen.empty()) - Options.InstrProfileOutput = PGOInstrGen; - Options.DoCounterPromotion = true; - Options.UseBFIInPromotion = IsCS; - MPM.add(createLoopRotatePass()); - MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); - } - if (!PGOInstrUse.empty()) - MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); - // Indirect call promotion that promotes intra-module targets only. - // For ThinLTO this is done earlier due to interactions with globalopt - // for imported functions. We don't run this at -O0. 
- if (OptLevel > 0 && !IsCS) - MPM.add( - createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); -} void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. @@ -404,7 +334,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createGVNHoistPass()); if (EnableGVNSink) { MPM.add(createGVNSinkPass()); - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); } } @@ -418,7 +349,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createJumpThreadingPass()); // Thread jumps. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals } - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove BBs // Combine silly seq's if (OptLevel > 2) MPM.add(createAggressiveInstCombinerPass()); @@ -427,14 +360,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createLibCallsShrinkWrapPass()); addExtensionsToPM(EP_Peephole, MPM); - // Optimize memory intrinsic calls based on the profiled size information. - if (SizeLevel == 0) - MPM.add(createPGOMemOPSizeOptLegacyPass()); - // TODO: Investigate the cost/benefit of tail call elimination on debugging. if (OptLevel > 1) MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // The matrix extension can introduce large vector operations early, which can @@ -443,29 +374,32 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createVectorCombinePass()); // Begin the loop pass pipeline. 
- if (EnableSimpleLoopUnswitch) { - // The simple loop unswitch pass relies on separate cleanup passes. Schedule - // them first so when we re-process a loop they run before other loop - // passes. - MPM.add(createLoopInstSimplifyPass()); - MPM.add(createLoopSimplifyCFGPass()); - } + + // The simple loop unswitch pass relies on separate cleanup passes. Schedule + // them first so when we re-process a loop they run before other loop + // passes. + MPM.add(createLoopInstSimplifyPass()); + MPM.add(createLoopSimplifyCFGPass()); + // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - if (EnableSimpleLoopUnswitch) - MPM.add(createSimpleLoopUnswitchLegacyPass()); - else - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); + MPM.add(createSimpleLoopUnswitchLegacyPass(OptLevel == 3)); // FIXME: We break the loop pass pipeline here in order to do full // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the // need for this. 
- MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); MPM.add(createInstructionCombiningPass()); // We resume loop passes creating a second loop pipeline here. if (EnableLoopFlatten) { @@ -521,7 +455,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // TODO: Investigate if this is too expensive at O1. if (OptLevel > 1) { MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } addExtensionsToPM(EP_ScalarOptimizerLate, MPM); @@ -580,9 +515,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, PM.add(createEarlyCSEPass()); PM.add(createCorrelatedValuePropagationPass()); PM.add(createInstructionCombiningPass()); - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); - PM.add(createCFGSimplificationPass()); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); + PM.add(createSimpleLoopUnswitchLegacyPass()); + PM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); PM.add(createInstructionCombiningPass()); } @@ -597,6 +534,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, // before SLP vectorization. PM.add(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) @@ -641,7 +579,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. 
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } PM.add(createWarnMissedTransformationsPass()); @@ -657,10 +596,6 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { - // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link - // is handled separately, so just check this is not the ThinLTO post-link. - bool DefaultOrPreLinkPipeline = !PerformThinLTO; - MPM.add(createAnnotation2MetadataLegacyPass()); if (!PGOSampleUse.empty()) { @@ -678,7 +613,6 @@ void PassManagerBuilder::populateModulePassManager( // If all optimizations are disabled, just run the always-inline pass and, // if enabled, the function merging pass. if (OptLevel == 0) { - addPGOInstrPasses(MPM); if (Inliner) { MPM.add(Inliner); Inliner = nullptr; @@ -732,8 +666,6 @@ void PassManagerBuilder::populateModulePassManager( // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. if (PerformThinLTO) { - MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, - !PGOSampleUse.empty())); MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); } @@ -772,20 +704,9 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - - // For SamplePGO in ThinLTO compile phase, we do not want to do indirect - // call promotion as it will change the CFG too much to make the 2nd - // profile annotation in backend more difficult. 
- // PGO instrumentation is added during the compile phase for ThinLTO, do - // not run it a second time - if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) - addPGOInstrPasses(MPM); - - // Create profile COMDAT variables. Lld linker wants to see all variables - // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. - if (!PerformThinLTO && EnablePGOCSInstrGen) - MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Clean up after IPCP & DAE // We add a module alias analysis pass here. In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive @@ -811,8 +732,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createOpenMPOptCGSCCLegacyPass()); MPM.add(createPostOrderFunctionAttrsLegacyPass()); - if (OptLevel > 2) - MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args addExtensionsToPM(EP_CGSCCOptimizerLate, MPM); addFunctionSimplificationPasses(MPM); @@ -837,14 +756,6 @@ void PassManagerBuilder::populateModulePassManager( // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); - // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass - // for LTO and ThinLTO -- The actual pass will be called after all inlines - // are performed. - // Need to do this after COMDAT variables have been eliminated, - // (i.e. after EliminateAvailableExternallyPass). - if (!(PrepareForLTO || PrepareForThinLTO)) - addPGOInstrPasses(MPM, /* IsCS */ true); - if (EnableOrderFileInstrumentation) MPM.add(createInstrOrderFilePass()); @@ -886,7 +797,8 @@ void PassManagerBuilder::populateModulePassManager( // later might get benefit of no-alias assumption in clone loop. 
if (UseLoopVersioningLICM) { MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } // We add a fresh GlobalsModRef run at this point. This is particularly @@ -972,7 +884,8 @@ void PassManagerBuilder::populateModulePassManager( // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); addExtensionsToPM(EP_OptimizerLast, MPM); @@ -1009,13 +922,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Split call-site with more constrained arguments. PM.add(createCallSiteSplittingPass()); - // Indirect call promotion. This should promote all the targets that are - // left by the earlier promotion pass that promotes intra-module targets. - // This two-step promotion is to save the compile time. For LTO, it should - // produce the same result as if we only do promotion here. - PM.add( - createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty())); - // Propage constant function arguments by specializing the functions. if (EnableFunctionSpecialization && OptLevel > 2) PM.add(createFunctionSpecializationPass()); @@ -1081,9 +987,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createPruneEHPass()); // Remove dead EH info. - // CSFDO instrumentation and use pass. - addPGOInstrPasses(PM, /* IsCS */ true); - // Infer attributes on declarations, call sites, arguments, etc. for an SCC. 
if (AttributorRun & AttributorRunOption::CGSCC) PM.add(createAttributorCGSCCLegacyPass()); @@ -1098,14 +1001,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createGlobalOptimizerPass()); PM.add(createGlobalDCEPass()); // Remove dead functions. - // If we didn't decide to inline a function, check to see if we can - // transform it to pass arguments by value instead of by reference. - PM.add(createArgumentPromotionPass()); - // The IPO passes may leave cruft around. Clean up after them. PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); - PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); + PM.add(createJumpThreadingPass()); // Break up allocas PM.add(createSROAPass()); @@ -1120,7 +1019,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. 
@@ -1149,7 +1049,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { addExtensionsToPM(EP_Peephole, PM); - PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); + PM.add(createJumpThreadingPass()); } void PassManagerBuilder::addLateLTOOptimizationPasses( @@ -1175,80 +1075,6 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( PM.add(createMergeFunctionsPass()); } -void PassManagerBuilder::populateThinLTOPassManager( - legacy::PassManagerBase &PM) { - PerformThinLTO = true; - if (LibraryInfo) - PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - - if (VerifyInput) - PM.add(createVerifierPass()); - - if (ImportSummary) { - // This pass imports type identifier resolutions for whole-program - // devirtualization and CFI. It must run early because other passes may - // disturb the specific instruction patterns that these passes look for, - // creating dependencies on resolutions that may not appear in the summary. - // - // For example, GVN may transform the pattern assume(type.test) appearing in - // two basic blocks into assume(phi(type.test, type.test)), which would - // transform a dependency on a WPD resolution into a dependency on a type - // identifier resolution for CFI. - // - // Also, WPD has access to more precise information than ICP and can - // devirtualize more effectively, so it should operate on the IR first. 
- PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary)); - PM.add(createLowerTypeTestsPass(nullptr, ImportSummary)); - } - - populateModulePassManager(PM); - - if (VerifyOutput) - PM.add(createVerifierPass()); - PerformThinLTO = false; -} - -void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { - if (LibraryInfo) - PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - - if (VerifyInput) - PM.add(createVerifierPass()); - - addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM); - - if (OptLevel != 0) - addLTOOptimizationPasses(PM); - else { - // The whole-program-devirt pass needs to run at -O0 because only it knows - // about the llvm.type.checked.load intrinsic: it needs to both lower the - // intrinsic itself and handle it in the summary. - PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); - } - - // Create a function that performs CFI checks for cross-DSO calls with targets - // in the current module. - PM.add(createCrossDSOCFIPass()); - - // Lower type metadata and the type.test intrinsic. This pass supports Clang's - // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at - // link time if CFI is enabled. The pass does nothing if CFI is disabled. - PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); - // Run a second time to clean up any type tests left behind by WPD for use - // in ICP (which is performed earlier than this in the regular LTO pipeline). 
- PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - - if (OptLevel != 0) - addLateLTOOptimizationPasses(PM); - - addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM); - - PM.add(createAnnotationRemarksLegacyPass()); - - if (VerifyOutput) - PM.add(createVerifierPass()); -} - LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); @@ -1314,18 +1140,3 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, legacy::PassManagerBase *MPM = unwrap(PM); Builder->populateModulePassManager(*MPM); } - -void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, - LLVMPassManagerRef PM, - LLVMBool Internalize, - LLVMBool RunInliner) { - PassManagerBuilder *Builder = unwrap(PMB); - legacy::PassManagerBase *LPM = unwrap(PM); - - // A small backwards compatibility hack. populateLTOPassManager used to take - // an RunInliner option. - if (RunInliner && !Builder->Inliner) - Builder->Inliner = createFunctionInliningPass(); - - Builder->populateLTOPassManager(*LPM); -} diff --git a/llvm/lib/Transforms/IPO/PruneEH.cpp b/llvm/lib/Transforms/IPO/PruneEH.cpp index 39de19ca9e9d..e0836a9fd699 100644 --- a/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" @@ -24,9 +23,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/Local.h" @@ -246,7 +243,7 @@ static void DeleteBasicBlock(BasicBlock *BB, 
CallGraphUpdater &CGU) { } if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); } if (TokenInst) { diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index 5779553ee732..26fb7d676429 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -18,6 +18,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/Transforms/Utils/SCCPSolver.h" using namespace llvm; diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 7334bf695b67..6859953de962 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -14,7 +14,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/ProfileData/SampleProf.h" #include <map> #include <queue> @@ -62,23 +63,24 @@ ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) { return ChildNodeRet; } -ContextTrieNode &ContextTrieNode::moveToChildContext( - const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - uint32_t ContextFramesToRemove, bool DeleteNode) { +ContextTrieNode & +SampleContextTracker::moveContextSamples(ContextTrieNode &ToNodeParent, + const LineLocation &CallSite, + ContextTrieNode &&NodeToMove) { uint64_t Hash = FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite); + std::map<uint64_t, ContextTrieNode> &AllChildContext = + ToNodeParent.getAllChildContext(); assert(!AllChildContext.count(Hash) && "Node to remove must exist"); - LineLocation OldCallSite = NodeToMove.CallSiteLoc; - ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); AllChildContext[Hash] = NodeToMove; 
ContextTrieNode &NewNode = AllChildContext[Hash]; - NewNode.CallSiteLoc = CallSite; + NewNode.setCallSiteLoc(CallSite); // Walk through nodes in the moved the subtree, and update // FunctionSamples' context as for the context promotion. // We also need to set new parant link for all children. std::queue<ContextTrieNode *> NodeToUpdate; - NewNode.setParentContext(this); + NewNode.setParentContext(&ToNodeParent); NodeToUpdate.push(&NewNode); while (!NodeToUpdate.empty()) { @@ -87,10 +89,8 @@ ContextTrieNode &ContextTrieNode::moveToChildContext( FunctionSamples *FSamples = Node->getFunctionSamples(); if (FSamples) { - FSamples->getContext().promoteOnPath(ContextFramesToRemove); + setContextNode(FSamples, Node); FSamples->getContext().setState(SyntheticContext); - LLVM_DEBUG(dbgs() << " Context promoted to: " - << FSamples->getContext().toString() << "\n"); } for (auto &It : Node->getAllChildContext()) { @@ -100,10 +100,6 @@ ContextTrieNode &ContextTrieNode::moveToChildContext( } } - // Original context no longer needed, destroy if requested. 
- if (DeleteNode) - OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); - return NewNode; } @@ -131,7 +127,7 @@ void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) { Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; } void ContextTrieNode::addFunctionSize(uint32_t FSize) { - if (!FuncSize.hasValue()) + if (!FuncSize) FuncSize = 0; FuncSize = FuncSize.getValue() + FSize; @@ -147,6 +143,10 @@ void ContextTrieNode::setParentContext(ContextTrieNode *Parent) { ParentContext = Parent; } +void ContextTrieNode::setCallSiteLoc(const LineLocation &Loc) { + CallSiteLoc = Loc; +} + void ContextTrieNode::dumpNode() { dbgs() << "Node: " << FuncName << "\n" << " Callsite: " << CallSiteLoc << "\n" @@ -202,13 +202,23 @@ SampleContextTracker::SampleContextTracker( SampleContext Context = FuncSample.first; LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString() << "\n"); - if (!Context.isBaseContext()) - FuncToCtxtProfiles[Context.getName()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); NewNode->setFunctionSamples(FSamples); } + populateFuncToCtxtMap(); +} + +void SampleContextTracker::populateFuncToCtxtMap() { + for (auto *Node : *this) { + FunctionSamples *FSamples = Node->getFunctionSamples(); + if (FSamples) { + FSamples->getContext().setState(RawContext); + setContextNode(FSamples, Node); + FuncToCtxtProfiles[Node->getFuncName()].push_back(FSamples); + } + } } FunctionSamples * @@ -231,7 +241,7 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); LLVM_DEBUG(if (FSamples) { - dbgs() << " Callee context found: " << FSamples->getContext().toString() + dbgs() << " Callee context found: " << getContextString(CalleeContext) << "\n"; }); return FSamples; @@ -333,7 +343,7 @@ 
FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) continue; - ContextTrieNode *FromNode = getContextFor(Context); + ContextTrieNode *FromNode = getContextNodeForProfile(CSamples); if (FromNode == Node) continue; @@ -354,7 +364,7 @@ void SampleContextTracker::markContextSamplesInlined( const FunctionSamples *InlinedSamples) { assert(InlinedSamples && "Expect non-null inlined samples"); LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " - << InlinedSamples->getContext().toString() << "\n"); + << getContextString(*InlinedSamples) << "\n"); InlinedSamples->getContext().setState(InlinedContext); } @@ -405,17 +415,43 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( // the context profile in the base (context-less) profile. FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); assert(FromSamples && "Shouldn't promote a context without profile"); + (void)FromSamples; // Unused in release build. 
+ LLVM_DEBUG(dbgs() << " Found context tree root to promote: " - << FromSamples->getContext().toString() << "\n"); + << getContextString(&NodeToPromo) << "\n"); assert(!FromSamples->getContext().hasState(InlinedContext) && "Shouldn't promote inlined context profile"); - uint32_t ContextFramesToRemove = - FromSamples->getContext().getContextFrames().size() - 1; - return promoteMergeContextSamplesTree(NodeToPromo, RootContext, - ContextFramesToRemove); + return promoteMergeContextSamplesTree(NodeToPromo, RootContext); +} + +#ifndef NDEBUG +std::string +SampleContextTracker::getContextString(const FunctionSamples &FSamples) const { + return getContextString(getContextNodeForProfile(&FSamples)); } +std::string +SampleContextTracker::getContextString(ContextTrieNode *Node) const { + SampleContextFrameVector Res; + if (Node == &RootContext) + return std::string(); + Res.emplace_back(Node->getFuncName(), LineLocation(0, 0)); + + ContextTrieNode *PreNode = Node; + Node = Node->getParentContext(); + while (Node && Node != &RootContext) { + Res.emplace_back(Node->getFuncName(), PreNode->getCallSiteLoc()); + PreNode = Node; + Node = Node->getParentContext(); + } + + std::reverse(Res.begin(), Res.end()); + + return SampleContext::getContextString(Res); +} +#endif + void SampleContextTracker::dump() { RootContext.dumpTree(); } StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const { @@ -526,8 +562,7 @@ ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) { } void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, - ContextTrieNode &ToNode, - uint32_t ContextFramesToRemove) { + ContextTrieNode &ToNode) { FunctionSamples *FromSamples = FromNode.getFunctionSamples(); FunctionSamples *ToSamples = ToNode.getFunctionSamples(); if (FromSamples && ToSamples) { @@ -540,16 +575,13 @@ void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, } else if (FromSamples) { // Transfer FromSamples from FromNode to ToNode 
ToNode.setFunctionSamples(FromSamples); + setContextNode(FromSamples, &ToNode); FromSamples->getContext().setState(SyntheticContext); - FromSamples->getContext().promoteOnPath(ContextFramesToRemove); - FromNode.setFunctionSamples(nullptr); } } ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( - ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, - uint32_t ContextFramesToRemove) { - assert(ContextFramesToRemove && "Context to remove can't be empty"); + ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent) { // Ignore call site location if destination is top level under root LineLocation NewCallSiteLoc = LineLocation(0, 0); @@ -566,22 +598,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( if (!ToNode) { // Do not delete node to move from its parent here because // caller is iterating over children of that parent node. - ToNode = &ToNodeParent.moveToChildContext( - NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false); + ToNode = + &moveContextSamples(ToNodeParent, NewCallSiteLoc, std::move(FromNode)); + LLVM_DEBUG({ + dbgs() << " Context promoted and merged to: " << getContextString(ToNode) + << "\n"; + }); } else { // Destination node exists, merge samples for the context tree - mergeContextNode(FromNode, *ToNode, ContextFramesToRemove); + mergeContextNode(FromNode, *ToNode); LLVM_DEBUG({ if (ToNode->getFunctionSamples()) dbgs() << " Context promoted and merged to: " - << ToNode->getFunctionSamples()->getContext().toString() << "\n"; + << getContextString(ToNode) << "\n"; }); // Recursively promote and merge children for (auto &It : FromNode.getAllChildContext()) { ContextTrieNode &FromChildNode = It.second; - promoteMergeContextSamplesTree(FromChildNode, *ToNode, - ContextFramesToRemove); + promoteMergeContextSamplesTree(FromChildNode, *ToNode); } // Remove children once they're all merged @@ -594,4 +629,14 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( return 
*ToNode; } + +void SampleContextTracker::createContextLessProfileMap( + SampleProfileMap &ContextLessProfiles) { + for (auto *Node : *this) { + FunctionSamples *FProfile = Node->getFunctionSamples(); + // Profile's context can be empty, use ContextNode's func name. + if (FProfile) + ContextLessProfiles[Node->getFuncName()].merge(*FProfile); + } +} } // namespace llvm diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index bc6051de90c4..40de69bbf2cf 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -25,11 +25,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" @@ -38,22 +35,16 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstrTypes.h" @@ -64,6 +55,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include 
"llvm/IR/PseudoProbe.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -73,9 +65,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/ProfiledCallGraph.h" @@ -84,7 +74,6 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" #include <algorithm> @@ -151,8 +140,7 @@ static cl::opt<bool> ProfileSampleBlockAccurate( "them conservatively as unknown. ")); static cl::opt<bool> ProfileAccurateForSymsInList( - "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, - cl::init(true), + "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overriden by profile-sample-accurate. ")); @@ -183,6 +171,15 @@ static cl::opt<bool> ProfileSizeInline( cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size.")); +// Since profiles are consumed by many passes, turning on this option has +// side effects. For instance, pre-link SCC inliner would see merged profiles +// and inline the hot functions (that are skipped in this pass). 
+static cl::opt<bool> DisableSampleLoaderInlining( + "disable-sample-loader-inlining", cl::Hidden, cl::init(false), + cl::desc("If true, artifically skip inline transformation in sample-loader " + "pass, and merge (or scale) profiles (as configured by " + "--sample-profile-merge-inlinee).")); + cl::opt<int> ProfileInlineGrowthLimit( "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), cl::desc("The size growth ratio limit for proirity-based sample profile " @@ -219,19 +216,19 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip( "Skip relative hotness check for ICP up to given number of targets.")); static cl::opt<bool> CallsitePrioritizedInline( - "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + "sample-profile-prioritized-inline", cl::Hidden, + cl::desc("Use call site prioritized inlining for sample profile loader." "Currently only CSSPGO is supported.")); static cl::opt<bool> UsePreInlinerDecision( - "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + "sample-profile-use-preinliner", cl::Hidden, + cl::desc("Use the preinliner decisions stored in profile context.")); static cl::opt<bool> AllowRecursiveInline( - "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + "sample-profile-recursive-inline", cl::Hidden, + cl::desc("Allow sample loader inliner to inline recursive calls.")); static cl::opt<std::string> ProfileInlineReplayFile( @@ -287,7 +284,6 @@ static cl::opt<CallSiteFormat::Format> ProfileInlineReplayFormat( static cl::opt<unsigned> MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden, - cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite in sample profile loader")); @@ -295,6 +291,13 @@ static cl::opt<bool> OverwriteExistingWeights( "overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite.")); +static cl::opt<bool> 
AnnotateSampleProfileInlinePhase( + "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), + cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " + "sample-profile inline pass name.")); + +extern cl::opt<bool> EnableExtTspBlockPlacement; + namespace { using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>; @@ -425,7 +428,11 @@ public: : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)), GetAC(std::move(GetAssumptionCache)), GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), - LTOPhase(LTOPhase) {} + LTOPhase(LTOPhase), + AnnotatedPassName(AnnotateSampleProfileInlinePhase + ? llvm::AnnotateInlinePassName(InlineContext{ + LTOPhase, InlinePass::SampleProfileInliner}) + : CSINLINE_DEBUG) {} bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -487,15 +494,13 @@ protected: /// Profile tracker for different context. std::unique_ptr<SampleContextTracker> ContextTracker; - /// Flag indicating whether input profile is context-sensitive - bool ProfileIsCSFlat = false; - /// Flag indicating which LTO/ThinLTO phase the pass is invoked in. /// /// We need to know the LTO phase because for example in ThinLTOPrelink /// phase, in annotation, we should not promote indirect calls. Instead, /// we will mark GUIDs that needs to be annotated to the function. - ThinOrFullLTOPhase LTOPhase; + const ThinOrFullLTOPhase LTOPhase; + const std::string AnnotatedPassName; /// Profle Symbol list tells whether a function name appears in the binary /// used to generate the current profile. @@ -535,6 +540,11 @@ protected: // A pseudo probe helper to correlate the imported sample counts. 
std::unique_ptr<PseudoProbeManager> ProbeManager; + +private: + const char *getAnnotatedRemarkPassName() const { + return AnnotatedPassName.c_str(); + } }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -605,7 +615,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { // call instruction should have 0 count. // For CS profile, the callsite count of previously inlined callees is // populated with the entry count of the callees. - if (!ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) if (const auto *CB = dyn_cast<CallBase>(&Inst)) if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) return 0; @@ -644,7 +654,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) { // call instruction should have 0 count. // For CS profile, the callsite count of previously inlined callees is // populated with the entry count of the callees. - if (!ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) if (const auto *CB = dyn_cast<CallBase>(&Inst)) if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) return 0; @@ -698,7 +708,7 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { if (Function *Callee = Inst.getCalledFunction()) CalleeName = Callee->getName(); - if (ProfileIsCSFlat) + if (FunctionSamples::ProfileIsCS) return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName); const FunctionSamples *FS = findFunctionSamples(Inst); @@ -730,7 +740,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( FunctionSamples::getGUID(R->getName()); }; - if (ProfileIsCSFlat) { + if (FunctionSamples::ProfileIsCS) { auto CalleeSamples = ContextTracker->getIndirectCalleeContextSamplesFor(DIL); if (CalleeSamples.empty()) @@ -783,7 +793,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { auto it = DILocation2SampleMap.try_emplace(DIL,nullptr); if (it.second) { - if (ProfileIsCSFlat) + if (FunctionSamples::ProfileIsCS) it.first->second = 
ContextTracker->getContextSamplesFor(DIL); else it.first->second = @@ -839,6 +849,13 @@ static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl<InstrProfValueData> &CallTargets, uint64_t Sum) { + // Bail out early if MaxNumPromotions is zero. + // This prevents allocating an array of zero length below. + // + // Note `updateIDTMetaData` is called in two places so check + // `MaxNumPromotions` inside it. + if (MaxNumPromotions == 0) + return; uint32_t NumVals = 0; // OldSum is the existing total count in the value profile data. uint64_t OldSum = 0; @@ -922,6 +939,14 @@ updateIDTMetaData(Instruction &Inst, bool SampleProfileLoader::tryPromoteAndInlineCandidate( Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSite) { + // Bail out early if sample-loader inliner is disabled. + if (DisableSampleLoaderInlining) + return false; + + // Bail out early if MaxNumPromotions is zero. + // This prevents allocating an array of zero length in callees below. + if (MaxNumPromotions == 0) + return false; auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName(); auto R = SymbolMap.find(CalleeFunctionName); if (R == SymbolMap.end() || !R->getValue()) @@ -1009,8 +1034,9 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates( for (auto I : Candidates) { Function *CalledFunction = I->getCalledFunction(); if (CalledFunction) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt", - I->getDebugLoc(), I->getParent()) + ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), + "InlineAttempt", I->getDebugLoc(), + I->getParent()) << "previous inlining reattempted for " << (Hot ? "hotness: '" : "size: '") << ore::NV("Callee", CalledFunction) << "' into '" @@ -1042,13 +1068,12 @@ void SampleProfileLoader::findExternalInlineCandidate( // For AutoFDO profile, retrieve candidate profiles by walking over // the nested inlinee profiles. 
- if (!ProfileIsCSFlat) { + if (!FunctionSamples::ProfileIsCS) { Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold); return; } - ContextTrieNode *Caller = - ContextTracker->getContextFor(Samples->getContext()); + ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples); std::queue<ContextTrieNode *> CalleeList; CalleeList.push(Caller); while (!CalleeList.empty()) { @@ -1098,11 +1123,20 @@ void SampleProfileLoader::findExternalInlineCandidate( /// Iteratively inline hot callsites of a function. /// -/// Iteratively traverse all callsites of the function \p F, and find if -/// the corresponding inlined instance exists and is hot in profile. If -/// it is hot enough, inline the callsites and adds new callsites of the -/// callee into the caller. If the call is an indirect call, first promote -/// it to direct call. Each indirect call is limited with a single target. +/// Iteratively traverse all callsites of the function \p F, so as to +/// find out callsites with corresponding inline instances. +/// +/// For such callsites, +/// - If it is hot enough, inline the callsites and adds callsites of the callee +/// into the caller. If the call is an indirect call, first promote +/// it to direct call. Each indirect call is limited with a single target. +/// +/// - If a callsite is not inlined, merge the its profile to the outline +/// version (if --sample-profile-merge-inlinee is true), or scale the +/// counters of standalone function based on the profile of inlined +/// instances (if --sample-profile-merge-inlinee is false). +/// +/// Later passes may consume the updated profiles. /// /// \param F function to perform iterative inlining. 
/// \param InlinedGUIDs a set to be updated to include all GUIDs that are @@ -1137,7 +1171,7 @@ bool SampleProfileLoader::inlineHotFunctions( assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || ProfileIsCSFlat) + if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS) LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; @@ -1200,13 +1234,17 @@ bool SampleProfileLoader::inlineHotFunctions( // For CS profile, profile for not inlined context will be merged when // base profile is being retrieved. - if (!FunctionSamples::ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F); return Changed; } bool SampleProfileLoader::tryInlineCandidate( InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) { + // Do not attempt to inline a candidate if + // --disable-sample-loader-inlining is true. + if (DisableSampleLoaderInlining) + return false; CallBase &CB = *Candidate.CallInstr; Function *CalledFunction = CB.getCalledFunction(); @@ -1216,7 +1254,8 @@ bool SampleProfileLoader::tryInlineCandidate( InlineCost Cost = shouldInlineCandidate(Candidate); if (Cost.isNever()) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) + ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), + "InlineFail", DLoc, BB) << "incompatible inlining"); return false; } @@ -1226,45 +1265,45 @@ bool SampleProfileLoader::tryInlineCandidate( InlineFunctionInfo IFI(nullptr, GetAC); IFI.UpdateProfile = false; - if (InlineFunction(CB, IFI).isSuccess()) { - // Merge the attributes based on the inlining. - AttributeFuncs::mergeAttributesForInlining(*BB->getParent(), - *CalledFunction); + if (!InlineFunction(CB, IFI).isSuccess()) + return false; - // The call to InlineFunction erases I, so we can't pass it here. 
- emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, - *BB->getParent(), Cost, true, CSINLINE_DEBUG); + // Merge the attributes based on the inlining. + AttributeFuncs::mergeAttributesForInlining(*BB->getParent(), + *CalledFunction); - // Now populate the list of newly exposed call sites. - if (InlinedCallSites) { - InlinedCallSites->clear(); - for (auto &I : IFI.InlinedCallSites) - InlinedCallSites->push_back(I); - } + // The call to InlineFunction erases I, so we can't pass it here. + emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), + Cost, true, getAnnotatedRemarkPassName()); - if (ProfileIsCSFlat) - ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); - ++NumCSInlined; + // Now populate the list of newly exposed call sites. + if (InlinedCallSites) { + InlinedCallSites->clear(); + for (auto &I : IFI.InlinedCallSites) + InlinedCallSites->push_back(I); + } - // Prorate inlined probes for a duplicated inlining callsite which probably - // has a distribution less than 100%. Samples for an inlinee should be - // distributed among the copies of the original callsite based on each - // callsite's distribution factor for counts accuracy. Note that an inlined - // probe may come with its own distribution factor if it has been duplicated - // in the inlinee body. The two factor are multiplied to reflect the - // aggregation of duplication. - if (Candidate.CallsiteDistribution < 1) { - for (auto &I : IFI.InlinedCallSites) { - if (Optional<PseudoProbe> Probe = extractProbe(*I)) - setProbeDistributionFactor(*I, Probe->Factor * - Candidate.CallsiteDistribution); - } - NumDuplicatedInlinesite++; - } + if (FunctionSamples::ProfileIsCS) + ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); + ++NumCSInlined; - return true; + // Prorate inlined probes for a duplicated inlining callsite which probably + // has a distribution less than 100%. 
Samples for an inlinee should be + // distributed among the copies of the original callsite based on each + // callsite's distribution factor for counts accuracy. Note that an inlined + // probe may come with its own distribution factor if it has been duplicated + // in the inlinee body. The two factor are multiplied to reflect the + // aggregation of duplication. + if (Candidate.CallsiteDistribution < 1) { + for (auto &I : IFI.InlinedCallSites) { + if (Optional<PseudoProbe> Probe = extractProbe(*I)) + setProbeDistributionFactor(*I, Probe->Factor * + Candidate.CallsiteDistribution); + } + NumDuplicatedInlinesite++; } - return false; + + return true; } bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, @@ -1285,14 +1324,8 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, if (Optional<PseudoProbe> Probe = extractProbe(*CB)) Factor = Probe->Factor; - uint64_t CallsiteCount = 0; - ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent()); - if (Weight) - CallsiteCount = Weight.get(); - if (CalleeSamples) - CallsiteCount = std::max( - CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); - + uint64_t CallsiteCount = + CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0; *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1387,7 +1420,6 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { bool SampleProfileLoader::inlineHotFunctionsWithPriority( Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { - // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure // Profile symbol list is ignored when profile-sample-accurate is on. assert((!ProfAccForSymsInList || @@ -1513,7 +1545,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( // For CS profile, profile for not inlined context will be merged when // base profile is being retrieved. 
- if (!FunctionSamples::ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F); return Changed; } @@ -1528,11 +1560,11 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( if (!Callee || Callee->isDeclaration()) continue; - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline", - I->getDebugLoc(), I->getParent()) - << "previous inlining not repeated: '" - << ore::NV("Callee", Callee) << "' into '" - << ore::NV("Caller", &F) << "'"); + ORE->emit( + OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline", + I->getDebugLoc(), I->getParent()) + << "previous inlining not repeated: '" << ore::NV("Callee", Callee) + << "' into '" << ore::NV("Caller", &F) << "'"); ++NumCSNotInlined; const FunctionSamples *FS = Pair.getSecond(); @@ -1540,6 +1572,10 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( continue; } + // Do not merge a context that is already duplicated into the base profile. + if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase)) + continue; + if (ProfileMergeInlinee) { // A function call can be replicated by optimizations like callsite // splitting or jump threading and the replicates end up sharing the @@ -1623,7 +1659,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { // With CSSPGO all indirect call targets are counted torwards the // original indirect call site in the profile, including both // inlined and non-inlined targets. 
- if (!FunctionSamples::ProfileIsCSFlat) { + if (!FunctionSamples::ProfileIsCS) { if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { for (const auto &NameFS : *M) @@ -1714,6 +1750,11 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { } } + // FIXME: Re-enable for sample profiling after investigating why the sum + // of branch weights can be 0 + // + // misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false); + uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. @@ -1798,7 +1839,7 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", std::unique_ptr<ProfiledCallGraph> SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { std::unique_ptr<ProfiledCallGraph> ProfiledCG; - if (ProfileIsCSFlat) + if (FunctionSamples::ProfileIsCS) ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker); else ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles()); @@ -1843,8 +1884,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { assert(&CG->getModule() == &M); - if (UseProfiledCallGraph || - (ProfileIsCSFlat && !UseProfiledCallGraph.getNumOccurrences())) { + if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS && + !UseProfiledCallGraph.getNumOccurrences())) { // Use profiled call edges to augment the top-down order. There are cases // that the top-down order computed based on the static call graph doesn't // reflect real execution order. For example @@ -1973,40 +2014,50 @@ bool SampleProfileLoader::doInitialization(Module &M, ProfileInlineReplayScope, ProfileInlineReplayFallback, {ProfileInlineReplayFormat}}, - /*EmitRemarks=*/false); + /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner}); } - // Apply tweaks if context-sensitive profile is available. 
- if (Reader->profileIsCSFlat() || Reader->profileIsCSNested()) { - ProfileIsCSFlat = Reader->profileIsCSFlat(); + // Apply tweaks if context-sensitive or probe-based profile is available. + if (Reader->profileIsCS() || Reader->profileIsPreInlined() || + Reader->profileIsProbeBased()) { + if (!UseIterativeBFIInference.getNumOccurrences()) + UseIterativeBFIInference = true; + if (!SampleProfileUseProfi.getNumOccurrences()) + SampleProfileUseProfi = true; + if (!EnableExtTspBlockPlacement.getNumOccurrences()) + EnableExtTspBlockPlacement = true; // Enable priority-base inliner and size inline by default for CSSPGO. if (!ProfileSizeInline.getNumOccurrences()) ProfileSizeInline = true; if (!CallsitePrioritizedInline.getNumOccurrences()) CallsitePrioritizedInline = true; - - // For CSSPGO, use preinliner decision by default when available. - if (!UsePreInlinerDecision.getNumOccurrences()) - UsePreInlinerDecision = true; - // For CSSPGO, we also allow recursive inline to best use context profile. if (!AllowRecursiveInline.getNumOccurrences()) AllowRecursiveInline = true; - // Enable iterative-BFI by default for CSSPGO. - if (!UseIterativeBFIInference.getNumOccurrences()) - UseIterativeBFIInference = true; - // Enable Profi by default for CSSPGO. - if (!SampleProfileUseProfi.getNumOccurrences()) - SampleProfileUseProfi = true; + if (Reader->profileIsPreInlined()) { + if (!UsePreInlinerDecision.getNumOccurrences()) + UsePreInlinerDecision = true; + } - if (FunctionSamples::ProfileIsCSFlat) { - // Tracker for profiles under different context - ContextTracker = std::make_unique<SampleContextTracker>( - Reader->getProfiles(), &GUIDToFuncNameMap); + if (!Reader->profileIsCS()) { + // Non-CS profile should be fine without a function size budget for the + // inliner since the contexts in the profile are either all from inlining + // in the prevoius build or pre-computed by the preinliner with a size + // cap, thus they are bounded. 
+ if (!ProfileInlineLimitMin.getNumOccurrences()) + ProfileInlineLimitMin = std::numeric_limits<unsigned>::max(); + if (!ProfileInlineLimitMax.getNumOccurrences()) + ProfileInlineLimitMax = std::numeric_limits<unsigned>::max(); } } + if (Reader->profileIsCS()) { + // Tracker for profiles under different context + ContextTracker = std::make_unique<SampleContextTracker>( + Reader->getProfiles(), &GUIDToFuncNameMap); + } + // Load pseudo probe descriptors for probe-based function samples. if (Reader->profileIsProbeBased()) { ProbeManager = std::make_unique<PseudoProbeManager>(M); @@ -2082,7 +2133,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, } // Account for cold calls not inlined.... - if (!ProfileIsCSFlat) + if (!FunctionSamples::ProfileIsCS) for (const std::pair<Function *, NotInlinedProfileInfo> &pair : notInlinedCallInfo) updateProfileCallee(pair.first, pair.second.entryCount); @@ -2145,7 +2196,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) // Initialize entry count when the function has no existing entry // count value. 
- if (!F.getEntryCount().hasValue()) + if (!F.getEntryCount()) F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; if (AM) { @@ -2158,7 +2209,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) ORE = OwnedORE.get(); } - if (ProfileIsCSFlat) + if (FunctionSamples::ProfileIsCS) Samples = ContextTracker->getBaseSamplesFor(F); else Samples = Reader->getSamplesFor(F); diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index e104ae00e916..d1ab2649ee2e 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -13,21 +13,19 @@ #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PseudoProbe.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <unordered_set> @@ -416,7 +414,7 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F, FunctionAnalysisManager &FAM) { BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); auto BBProfileCount = [&BFI](BasicBlock *BB) { - return BFI.getBlockProfileCount(BB).getValueOr(0); + return BFI.getBlockProfileCount(BB).value_or(0); }; // Collect the sum of execution weight for each 
probe. diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp index 95393d9476e0..c7d54b8cdeb0 100644 --- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp +++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp @@ -25,18 +25,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/SyntheticCountsUtils.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; using Scaled64 = ScaledNumber<uint64_t>; @@ -47,18 +42,17 @@ using ProfileCount = Function::ProfileCount; namespace llvm { cl::opt<int> InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10), - cl::ZeroOrMore, cl::desc("Initial value of synthetic entry count")); } // namespace llvm /// Initial synthetic count assigned to inline functions. static cl::opt<int> InlineSyntheticCount( - "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore, + "inline-synthetic-count", cl::Hidden, cl::init(15), cl::desc("Initial synthetic entry count for inline functions.")); /// Initial synthetic count assigned to cold functions. static cl::opt<int> ColdSyntheticCount( - "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore, + "cold-synthetic-count", cl::Hidden, cl::init(5), cl::desc("Initial synthetic entry count for cold functions.")); // Assign initial synthetic entry counts to functions. 
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 52708ff2f226..a360a768a2bc 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -21,7 +21,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Pass.h" -#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -311,7 +310,8 @@ void splitAndWriteThinLTOBitcode( return; } if (!F->isDeclaration() && - computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) + computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == + FMRB_DoesNotAccessMemory) EligibleVirtualFns.insert(F); }); } @@ -542,11 +542,11 @@ class WriteThinLTOBitcode : public ModulePass { raw_ostream &OS; // raw_ostream to print on // The output stream on which to emit a minimized module for use // just in the thin link, if requested. 
- raw_ostream *ThinLinkOS; + raw_ostream *ThinLinkOS = nullptr; public: static char ID; // Pass identification, replacement for typeid - WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) { + WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) { initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 8b30f0e989a1..898a213d0849 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -57,6 +57,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" @@ -79,6 +80,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndexYAML.h" @@ -95,6 +97,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Evaluator.h" #include <algorithm> #include <cstddef> @@ -107,6 +110,15 @@ using namespace wholeprogramdevirt; #define DEBUG_TYPE "wholeprogramdevirt" +STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets"); +STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations"); +STATISTIC(NumBranchFunnel, "Number of branch funnels"); +STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations"); +STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations"); +STATISTIC(NumVirtConstProp1Bit, + "Number of 1 bit virtual constant propagations"); +STATISTIC(NumVirtConstProp, "Number of virtual 
constant propagations"); + static cl::opt<PassSummaryAction> ClSummaryAction( "wholeprogramdevirt-summary-action", cl::desc("What to do with the summary when running this pass"), @@ -132,13 +144,12 @@ static cl::opt<std::string> ClWriteSummary( static cl::opt<unsigned> ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, - cl::init(10), cl::ZeroOrMore, + cl::init(10), cl::desc("Maximum number of call targets per " "call site to enable branch funnels")); static cl::opt<bool> PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, - cl::init(false), cl::ZeroOrMore, cl::desc("Print index-based devirtualization messages")); /// Provide a way to force enable whole program visibility in tests. @@ -146,30 +157,34 @@ static cl::opt<bool> /// !vcall_visibility metadata (the mere presense of type tests /// previously implied hidden visibility). static cl::opt<bool> - WholeProgramVisibility("whole-program-visibility", cl::init(false), - cl::Hidden, cl::ZeroOrMore, + WholeProgramVisibility("whole-program-visibility", cl::Hidden, cl::desc("Enable whole program visibility")); /// Provide a way to force disable whole program for debugging or workarounds, /// when enabled via the linker. static cl::opt<bool> DisableWholeProgramVisibility( - "disable-whole-program-visibility", cl::init(false), cl::Hidden, - cl::ZeroOrMore, + "disable-whole-program-visibility", cl::Hidden, cl::desc("Disable whole program visibility (overrides enabling options)")); /// Provide way to prevent certain function from being devirtualized static cl::list<std::string> SkipFunctionNames("wholeprogramdevirt-skip", cl::desc("Prevent function(s) from being devirtualized"), - cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated); + cl::Hidden, cl::CommaSeparated); -/// Mechanism to add runtime checking of devirtualization decisions, trapping on -/// any that are not correct. Useful for debugging undefined behavior leading to -/// failures with WPD. 
-static cl::opt<bool> - CheckDevirt("wholeprogramdevirt-check", cl::init(false), cl::Hidden, - cl::ZeroOrMore, - cl::desc("Add code to trap on incorrect devirtualizations")); +/// Mechanism to add runtime checking of devirtualization decisions, optionally +/// trapping or falling back to indirect call on any that are not correct. +/// Trapping mode is useful for debugging undefined behavior leading to failures +/// with WPD. Fallback mode is useful for ensuring safety when whole program +/// visibility may be compromised. +enum WPDCheckMode { None, Trap, Fallback }; +static cl::opt<WPDCheckMode> DevirtCheckMode( + "wholeprogramdevirt-check", cl::Hidden, + cl::desc("Type of checking for incorrect devirtualizations"), + cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"), + clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"), + clEnumValN(WPDCheckMode::Fallback, "fallback", + "Fallback to indirect when incorrect"))); namespace { struct PatternList { @@ -866,13 +881,14 @@ void updateVCallVisibilityInIndex( if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) return; for (auto &P : Index) { + // Don't upgrade the visibility for symbols exported to the dynamic + // linker, as we have no information on their eventual use. + if (DynamicExportSymbols.count(P.first)) + continue; for (auto &S : P.second.SummaryList) { auto *GVar = dyn_cast<GlobalVarSummary>(S.get()); if (!GVar || - GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic || - // Don't upgrade the visibility for symbols exported to the dynamic - // linker, as we have no information on their eventual use. 
- DynamicExportSymbols.count(P.first)) + GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic) continue; GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit); } @@ -1133,16 +1149,17 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, if (RemarksEnabled) VCallSite.emitRemark("single-impl", TheFn->stripPointerCasts()->getName(), OREGetter); + NumSingleImpl++; auto &CB = VCallSite.CB; assert(!CB.getCalledFunction() && "devirtualizing direct call?"); IRBuilder<> Builder(&CB); Value *Callee = Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType()); - // If checking is enabled, add support to compare the virtual function - // pointer to the devirtualized target. In case of a mismatch, perform a - // debug trap. - if (CheckDevirt) { + // If trap checking is enabled, add support to compare the virtual + // function pointer to the devirtualized target. In case of a mismatch, + // perform a debug trap. + if (DevirtCheckMode == WPDCheckMode::Trap) { auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee); Instruction *ThenTerm = SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false); @@ -1152,8 +1169,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, CallTrap->setDebugLoc(CB.getDebugLoc()); } - // Devirtualize. - CB.setCalledOperand(Callee); + // If fallback checking is enabled, add support to compare the virtual + // function pointer to the devirtualized target. In case of a mismatch, + // fall back to indirect call. + if (DevirtCheckMode == WPDCheckMode::Fallback) { + MDNode *Weights = + MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1); + // Version the indirect call site. If the called value is equal to the + // given callee, 'NewInst' will be executed, otherwise the original call + // site will be executed. 
+ CallBase &NewInst = versionCallSite(CB, Callee, Weights); + NewInst.setCalledOperand(Callee); + // Since the new call site is direct, we must clear metadata that + // is only appropriate for indirect calls. This includes !prof and + // !callees metadata. + NewInst.setMetadata(LLVMContext::MD_prof, nullptr); + NewInst.setMetadata(LLVMContext::MD_callees, nullptr); + // Additionally, we should remove them from the fallback indirect call, + // so that we don't attempt to perform indirect call promotion later. + CB.setMetadata(LLVMContext::MD_prof, nullptr); + CB.setMetadata(LLVMContext::MD_callees, nullptr); + } + + // In either trapping or non-checking mode, devirtualize original call. + else { + // Devirtualize unconditionally. + CB.setCalledOperand(Callee); + // Since the call site is now direct, we must clear metadata that + // is only appropriate for indirect calls. This includes !prof and + // !callees metadata. + CB.setMetadata(LLVMContext::MD_prof, nullptr); + CB.setMetadata(LLVMContext::MD_callees, nullptr); + } // This use is no longer unsafe. if (VCallSite.NumUnsafeUses) @@ -1208,7 +1255,7 @@ bool DevirtModule::trySingleImplDevirt( return false; // If so, update each call site to call that implementation directly. - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) TargetsForSlot[0].WasDevirt = true; bool IsExported = false; @@ -1279,7 +1326,7 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot, return false; // Collect functions devirtualized at least for one call site for stats. 
- if (PrintSummaryDevirt) + if (PrintSummaryDevirt || AreStatisticsEnabled()) DevirtTargets.insert(TheFn); auto &S = TheFn.getSummaryList()[0]; @@ -1385,6 +1432,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, !FSAttr.getValueAsString().contains("+retpoline")) continue; + NumBranchFunnel++; if (RemarksEnabled) VCallSite.emitRemark("branch-funnel", JT->stripPointerCasts()->getName(), OREGetter); @@ -1476,6 +1524,7 @@ void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, for (auto Call : CSInfo.CallSites) { if (!OptimizedCalls.insert(&Call.CB).second) continue; + NumUniformRetVal++; Call.replaceAndErase( "uniform-ret-val", FnName, RemarksEnabled, OREGetter, ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal)); @@ -1499,7 +1548,7 @@ bool DevirtModule::tryUniformRetValOpt( } applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal); - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) for (auto &&Target : TargetsForSlot) Target.WasDevirt = true; return true; @@ -1592,6 +1641,7 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable, B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType())); Cmp = B.CreateZExt(Cmp, Call.CB.getType()); + NumUniqueRetVal++; Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter, Cmp); } @@ -1636,7 +1686,7 @@ bool DevirtModule::tryUniqueRetValOpt( UniqueMemberAddr); // Update devirtualization statistics for targets. 
- if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) for (auto &&Target : TargetsForSlot) Target.WasDevirt = true; @@ -1665,11 +1715,13 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName, Value *Bits = B.CreateLoad(Int8Ty, Addr); Value *BitsAndBit = B.CreateAnd(Bits, Bit); auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0)); + NumVirtConstProp1Bit++; Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled, OREGetter, IsBitSet); } else { Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo()); Value *Val = B.CreateLoad(RetType, ValAddr); + NumVirtConstProp++; Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, OREGetter, Val); } @@ -1701,7 +1753,7 @@ bool DevirtModule::tryVirtualConstProp( for (VirtualCallTarget &Target : TargetsForSlot) { if (Target.Fn->isDeclaration() || computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) != - MAK_ReadNone || + FMRB_DoesNotAccessMemory || Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() || Target.Fn->getReturnType() != RetType) return false; @@ -1755,7 +1807,7 @@ bool DevirtModule::tryVirtualConstProp( setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte, OffsetBit); - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) for (auto &&Target : TargetsForSlot) Target.WasDevirt = true; @@ -1963,7 +2015,7 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) { // (although this is unlikely). In that case, explicitly build a pair and // RAUW it. 
if (!CI->use_empty()) { - Value *Pair = UndefValue::get(CI->getType()); + Value *Pair = PoisonValue::get(CI->getType()); IRBuilder<> B(CI); Pair = B.CreateInsertValue(Pair, LoadedValue, {0}); Pair = B.CreateInsertValue(Pair, TypeTestCall, {1}); @@ -2151,9 +2203,9 @@ bool DevirtModule::run() { removeRedundantTypeTests(); - // We have lowered or deleted the type instrinsics, so we will no - // longer have enough information to reason about the liveness of virtual - // function pointers in GlobalDCE. + // We have lowered or deleted the type intrinsics, so we will no longer have + // enough information to reason about the liveness of virtual function + // pointers in GlobalDCE. for (GlobalVariable &GV : M.globals()) GV.eraseMetadata(LLVMContext::MD_vcall_visibility); @@ -2243,7 +2295,7 @@ bool DevirtModule::run() { } // Collect functions devirtualized at least for one call site for stats. - if (RemarksEnabled) + if (RemarksEnabled || AreStatisticsEnabled()) for (const auto &T : TargetsForSlot) if (T.WasDevirt) DevirtTargets[std::string(T.Fn->getName())] = T.Fn; @@ -2276,6 +2328,8 @@ bool DevirtModule::run() { } } + NumDevirtTargets += DevirtTargets.size(); + removeRedundantTypeTests(); // Rebuild each global we touched as part of virtual constant propagation to @@ -2284,9 +2338,9 @@ bool DevirtModule::run() { for (VTableBits &B : Bits) rebuildGlobal(B); - // We have lowered or deleted the type instrinsics, so we will no - // longer have enough information to reason about the liveness of virtual - // function pointers in GlobalDCE. + // We have lowered or deleted the type intrinsics, so we will no longer have + // enough information to reason about the liveness of virtual function + // pointers in GlobalDCE. 
for (GlobalVariable &GV : M.globals()) GV.eraseMetadata(LLVMContext::MD_vcall_visibility); @@ -2367,4 +2421,6 @@ void DevirtIndex::run() { if (PrintSummaryDevirt) for (const auto &DT : DevirtTargets) errs() << "Devirtualized call to " << DT << "\n"; + + NumDevirtTargets += DevirtTargets.size(); } |
