Diffstat (limited to 'llvm/lib/Transforms/IPO')
30 files changed, 11429 insertions, 7070 deletions
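The heart of this diff is the removal of llvm/IR/CallSite.h: every pass below switches from the CallSite/ImmutableCallSite wrappers to CallBase. As a reading aid, here is a minimal sketch (not part of the commit; the helper name is hypothetical) of the post-migration pattern for collecting direct call sites, assuming LLVM 11-era headers:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h" // CallBase replaces the CallSite wrapper
    #include "llvm/Support/Casting.h"

    using namespace llvm;

    // Collect every direct call or invoke of F; indirect uses of F (e.g. a
    // stored function pointer) are skipped, mirroring the AlwaysInliner loop
    // in the diff below.
    static SmallVector<CallBase *, 16> collectDirectCallSites(Function &F) {
      SmallVector<CallBase *, 16> Calls;
      for (User *U : F.users())
        if (auto *CB = dyn_cast<CallBase>(U)) // was: if (auto CS = CallSite(U))
          if (CB->getCalledFunction() == &F)  // was: CS.getCalledFunction() == &F
            Calls.push_back(CB);
      return Calls;
    }

The same substitution recurs throughout the hunks for query and update code as well (CS.hasFnAttr becomes CB.hasFnAttr, CS.getArgument becomes CB->getArgOperand, and so on).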
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index 06d1763353f46..53f9512f86f30 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" @@ -37,30 +36,30 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, // Add inline assumptions during code generation. FunctionAnalysisManager &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); - std::function<AssumptionCache &(Function &)> GetAssumptionCache = - [&](Function &F) -> AssumptionCache & { + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); }; - InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache); + InlineFunctionInfo IFI(/*cg=*/nullptr, GetAssumptionCache); - SmallSetVector<CallSite, 16> Calls; + SmallSetVector<CallBase *, 16> Calls; bool Changed = false; SmallVector<Function *, 16> InlinedFunctions; for (Function &F : M) if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) && - isInlineViable(F)) { + isInlineViable(F).isSuccess()) { Calls.clear(); for (User *U : F.users()) - if (auto CS = CallSite(U)) - if (CS.getCalledFunction() == &F) - Calls.insert(CS); + if (auto *CB = dyn_cast<CallBase>(U)) + if (CB->getCalledFunction() == &F) + Calls.insert(CB); - for (CallSite CS : Calls) + for (CallBase *CB : Calls) // FIXME: We really shouldn't be able to fail to inline at this point! // We should do something to log or check the inline failures here. Changed |= - InlineFunction(CS, IFI, /*CalleeAAR=*/nullptr, InsertLifetime); + InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime) + .isSuccess(); // Remember to try and delete this function afterward. This both avoids // re-walking the rest of the module and avoids dealing with any iterator @@ -116,7 +115,7 @@ public: static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) override; + InlineCost getInlineCost(CallBase &CB) override; using llvm::Pass::doFinalization; bool doFinalization(CallGraph &CG) override { @@ -151,8 +150,8 @@ Pass *llvm::createAlwaysInlinerLegacyPass(bool InsertLifetime) { /// computed here, but as we only expect to do this for relatively few and /// small functions which have the explicit attribute to force inlining, it is /// likely not worth it in practice. -InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) { - Function *Callee = CS.getCalledFunction(); +InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) { + Function *Callee = CB.getCalledFunction(); // Only inline direct calls to functions with always-inline attributes // that are viable for inlining. 
@@ -163,12 +162,12 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) { if (Callee->isDeclaration()) return InlineCost::getNever("no definition"); - if (!CS.hasFnAttr(Attribute::AlwaysInline)) + if (!CB.hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getNever("no alwaysinline attribute"); auto IsViable = isInlineViable(*Callee); - if (!IsViable) - return InlineCost::getNever(IsViable.message); + if (!IsViable.isSuccess()) + return InlineCost::getNever(IsViable.getFailureReason()); return InlineCost::getAlways("always inliner"); } diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index cdf8a2eb598ee..ad0d7eb51507a 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -36,7 +36,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" @@ -53,7 +52,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -74,6 +72,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include <algorithm> @@ -105,7 +104,7 @@ using IndicesVector = std::vector<uint64_t>; static Function * doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform, - Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> + Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>> ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. @@ -197,7 +196,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, for (const auto &ArgIndex : ArgIndices) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType( - cast<PointerType>(I->getType()->getScalarType())->getElementType(), + cast<PointerType>(I->getType())->getElementType(), ArgIndex.second)); ArgAttrVec.push_back(AttributeSet()); assert(Params.back()); @@ -241,15 +240,14 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // SmallVector<Value *, 16> Args; while (!F->use_empty()) { - CallSite CS(F->user_back()); - assert(CS.getCalledFunction() == F); - Instruction *Call = CS.getInstruction(); - const AttributeList &CallPAL = CS.getAttributes(); - IRBuilder<NoFolder> IRB(Call); + CallBase &CB = cast<CallBase>(*F->user_back()); + assert(CB.getCalledFunction() == F); + const AttributeList &CallPAL = CB.getAttributes(); + IRBuilder<NoFolder> IRB(&CB); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. 
- CallSite::arg_iterator AI = CS.arg_begin(); + auto AI = CB.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgNo) @@ -295,7 +293,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) ElTy = ElPTy->getElementType(); else - ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II); + ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II); } // And create a GEP to extract those indices. V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx"); @@ -305,7 +303,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // of the previous load. LoadInst *newLoad = IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val"); - newLoad->setAlignment(MaybeAlign(OrigLoad->getAlignment())); + newLoad->setAlignment(OrigLoad->getAlign()); // Transfer the AA info too. AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); @@ -317,46 +315,43 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, } // Push any varargs arguments on the list. - for (; AI != CS.arg_end(); ++AI, ++ArgNo) { + for (; AI != CB.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; - CS.getOperandBundlesAsDefs(OpBundles); + CB.getOperandBundlesAsDefs(OpBundles); - CallSite NewCS; - if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + CallBase *NewCS = nullptr; + if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, OpBundles, "", Call); + Args, OpBundles, "", &CB); } else { - auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call); - NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); + auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB); + NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind()); NewCS = NewCall; } - NewCS.setCallingConv(CS.getCallingConv()); - NewCS.setAttributes( + NewCS->setCallingConv(CB.getCallingConv()); + NewCS->setAttributes( AttributeList::get(F->getContext(), CallPAL.getFnAttributes(), CallPAL.getRetAttributes(), ArgAttrVec)); - NewCS->setDebugLoc(Call->getDebugLoc()); - uint64_t W; - if (Call->extractProfTotalWeight(W)) - NewCS->setProfWeight(W); + NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg}); Args.clear(); ArgAttrVec.clear(); // Update the callgraph to know that the callsite has been transformed. if (ReplaceCallSite) - (*ReplaceCallSite)(CS, NewCS); + (*ReplaceCallSite)(CB, *NewCS); - if (!Call->use_empty()) { - Call->replaceAllUsesWith(NewCS.getInstruction()); - NewCS->takeName(Call); + if (!CB.use_empty()) { + CB.replaceAllUsesWith(NewCS); + NewCS->takeName(&CB); } // Finally, remove the old call from the program, reducing the use-count of // F. - Call->eraseFromParent(); + CB.eraseFromParent(); } const DataLayout &DL = F->getParent()->getDataLayout(); @@ -387,9 +382,10 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Just add all the struct element types. 
Type *AgTy = cast<PointerType>(I->getType())->getElementType(); - Value *TheAlloca = - new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, - MaybeAlign(I->getParamAlignment()), "", InsertPt); + Value *TheAlloca = new AllocaInst( + AgTy, DL.getAllocaAddrSpace(), nullptr, + I->getParamAlign().getValueOr(DL.getPrefTypeAlign(AgTy)), "", + InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; @@ -453,12 +449,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, assert(It != ArgIndices.end() && "GEP not handled??"); } - std::string NewName = I->getName(); - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { - NewName += "." + utostr(Operands[i]); - } - NewName += ".val"; - TheArg->setName(NewName); + TheArg->setName(formatv("{0}.{1:$[.]}.val", I->getName(), + make_range(Operands.begin(), Operands.end()))); LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); @@ -492,10 +484,9 @@ static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) { // Look at all call sites of the function. At this point we know we only have // direct callees. for (User *U : Callee->users()) { - CallSite CS(U); - assert(CS && "Should only have direct calls!"); + CallBase &CB = cast<CallBase>(*U); - if (!isDereferenceablePointer(CS.getArgument(ArgNo), Ty, DL)) + if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL)) return false; } return true; @@ -774,8 +765,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR return true; } -/// Checks if a type could have padding bytes. -static bool isDenselyPacked(Type *type, const DataLayout &DL) { +bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) { // There is no size information, so be conservative. if (!type->isSized()) return false; @@ -785,13 +775,18 @@ static bool isDenselyPacked(Type *type, const DataLayout &DL) { if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) return false; - if (!isa<CompositeType>(type)) - return true; + // FIXME: This isn't the right way to check for padding in vectors with + // non-byte-size elements. + if (VectorType *seqTy = dyn_cast<VectorType>(type)) + return isDenselyPacked(seqTy->getElementType(), DL); - // For homogenous sequential types, check for padding within members. - if (SequentialType *seqTy = dyn_cast<SequentialType>(type)) + // For array types, check for padding within members. + if (ArrayType *seqTy = dyn_cast<ArrayType>(type)) return isDenselyPacked(seqTy->getElementType(), DL); + if (!isa<StructType>(type)) + return true; + // Check for padding within and between elements of a struct. 
StructType *StructTy = cast<StructType>(type); const StructLayout *Layout = DL.getStructLayout(StructTy); @@ -844,14 +839,16 @@ static bool canPaddingBeAccessed(Argument *arg) { return false; } -static bool areFunctionArgsABICompatible( +bool ArgumentPromotionPass::areFunctionArgsABICompatible( const Function &F, const TargetTransformInfo &TTI, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform) { for (const Use &U : F.uses()) { - CallSite CS(U.getUser()); - const Function *Caller = CS.getCaller(); - const Function *Callee = CS.getCalledFunction(); + CallBase *CB = dyn_cast<CallBase>(U.getUser()); + if (!CB) + return false; + const Function *Caller = CB->getCaller(); + const Function *Callee = CB->getCalledFunction(); if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) || !TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform)) return false; @@ -866,7 +863,7 @@ static bool areFunctionArgsABICompatible( static Function * promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, unsigned MaxElements, - Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> + Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>> ReplaceCallSite, const TargetTransformInfo &TTI) { // Don't perform argument promotion for naked functions; otherwise we can end @@ -905,16 +902,16 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, // is self-recursive and check that target features are compatible. bool isSelfRecursive = false; for (Use &U : F->uses()) { - CallSite CS(U.getUser()); + CallBase *CB = dyn_cast<CallBase>(U.getUser()); // Must be a direct call. - if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) + if (CB == nullptr || !CB->isCallee(&U)) return nullptr; // Can't change signature of musttail callee - if (CS.isMustTailCall()) + if (CB->isMustTailCall()) return nullptr; - if (CS.getInstruction()->getParent()->getParent() == F) + if (CB->getParent()->getParent() == F) isSelfRecursive = true; } @@ -942,18 +939,18 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, F->removeParamAttr(ArgNo, Attribute::StructRet); F->addParamAttr(ArgNo, Attribute::NoAlias); for (Use &U : F->uses()) { - CallSite CS(U.getUser()); - CS.removeParamAttr(ArgNo, Attribute::StructRet); - CS.addParamAttr(ArgNo, Attribute::NoAlias); + CallBase &CB = cast<CallBase>(*U.getUser()); + CB.removeParamAttr(ArgNo, Attribute::StructRet); + CB.addParamAttr(ArgNo, Attribute::NoAlias); } } // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe, if the passed value is densely // packed or if we can prove the padding bytes are never accessed. 
- bool isSafeToPromote = - PtrArg->hasByValAttr() && - (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); + bool isSafeToPromote = PtrArg->hasByValAttr() && + (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) || + !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (MaxElements > 0 && STy->getNumElements() > MaxElements) { @@ -1011,8 +1008,8 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; - if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote, - ByValArgsToTransform)) + if (!ArgumentPromotionPass::areFunctionArgsABICompatible( + *F, TTI, ArgsToPromote, ByValArgsToTransform)) return nullptr; return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); @@ -1135,14 +1132,13 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { if (!OldF) continue; - auto ReplaceCallSite = [&](CallSite OldCS, CallSite NewCS) { - Function *Caller = OldCS.getInstruction()->getParent()->getParent(); + auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) { + Function *Caller = OldCS.getParent()->getParent(); CallGraphNode *NewCalleeNode = CG.getOrInsertFunction(NewCS.getCalledFunction()); CallGraphNode *CallerNode = CG[Caller]; - CallerNode->replaceCallEdge(*cast<CallBase>(OldCS.getInstruction()), - *cast<CallBase>(NewCS.getInstruction()), - NewCalleeNode); + CallerNode->replaceCallEdge(cast<CallBase>(OldCS), + cast<CallBase>(NewCS), NewCalleeNode); }; const TargetTransformInfo &TTI = diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index f2995817eaf89..f96dac5f3515c 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements an inter procedural pass that deduces and/or propagating +// This file implements an interprocedural pass that deduces and/or propagates // attributes. This is done in an abstract interpretation style fixpoint // iteration. See the Attributor.h file comment and the class descriptions in // that file for more information. 
@@ -15,29 +15,16 @@ #include "llvm/Transforms/IPO/Attributor.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -47,10 +34,12 @@ using namespace llvm; #define DEBUG_TYPE "attributor" +STATISTIC(NumFnDeleted, "Number of function deleted"); STATISTIC(NumFnWithExactDefinition, - "Number of function with exact definitions"); + "Number of functions with exact definitions"); STATISTIC(NumFnWithoutExactDefinition, - "Number of function without exact definitions"); + "Number of functions without exact definitions"); +STATISTIC(NumFnShallowWrapperCreated, "Number of shallow wrappers created"); STATISTIC(NumAttributesTimedOut, "Number of abstract attributes timed out before fixpoint"); STATISTIC(NumAttributesValidFixpoint, @@ -60,80 +49,6 @@ STATISTIC(NumAttributesManifested, STATISTIC(NumAttributesFixedDueToRequiredDependences, "Number of abstract attributes fixed due to required dependences"); -// Some helper macros to deal with statistics tracking. -// -// Usage: -// For simple IR attribute tracking overload trackStatistics in the abstract -// attribute and choose the right STATS_DECLTRACK_********* macro, -// e.g.,: -// void trackStatistics() const override { -// STATS_DECLTRACK_ARG_ATTR(returned) -// } -// If there is a single "increment" side one can use the macro -// STATS_DECLTRACK with a custom message. If there are multiple increment -// sides, STATS_DECL and STATS_TRACK can also be used separatly. 
-// -#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \ - ("Number of " #TYPE " marked '" #NAME "'") -#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME -#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG); -#define STATS_DECL(NAME, TYPE, MSG) \ - STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG); -#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE)); -#define STATS_DECLTRACK(NAME, TYPE, MSG) \ - { \ - STATS_DECL(NAME, TYPE, MSG) \ - STATS_TRACK(NAME, TYPE) \ - } -#define STATS_DECLTRACK_ARG_ATTR(NAME) \ - STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME)) -#define STATS_DECLTRACK_CSARG_ATTR(NAME) \ - STATS_DECLTRACK(NAME, CSArguments, \ - BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME)) -#define STATS_DECLTRACK_FN_ATTR(NAME) \ - STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME)) -#define STATS_DECLTRACK_CS_ATTR(NAME) \ - STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME)) -#define STATS_DECLTRACK_FNRET_ATTR(NAME) \ - STATS_DECLTRACK(NAME, FunctionReturn, \ - BUILD_STAT_MSG_IR_ATTR(function returns, NAME)) -#define STATS_DECLTRACK_CSRET_ATTR(NAME) \ - STATS_DECLTRACK(NAME, CSReturn, \ - BUILD_STAT_MSG_IR_ATTR(call site returns, NAME)) -#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \ - STATS_DECLTRACK(NAME, Floating, \ - ("Number of floating values known to be '" #NAME "'")) - -// Specialization of the operator<< for abstract attributes subclasses. This -// disambiguates situations where multiple operators are applicable. -namespace llvm { -#define PIPE_OPERATOR(CLASS) \ - raw_ostream &operator<<(raw_ostream &OS, const CLASS &AA) { \ - return OS << static_cast<const AbstractAttribute &>(AA); \ - } - -PIPE_OPERATOR(AAIsDead) -PIPE_OPERATOR(AANoUnwind) -PIPE_OPERATOR(AANoSync) -PIPE_OPERATOR(AANoRecurse) -PIPE_OPERATOR(AAWillReturn) -PIPE_OPERATOR(AANoReturn) -PIPE_OPERATOR(AAReturnedValues) -PIPE_OPERATOR(AANonNull) -PIPE_OPERATOR(AANoAlias) -PIPE_OPERATOR(AADereferenceable) -PIPE_OPERATOR(AAAlign) -PIPE_OPERATOR(AANoCapture) -PIPE_OPERATOR(AAValueSimplify) -PIPE_OPERATOR(AANoFree) -PIPE_OPERATOR(AAHeapToStack) -PIPE_OPERATOR(AAReachability) -PIPE_OPERATOR(AAMemoryBehavior) -PIPE_OPERATOR(AAValueConstantRange) - -#undef PIPE_OPERATOR -} // namespace llvm - // TODO: Determine a good default value. 
// // In the LLVM-TS and SPEC2006, 32 seems to not induce compile time overheads @@ -151,30 +66,24 @@ static cl::opt<bool> VerifyMaxFixpointIterations( cl::desc("Verify that max-iterations is a tight bound for a fixpoint"), cl::init(false)); -static cl::opt<bool> DisableAttributor( - "attributor-disable", cl::Hidden, - cl::desc("Disable the attributor inter-procedural deduction pass."), - cl::init(true)); - static cl::opt<bool> AnnotateDeclarationCallSites( "attributor-annotate-decl-cs", cl::Hidden, cl::desc("Annotate call sites of function declarations."), cl::init(false)); -static cl::opt<bool> ManifestInternal( - "attributor-manifest-internal", cl::Hidden, - cl::desc("Manifest Attributor internal string attributes."), - cl::init(false)); - -static cl::opt<unsigned> DepRecInterval( - "attributor-dependence-recompute-interval", cl::Hidden, - cl::desc("Number of iterations until dependences are recomputed."), - cl::init(4)); - static cl::opt<bool> EnableHeapToStack("enable-heap-to-stack-conversion", cl::init(true), cl::Hidden); -static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), - cl::Hidden); +static cl::opt<bool> + AllowShallowWrappers("attributor-allow-shallow-wrappers", cl::Hidden, + cl::desc("Allow the Attributor to create shallow " + "wrappers for non-exact definitions."), + cl::init(false)); + +static cl::list<std::string> + SeedAllowList("attributor-seed-allow-list", cl::Hidden, + cl::desc("Comma seperated list of attrbute names that are " + "allowed to be seeded."), + cl::ZeroOrMore, cl::CommaSeparated); /// Logic operators for the change status enum class. /// @@ -187,6 +96,49 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) { } ///} +/// Return true if \p New is equal or worse than \p Old. +static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { + if (!Old.isIntAttribute()) + return true; + + return Old.getValueAsInt() >= New.getValueAsInt(); +} + +/// Return true if the information provided by \p Attr was added to the +/// attribute list \p Attrs. This is only the case if it was not already present +/// in \p Attrs at the position describe by \p PK and \p AttrIdx. +static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr, + AttributeList &Attrs, int AttrIdx) { + + if (Attr.isEnumAttribute()) { + Attribute::AttrKind Kind = Attr.getKindAsEnum(); + if (Attrs.hasAttribute(AttrIdx, Kind)) + if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) + return false; + Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); + return true; + } + if (Attr.isStringAttribute()) { + StringRef Kind = Attr.getKindAsString(); + if (Attrs.hasAttribute(AttrIdx, Kind)) + if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) + return false; + Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); + return true; + } + if (Attr.isIntAttribute()) { + Attribute::AttrKind Kind = Attr.getKindAsEnum(); + if (Attrs.hasAttribute(AttrIdx, Kind)) + if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) + return false; + Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind); + Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); + return true; + } + + llvm_unreachable("Expected enum or string attribute!"); +} + Argument *IRPosition::getAssociatedArgument() const { if (getPositionKind() == IRP_ARGUMENT) return cast<Argument>(&getAnchorValue()); @@ -202,10 +154,10 @@ Argument *IRPosition::getAssociatedArgument() const { // of the underlying call site operand, we want the corresponding callback // callee argument and not the direct callee argument. 
Optional<Argument *> CBCandidateArg; - SmallVector<const Use *, 4> CBUses; - ImmutableCallSite ICS(&getAnchorValue()); - AbstractCallSite::getCallbackUses(ICS, CBUses); - for (const Use *U : CBUses) { + SmallVector<const Use *, 4> CallbackUses; + const auto &CB = cast<CallBase>(getAnchorValue()); + AbstractCallSite::getCallbackUses(CB, CallbackUses); + for (const Use *U : CallbackUses) { AbstractCallSite ACS(U); assert(ACS && ACS.isCallbackCall()); if (!ACS.getCalledFunction()) @@ -234,176 +186,13 @@ Argument *IRPosition::getAssociatedArgument() const { // If no callbacks were found, or none used the underlying call site operand // exclusively, use the direct callee argument if available. - const Function *Callee = ICS.getCalledFunction(); + const Function *Callee = CB.getCalledFunction(); if (Callee && Callee->arg_size() > unsigned(ArgNo)) return Callee->getArg(ArgNo); return nullptr; } -/// For calls (and invokes) we will only replace instruction uses to not disturb -/// the old style call graph. -/// TODO: Remove this once we get rid of the old PM. -static void replaceAllInstructionUsesWith(Value &Old, Value &New) { - if (!isa<CallBase>(Old)) - return Old.replaceAllUsesWith(&New); - SmallVector<Use *, 8> Uses; - for (Use &U : Old.uses()) - if (isa<Instruction>(U.getUser())) - Uses.push_back(&U); - for (Use *U : Uses) - U->set(&New); -} - -/// Recursively visit all values that might become \p IRP at some point. This -/// will be done by looking through cast instructions, selects, phis, and calls -/// with the "returned" attribute. Once we cannot look through the value any -/// further, the callback \p VisitValueCB is invoked and passed the current -/// value, the \p State, and a flag to indicate if we stripped anything. To -/// limit how much effort is invested, we will never visit more values than -/// specified by \p MaxValues. -template <typename AAType, typename StateTy> -static bool genericValueTraversal( - Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State, - const function_ref<bool(Value &, StateTy &, bool)> &VisitValueCB, - int MaxValues = 8) { - - const AAIsDead *LivenessAA = nullptr; - if (IRP.getAnchorScope()) - LivenessAA = &A.getAAFor<AAIsDead>( - QueryingAA, IRPosition::function(*IRP.getAnchorScope()), - /* TrackDependence */ false); - bool AnyDead = false; - - // TODO: Use Positions here to allow context sensitivity in VisitValueCB - SmallPtrSet<Value *, 16> Visited; - SmallVector<Value *, 16> Worklist; - Worklist.push_back(&IRP.getAssociatedValue()); - - int Iteration = 0; - do { - Value *V = Worklist.pop_back_val(); - - // Check if we should process the current value. To prevent endless - // recursion keep a record of the values we followed! - if (!Visited.insert(V).second) - continue; - - // Make sure we limit the compile time for complex expressions. - if (Iteration++ >= MaxValues) - return false; - - // Explicitly look through calls with a "returned" attribute if we do - // not have a pointer as stripPointerCasts only works on them. - Value *NewV = nullptr; - if (V->getType()->isPointerTy()) { - NewV = V->stripPointerCasts(); - } else { - CallSite CS(V); - if (CS && CS.getCalledFunction()) { - for (Argument &Arg : CS.getCalledFunction()->args()) - if (Arg.hasReturnedAttr()) { - NewV = CS.getArgOperand(Arg.getArgNo()); - break; - } - } - } - if (NewV && NewV != V) { - Worklist.push_back(NewV); - continue; - } - - // Look through select instructions, visit both potential values. 
- if (auto *SI = dyn_cast<SelectInst>(V)) { - Worklist.push_back(SI->getTrueValue()); - Worklist.push_back(SI->getFalseValue()); - continue; - } - - // Look through phi nodes, visit all live operands. - if (auto *PHI = dyn_cast<PHINode>(V)) { - assert(LivenessAA && - "Expected liveness in the presence of instructions!"); - for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { - const BasicBlock *IncomingBB = PHI->getIncomingBlock(u); - if (LivenessAA->isAssumedDead(IncomingBB->getTerminator())) { - AnyDead = true; - continue; - } - Worklist.push_back(PHI->getIncomingValue(u)); - } - continue; - } - - // Once a leaf is reached we inform the user through the callback. - if (!VisitValueCB(*V, State, Iteration > 1)) - return false; - } while (!Worklist.empty()); - - // If we actually used liveness information so we have to record a dependence. - if (AnyDead) - A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL); - - // All values have been visited. - return true; -} - -/// Return true if \p New is equal or worse than \p Old. -static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { - if (!Old.isIntAttribute()) - return true; - - return Old.getValueAsInt() >= New.getValueAsInt(); -} - -/// Return true if the information provided by \p Attr was added to the -/// attribute list \p Attrs. This is only the case if it was not already present -/// in \p Attrs at the position describe by \p PK and \p AttrIdx. -static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr, - AttributeList &Attrs, int AttrIdx) { - - if (Attr.isEnumAttribute()) { - Attribute::AttrKind Kind = Attr.getKindAsEnum(); - if (Attrs.hasAttribute(AttrIdx, Kind)) - if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) - return false; - Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); - return true; - } - if (Attr.isStringAttribute()) { - StringRef Kind = Attr.getKindAsString(); - if (Attrs.hasAttribute(AttrIdx, Kind)) - if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) - return false; - Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); - return true; - } - if (Attr.isIntAttribute()) { - Attribute::AttrKind Kind = Attr.getKindAsEnum(); - if (Attrs.hasAttribute(AttrIdx, Kind)) - if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) - return false; - Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind); - Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); - return true; - } - - llvm_unreachable("Expected enum or string attribute!"); -} - -static const Value * -getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset, - const DataLayout &DL, - bool AllowNonInbounds = false) { - const Value *Ptr = - Attributor::getPointerOperand(I, /* AllowVolatile */ false); - if (!Ptr) - return nullptr; - - return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL, - AllowNonInbounds); -} - ChangeStatus AbstractAttribute::update(Attributor &A) { ChangeStatus HasChanged = ChangeStatus::UNCHANGED; if (getState().isAtFixpoint()) @@ -422,7 +211,7 @@ ChangeStatus AbstractAttribute::update(Attributor &A) { ChangeStatus IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, const ArrayRef<Attribute> &DeducedAttrs) { - Function *ScopeFn = IRP.getAssociatedFunction(); + Function *ScopeFn = IRP.getAnchorScope(); IRPosition::Kind PK = IRP.getPositionKind(); // In the following some generic code that will manifest attributes in @@ -442,7 +231,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, case IRPosition::IRP_CALL_SITE: 
case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes(); + Attrs = cast<CallBase>(IRP.getAnchorValue()).getAttributes(); break; } @@ -467,7 +256,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, case IRPosition::IRP_CALL_SITE: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - CallSite(&IRP.getAnchorValue()).setAttributes(Attrs); + cast<CallBase>(IRP.getAnchorValue()).setAttributes(Attrs); break; case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: @@ -477,13 +266,14 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, return HasChanged; } -const IRPosition IRPosition::EmptyKey(255); -const IRPosition IRPosition::TombstoneKey(256); +const IRPosition IRPosition::EmptyKey(DenseMapInfo<void *>::getEmptyKey()); +const IRPosition + IRPosition::TombstoneKey(DenseMapInfo<void *>::getTombstoneKey()); SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRP); - ImmutableCallSite ICS(&IRP.getAnchorValue()); + const auto *CB = dyn_cast<CallBase>(&IRP.getAnchorValue()); switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: @@ -491,37 +281,43 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { return; case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_RETURNED: - IRPositions.emplace_back( - IRPosition::function(*IRP.getAssociatedFunction())); + IRPositions.emplace_back(IRPosition::function(*IRP.getAnchorScope())); return; case IRPosition::IRP_CALL_SITE: - assert(ICS && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!ICS.hasOperandBundles()) - if (const Function *Callee = ICS.getCalledFunction()) + if (!CB->hasOperandBundles()) + if (const Function *Callee = CB->getCalledFunction()) IRPositions.emplace_back(IRPosition::function(*Callee)); return; case IRPosition::IRP_CALL_SITE_RETURNED: - assert(ICS && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!ICS.hasOperandBundles()) { - if (const Function *Callee = ICS.getCalledFunction()) { + if (!CB->hasOperandBundles()) { + if (const Function *Callee = CB->getCalledFunction()) { IRPositions.emplace_back(IRPosition::returned(*Callee)); IRPositions.emplace_back(IRPosition::function(*Callee)); + for (const Argument &Arg : Callee->args()) + if (Arg.hasReturnedAttr()) { + IRPositions.emplace_back( + IRPosition::callsite_argument(*CB, Arg.getArgNo())); + IRPositions.emplace_back( + IRPosition::value(*CB->getArgOperand(Arg.getArgNo()))); + IRPositions.emplace_back(IRPosition::argument(Arg)); + } } } - IRPositions.emplace_back( - IRPosition::callsite_function(cast<CallBase>(*ICS.getInstruction()))); + IRPositions.emplace_back(IRPosition::callsite_function(*CB)); return; case IRPosition::IRP_CALL_SITE_ARGUMENT: { int ArgNo = IRP.getArgNo(); - assert(ICS && ArgNo >= 0 && "Expected call site!"); + assert(CB && ArgNo >= 0 && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. 
- if (!ICS.hasOperandBundles()) { - const Function *Callee = ICS.getCalledFunction(); + if (!CB->hasOperandBundles()) { + const Function *Callee = CB->getCalledFunction(); if (Callee && Callee->arg_size() > unsigned(ArgNo)) IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); if (Callee) @@ -534,10 +330,11 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { } bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs, - bool IgnoreSubsumingPositions) const { + bool IgnoreSubsumingPositions, Attributor *A) const { + SmallVector<Attribute, 4> Attrs; for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) { for (Attribute::AttrKind AK : AKs) - if (EquivIRP.getAttr(AK).getKindAsEnum() == AK) + if (EquivIRP.getAttrsFromIRAttr(AK, Attrs)) return true; // The first position returned by the SubsumingPositionIterator is // always the position itself. If we ignore subsuming positions we @@ -545,5052 +342,300 @@ bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs, if (IgnoreSubsumingPositions) break; } + if (A) + for (Attribute::AttrKind AK : AKs) + if (getAttrsFromAssumes(AK, Attrs, *A)) + return true; return false; } void IRPosition::getAttrs(ArrayRef<Attribute::AttrKind> AKs, SmallVectorImpl<Attribute> &Attrs, - bool IgnoreSubsumingPositions) const { + bool IgnoreSubsumingPositions, Attributor *A) const { for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) { - for (Attribute::AttrKind AK : AKs) { - const Attribute &Attr = EquivIRP.getAttr(AK); - if (Attr.getKindAsEnum() == AK) - Attrs.push_back(Attr); - } + for (Attribute::AttrKind AK : AKs) + EquivIRP.getAttrsFromIRAttr(AK, Attrs); // The first position returned by the SubsumingPositionIterator is // always the position itself. If we ignore subsuming positions we // are done after the first iteration. if (IgnoreSubsumingPositions) break; } + if (A) + for (Attribute::AttrKind AK : AKs) + getAttrsFromAssumes(AK, Attrs, *A); +} + +bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK, + SmallVectorImpl<Attribute> &Attrs) const { + if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) + return false; + + AttributeList AttrList; + if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue())) + AttrList = CB->getAttributes(); + else + AttrList = getAssociatedFunction()->getAttributes(); + + bool HasAttr = AttrList.hasAttribute(getAttrIdx(), AK); + if (HasAttr) + Attrs.push_back(AttrList.getAttribute(getAttrIdx(), AK)); + return HasAttr; +} + +bool IRPosition::getAttrsFromAssumes(Attribute::AttrKind AK, + SmallVectorImpl<Attribute> &Attrs, + Attributor &A) const { + assert(getPositionKind() != IRP_INVALID && "Did expect a valid position!"); + Value &AssociatedValue = getAssociatedValue(); + + const Assume2KnowledgeMap &A2K = + A.getInfoCache().getKnowledgeMap().lookup({&AssociatedValue, AK}); + + // Check if we found any potential assume use, if not we don't need to create + // explorer iterators. 
+ if (A2K.empty()) + return false; + + LLVMContext &Ctx = AssociatedValue.getContext(); + unsigned AttrsSize = Attrs.size(); + MustBeExecutedContextExplorer &Explorer = + A.getInfoCache().getMustBeExecutedContextExplorer(); + auto EIt = Explorer.begin(getCtxI()), EEnd = Explorer.end(getCtxI()); + for (auto &It : A2K) + if (Explorer.findInContextOf(It.first, EIt, EEnd)) + Attrs.push_back(Attribute::get(Ctx, AK, It.second.Max)); + return AttrsSize != Attrs.size(); } void IRPosition::verify() { - switch (KindOrArgNo) { - default: - assert(KindOrArgNo >= 0 && "Expected argument or call site argument!"); - assert((isa<CallBase>(AnchorVal) || isa<Argument>(AnchorVal)) && - "Expected call base or argument for positive attribute index!"); - if (isa<Argument>(AnchorVal)) { - assert(cast<Argument>(AnchorVal)->getArgNo() == unsigned(getArgNo()) && - "Argument number mismatch!"); - assert(cast<Argument>(AnchorVal) == &getAssociatedValue() && - "Associated value mismatch!"); - } else { - assert(cast<CallBase>(*AnchorVal).arg_size() > unsigned(getArgNo()) && - "Call site argument number mismatch!"); - assert(cast<CallBase>(*AnchorVal).getArgOperand(getArgNo()) == - &getAssociatedValue() && - "Associated value mismatch!"); - } - break; +#ifdef EXPENSIVE_CHECKS + switch (getPositionKind()) { case IRP_INVALID: - assert(!AnchorVal && "Expected no value for an invalid position!"); - break; + assert(!Enc.getOpaqueValue() && + "Expected a nullptr for an invalid position!"); + return; case IRP_FLOAT: assert((!isa<CallBase>(&getAssociatedValue()) && !isa<Argument>(&getAssociatedValue())) && "Expected specialized kind for call base and argument values!"); - break; + return; case IRP_RETURNED: - assert(isa<Function>(AnchorVal) && + assert(isa<Function>(getAsValuePtr()) && "Expected function for a 'returned' position!"); - assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); - break; + assert(getAsValuePtr() == &getAssociatedValue() && + "Associated value mismatch!"); + return; case IRP_CALL_SITE_RETURNED: - assert((isa<CallBase>(AnchorVal)) && + assert((isa<CallBase>(getAsValuePtr())) && "Expected call base for 'call site returned' position!"); - assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); - break; + assert(getAsValuePtr() == &getAssociatedValue() && + "Associated value mismatch!"); + return; case IRP_CALL_SITE: - assert((isa<CallBase>(AnchorVal)) && + assert((isa<CallBase>(getAsValuePtr())) && "Expected call base for 'call site function' position!"); - assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); - break; + assert(getAsValuePtr() == &getAssociatedValue() && + "Associated value mismatch!"); + return; case IRP_FUNCTION: - assert(isa<Function>(AnchorVal) && + assert(isa<Function>(getAsValuePtr()) && "Expected function for a 'function' position!"); - assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); - break; - } -} - -namespace { -/// Helper function to clamp a state \p S of type \p StateType with the -/// information in \p R and indicate/return if \p S did change (as-in update is -/// required to be run again). -template <typename StateType> -ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) { - auto Assumed = S.getAssumed(); - S ^= R; - return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED - : ChangeStatus::CHANGED; -} - -/// Clamp the information known for all returned values of a function -/// (identified by \p QueryingAA) into \p S. 
-template <typename AAType, typename StateType = typename AAType::StateType> -static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA, - StateType &S) { - LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for " - << QueryingAA << " into " << S << "\n"); - - assert((QueryingAA.getIRPosition().getPositionKind() == - IRPosition::IRP_RETURNED || - QueryingAA.getIRPosition().getPositionKind() == - IRPosition::IRP_CALL_SITE_RETURNED) && - "Can only clamp returned value states for a function returned or call " - "site returned position!"); - - // Use an optional state as there might not be any return values and we want - // to join (IntegerState::operator&) the state of all there are. - Optional<StateType> T; - - // Callback for each possibly returned value. - auto CheckReturnValue = [&](Value &RV) -> bool { - const IRPosition &RVPos = IRPosition::value(RV); - const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos); - LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr() - << " @ " << RVPos << "\n"); - const StateType &AAS = static_cast<const StateType &>(AA.getState()); - if (T.hasValue()) - *T &= AAS; - else - T = AAS; - LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T - << "\n"); - return T->isValidState(); - }; - - if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA)) - S.indicatePessimisticFixpoint(); - else if (T.hasValue()) - S ^= *T; -} - -/// Helper class to compose two generic deduction -template <typename AAType, typename Base, typename StateType, - template <typename...> class F, template <typename...> class G> -struct AAComposeTwoGenericDeduction - : public F<AAType, G<AAType, Base, StateType>, StateType> { - AAComposeTwoGenericDeduction(const IRPosition &IRP) - : F<AAType, G<AAType, Base, StateType>, StateType>(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus ChangedF = - F<AAType, G<AAType, Base, StateType>, StateType>::updateImpl(A); - ChangeStatus ChangedG = G<AAType, Base, StateType>::updateImpl(A); - return ChangedF | ChangedG; + assert(getAsValuePtr() == &getAssociatedValue() && + "Associated value mismatch!"); + return; + case IRP_ARGUMENT: + assert(isa<Argument>(getAsValuePtr()) && + "Expected argument for a 'argument' position!"); + assert(getAsValuePtr() == &getAssociatedValue() && + "Associated value mismatch!"); + return; + case IRP_CALL_SITE_ARGUMENT: { + Use *U = getAsUsePtr(); + assert(U && "Expected use for a 'call site argument' position!"); + assert(isa<CallBase>(U->getUser()) && + "Expected call base user for a 'call site argument' position!"); + assert(cast<CallBase>(U->getUser())->isArgOperand(U) && + "Expected call base argument operand for a 'call site argument' " + "position"); + assert(cast<CallBase>(U->getUser())->getArgOperandNo(U) == + unsigned(getArgNo()) && + "Argument number mismatch!"); + assert(U->get() == &getAssociatedValue() && "Associated value mismatch!"); + return; } -}; - -/// Helper class for generic deduction: return value -> returned position. -template <typename AAType, typename Base, - typename StateType = typename AAType::StateType> -struct AAReturnedFromReturnedValues : public Base { - AAReturnedFromReturnedValues(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override { - StateType S; - clampReturnedValueStates<AAType, StateType>(A, *this, S); - // TODO: If we know we visited all returned values, thus no are assumed - // dead, we can take the known information from the state T. - return clampStateAndIndicateChange<StateType>(this->getState(), S); } -}; - -/// Clamp the information known at all call sites for a given argument -/// (identified by \p QueryingAA) into \p S. -template <typename AAType, typename StateType = typename AAType::StateType> -static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, - StateType &S) { - LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for " - << QueryingAA << " into " << S << "\n"); - - assert(QueryingAA.getIRPosition().getPositionKind() == - IRPosition::IRP_ARGUMENT && - "Can only clamp call site argument states for an argument position!"); - - // Use an optional state as there might not be any return values and we want - // to join (IntegerState::operator&) the state of all there are. - Optional<StateType> T; - - // The argument number which is also the call site argument number. - unsigned ArgNo = QueryingAA.getIRPosition().getArgNo(); - - auto CallSiteCheck = [&](AbstractCallSite ACS) { - const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); - // Check if a coresponding argument was found or if it is on not associated - // (which can happen for callback calls). - if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) - return false; - - const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos); - LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction() - << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n"); - const StateType &AAS = static_cast<const StateType &>(AA.getState()); - if (T.hasValue()) - *T &= AAS; - else - T = AAS; - LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T - << "\n"); - return T->isValidState(); - }; - - if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true)) - S.indicatePessimisticFixpoint(); - else if (T.hasValue()) - S ^= *T; +#endif } -/// Helper class for generic deduction: call site argument -> argument position. -template <typename AAType, typename Base, - typename StateType = typename AAType::StateType> -struct AAArgumentFromCallSiteArguments : public Base { - AAArgumentFromCallSiteArguments(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - StateType S; - clampCallSiteArgumentStates<AAType, StateType>(A, *this, S); - // TODO: If we know we visited all incoming values, thus no are assumed - // dead, we can take the known information from the state T. - return clampStateAndIndicateChange<StateType>(this->getState(), S); - } -}; - -/// Helper class for generic replication: function returned -> cs returned. -template <typename AAType, typename Base, - typename StateType = typename AAType::StateType> -struct AACallSiteReturnedFromReturned : public Base { - AACallSiteReturnedFromReturned(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override { - assert(this->getIRPosition().getPositionKind() == - IRPosition::IRP_CALL_SITE_RETURNED && - "Can only wrap function returned positions for call site returned " - "positions!"); - auto &S = this->getState(); - - const Function *AssociatedFunction = - this->getIRPosition().getAssociatedFunction(); - if (!AssociatedFunction) - return S.indicatePessimisticFixpoint(); - - IRPosition FnPos = IRPosition::returned(*AssociatedFunction); - const AAType &AA = A.getAAFor<AAType>(*this, FnPos); - return clampStateAndIndicateChange( - S, static_cast<const typename AAType::StateType &>(AA.getState())); - } -}; - -/// Helper class for generic deduction using must-be-executed-context -/// Base class is required to have `followUse` method. - -/// bool followUse(Attributor &A, const Use *U, const Instruction *I) -/// U - Underlying use. -/// I - The user of the \p U. -/// `followUse` returns true if the value should be tracked transitively. - -template <typename AAType, typename Base, - typename StateType = typename AAType::StateType> -struct AAFromMustBeExecutedContext : public Base { - AAFromMustBeExecutedContext(const IRPosition &IRP) : Base(IRP) {} - - void initialize(Attributor &A) override { - Base::initialize(A); - const IRPosition &IRP = this->getIRPosition(); - Instruction *CtxI = IRP.getCtxI(); - - if (!CtxI) - return; - - for (const Use &U : IRP.getAssociatedValue().uses()) - Uses.insert(&U); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - auto BeforeState = this->getState(); - auto &S = this->getState(); - Instruction *CtxI = this->getIRPosition().getCtxI(); - if (!CtxI) - return ChangeStatus::UNCHANGED; - - MustBeExecutedContextExplorer &Explorer = - A.getInfoCache().getMustBeExecutedContextExplorer(); - - auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI); - for (unsigned u = 0; u < Uses.size(); ++u) { - const Use *U = Uses[u]; - if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) { - bool Found = Explorer.findInContextOf(UserI, EIt, EEnd); - if (Found && Base::followUse(A, U, UserI)) - for (const Use &Us : UserI->uses()) - Uses.insert(&Us); - } - } - - return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; - } - -private: - /// Container for (transitive) uses of the associated value. - SetVector<const Use *> Uses; -}; - -template <typename AAType, typename Base, - typename StateType = typename AAType::StateType> -using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext = - AAComposeTwoGenericDeduction<AAType, Base, StateType, - AAFromMustBeExecutedContext, - AAArgumentFromCallSiteArguments>; - -template <typename AAType, typename Base, - typename StateType = typename AAType::StateType> -using AACallSiteReturnedFromReturnedAndMustBeExecutedContext = - AAComposeTwoGenericDeduction<AAType, Base, StateType, - AAFromMustBeExecutedContext, - AACallSiteReturnedFromReturned>; - -/// -----------------------NoUnwind Function Attribute-------------------------- - -struct AANoUnwindImpl : AANoUnwind { - AANoUnwindImpl(const IRPosition &IRP) : AANoUnwind(IRP) {} - - const std::string getAsStr() const override { - return getAssumed() ? "nounwind" : "may-unwind"; - } - - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override { - auto Opcodes = { - (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet, - (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume}; - - auto CheckForNoUnwind = [&](Instruction &I) { - if (!I.mayThrow()) - return true; - - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { - const auto &NoUnwindAA = - A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(ICS)); - return NoUnwindAA.isAssumedNoUnwind(); - } - return false; - }; - - if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes)) - return indicatePessimisticFixpoint(); - - return ChangeStatus::UNCHANGED; - } -}; - -struct AANoUnwindFunction final : public AANoUnwindImpl { - AANoUnwindFunction(const IRPosition &IRP) : AANoUnwindImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) } -}; - -/// NoUnwind attribute deduction for a call sites. -struct AANoUnwindCallSite final : AANoUnwindImpl { - AANoUnwindCallSite(const IRPosition &IRP) : AANoUnwindImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AANoUnwindImpl::initialize(A); - Function *F = getAssociatedFunction(); - if (!F) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::function(*F); - auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos); - return clampStateAndIndicateChange( - getState(), - static_cast<const AANoUnwind::StateType &>(FnAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); } -}; - -/// --------------------- Function Return Values ------------------------------- - -/// "Attribute" that collects all potential returned values and the return -/// instructions that they arise from. -/// -/// If there is a unique returned value R, the manifest method will: -/// - mark R with the "returned" attribute, if R is an argument. -class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState { - - /// Mapping of values potentially returned by the associated function to the - /// return instructions that might return them. - MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues; - - /// Mapping to remember the number of returned values for a call site such - /// that we can avoid updates if nothing changed. - DenseMap<const CallBase *, unsigned> NumReturnedValuesPerKnownAA; - - /// Set of unresolved calls returned by the associated function. - SmallSetVector<CallBase *, 4> UnresolvedCalls; - - /// State flags - /// - ///{ - bool IsFixed = false; - bool IsValidState = true; - ///} - -public: - AAReturnedValuesImpl(const IRPosition &IRP) : AAReturnedValues(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - // Reset the state. 
- IsFixed = false; - IsValidState = true; - ReturnedValues.clear(); - - Function *F = getAssociatedFunction(); - if (!F) { - indicatePessimisticFixpoint(); - return; - } - - // The map from instruction opcodes to those instructions in the function. - auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F); - - // Look through all arguments, if one is marked as returned we are done. - for (Argument &Arg : F->args()) { - if (Arg.hasReturnedAttr()) { - auto &ReturnInstSet = ReturnedValues[&Arg]; - for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) - ReturnInstSet.insert(cast<ReturnInst>(RI)); - - indicateOptimisticFixpoint(); - return; - } - } - - if (!F->hasExactDefinition()) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override; - - /// See AbstractAttribute::getState(...). - AbstractState &getState() override { return *this; } - - /// See AbstractAttribute::getState(...). - const AbstractState &getState() const override { return *this; } - - /// See AbstractAttribute::updateImpl(Attributor &A). - ChangeStatus updateImpl(Attributor &A) override; - - llvm::iterator_range<iterator> returned_values() override { - return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); - } - - llvm::iterator_range<const_iterator> returned_values() const override { - return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); - } - - const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const override { - return UnresolvedCalls; - } - - /// Return the number of potential return values, -1 if unknown. - size_t getNumReturnValues() const override { - return isValidState() ? ReturnedValues.size() : -1; - } - - /// Return an assumed unique return value if a single candidate is found. If - /// there cannot be one, return a nullptr. If it is not clear yet, return the - /// Optional::NoneType. - Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const; - - /// See AbstractState::checkForAllReturnedValues(...). - bool checkForAllReturnedValuesAndReturnInsts( - const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> - &Pred) const override; - - /// Pretty print the attribute similar to the IR representation. - const std::string getAsStr() const override; - - /// See AbstractState::isAtFixpoint(). - bool isAtFixpoint() const override { return IsFixed; } - - /// See AbstractState::isValidState(). - bool isValidState() const override { return IsValidState; } - - /// See AbstractState::indicateOptimisticFixpoint(...). 
- ChangeStatus indicateOptimisticFixpoint() override { - IsFixed = true; - return ChangeStatus::UNCHANGED; +Optional<Constant *> +Attributor::getAssumedConstant(const Value &V, const AbstractAttribute &AA, + bool &UsedAssumedInformation) { + const auto &ValueSimplifyAA = getAAFor<AAValueSimplify>( + AA, IRPosition::value(V), /* TrackDependence */ false); + Optional<Value *> SimplifiedV = + ValueSimplifyAA.getAssumedSimplifiedValue(*this); + bool IsKnown = ValueSimplifyAA.isKnown(); + UsedAssumedInformation |= !IsKnown; + if (!SimplifiedV.hasValue()) { + recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); + return llvm::None; } - - ChangeStatus indicatePessimisticFixpoint() override { - IsFixed = true; - IsValidState = false; - return ChangeStatus::CHANGED; + if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue())) { + recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); + return llvm::None; } -}; - -ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { - ChangeStatus Changed = ChangeStatus::UNCHANGED; - - // Bookkeeping. - assert(isValidState()); - STATS_DECLTRACK(KnownReturnValues, FunctionReturn, - "Number of function with known return values"); - - // Check if we have an assumed unique return value that we could manifest. - Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A); - - if (!UniqueRV.hasValue() || !UniqueRV.getValue()) - return Changed; - - // Bookkeeping. - STATS_DECLTRACK(UniqueReturnValue, FunctionReturn, - "Number of function with unique return"); - - // Callback to replace the uses of CB with the constant C. - auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) { - if (CB.getNumUses() == 0 || CB.isMustTailCall()) - return ChangeStatus::UNCHANGED; - replaceAllInstructionUsesWith(CB, C); - return ChangeStatus::CHANGED; - }; - - // If the assumed unique return value is an argument, annotate it. - if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) { - // TODO: This should be handled differently! - this->AnchorVal = UniqueRVArg; - this->KindOrArgNo = UniqueRVArg->getArgNo(); - Changed = IRAttribute::manifest(A); - } else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) { - // We can replace the returned value with the unique returned constant. - Value &AnchorValue = getAnchorValue(); - if (Function *F = dyn_cast<Function>(&AnchorValue)) { - for (const Use &U : F->uses()) - if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) - if (CB->isCallee(&U)) { - Constant *RVCCast = - CB->getType() == RVC->getType() - ? RVC - : ConstantExpr::getTruncOrBitCast(RVC, CB->getType()); - Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed; - } - } else { - assert(isa<CallBase>(AnchorValue) && - "Expcected a function or call base anchor!"); - Constant *RVCCast = - AnchorValue.getType() == RVC->getType() - ? RVC - : ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType()); - Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast); - } - if (Changed == ChangeStatus::CHANGED) - STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn, - "Number of function returns replaced by constant return"); + Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.getValue()); + if (CI && CI->getType() != V.getType()) { + // TODO: Check for a save conversion. + return nullptr; } - - return Changed; -} - -const std::string AAReturnedValuesImpl::getAsStr() const { - return (isAtFixpoint() ? "returns(#" : "may-return(#") + - (isValidState() ? 
std::to_string(getNumReturnValues()) : "?") + - ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]"; + if (CI) + recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); + return CI; } -Optional<Value *> -AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const { - // If checkForAllReturnedValues provides a unique value, ignoring potential - // undef values that can also be present, it is assumed to be the actual - // return value and forwarded to the caller of this method. If there are - // multiple, a nullptr is returned indicating there cannot be a unique - // returned value. - Optional<Value *> UniqueRV; - - auto Pred = [&](Value &RV) -> bool { - // If we found a second returned value and neither the current nor the saved - // one is an undef, there is no unique returned value. Undefs are special - // since we can pretend they have any value. - if (UniqueRV.hasValue() && UniqueRV != &RV && - !(isa<UndefValue>(RV) || isa<UndefValue>(UniqueRV.getValue()))) { - UniqueRV = nullptr; - return false; - } - - // Do not overwrite a value with an undef. - if (!UniqueRV.hasValue() || !isa<UndefValue>(RV)) - UniqueRV = &RV; - - return true; - }; - - if (!A.checkForAllReturnedValues(Pred, *this)) - UniqueRV = nullptr; - - return UniqueRV; +Attributor::~Attributor() { + // The abstract attributes are allocated via the BumpPtrAllocator Allocator, + // thus we cannot delete them. We can, and want to, destruct them though. + for (AbstractAttribute *AA : AllAbstractAttributes) + AA->~AbstractAttribute(); } -bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts( - const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> - &Pred) const { - if (!isValidState()) +bool Attributor::isAssumedDead(const AbstractAttribute &AA, + const AAIsDead *FnLivenessAA, + bool CheckBBLivenessOnly, DepClassTy DepClass) { + const IRPosition &IRP = AA.getIRPosition(); + if (!Functions.count(IRP.getAnchorScope())) return false; - - // Check all returned values but ignore call sites as long as we have not - // encountered an overdefined one during an update. - for (auto &It : ReturnedValues) { - Value *RV = It.first; - - CallBase *CB = dyn_cast<CallBase>(RV); - if (CB && !UnresolvedCalls.count(CB)) - continue; - - if (!Pred(*RV, It.second)) - return false; - } - - return true; -} - -ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { - size_t NumUnresolvedCalls = UnresolvedCalls.size(); - bool Changed = false; - - // State used in the value traversals starting in returned values. - struct RVState { - // The map in which we collect return values -> return instrs. - decltype(ReturnedValues) &RetValsMap; - // The flag to indicate a change. - bool &Changed; - // The return instrs we come from. - SmallSetVector<ReturnInst *, 4> RetInsts; - }; - - // Callback for a leaf value returned by the associated function. 
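The new Attributor::getAssumedConstant added in this hunk distinguishes three outcomes: llvm::None (no simplified value yet, or undef), a null Constant* (simplified, but not to a constant of the expected type), and an actual constant. A small sketch of how a caller might interpret the result; the helper name and the string return are purely illustrative and not from the patch:

// Illustrative fragment only; assumes the usual Attributor declarations are in scope.
static const char *describeSimplification(Attributor &A,
                                          const AbstractAttribute &AA,
                                          const Value &V) {
  bool UsedAssumedInformation = false;
  Optional<Constant *> C = A.getAssumedConstant(V, AA, UsedAssumedInformation);
  if (!C.hasValue())
    return "undecided (or undef); revisit in a later fixpoint iteration";
  if (!C.getValue())
    return "simplified, but not to a constant of the expected type";
  return UsedAssumedInformation ? "constant, based on assumed information"
                                : "constant, based on known information";
}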
- auto VisitValueCB = [](Value &Val, RVState &RVS, bool) -> bool { - auto Size = RVS.RetValsMap[&Val].size(); - RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end()); - bool Inserted = RVS.RetValsMap[&Val].size() != Size; - RVS.Changed |= Inserted; - LLVM_DEBUG({ - if (Inserted) - dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val - << " => " << RVS.RetInsts.size() << "\n"; - }); + return isAssumedDead(IRP, &AA, FnLivenessAA, CheckBBLivenessOnly, DepClass); +} + +bool Attributor::isAssumedDead(const Use &U, + const AbstractAttribute *QueryingAA, + const AAIsDead *FnLivenessAA, + bool CheckBBLivenessOnly, DepClassTy DepClass) { + Instruction *UserI = dyn_cast<Instruction>(U.getUser()); + if (!UserI) + return isAssumedDead(IRPosition::value(*U.get()), QueryingAA, FnLivenessAA, + CheckBBLivenessOnly, DepClass); + + if (auto *CB = dyn_cast<CallBase>(UserI)) { + // For call site argument uses we can check if the argument is + // unused/dead. + if (CB->isArgOperand(&U)) { + const IRPosition &CSArgPos = + IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)); + return isAssumedDead(CSArgPos, QueryingAA, FnLivenessAA, + CheckBBLivenessOnly, DepClass); + } + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(UserI)) { + const IRPosition &RetPos = IRPosition::returned(*RI->getFunction()); + return isAssumedDead(RetPos, QueryingAA, FnLivenessAA, CheckBBLivenessOnly, + DepClass); + } else if (PHINode *PHI = dyn_cast<PHINode>(UserI)) { + BasicBlock *IncomingBB = PHI->getIncomingBlock(U); + return isAssumedDead(*IncomingBB->getTerminator(), QueryingAA, FnLivenessAA, + CheckBBLivenessOnly, DepClass); + } + + return isAssumedDead(IRPosition::value(*UserI), QueryingAA, FnLivenessAA, + CheckBBLivenessOnly, DepClass); +} + +bool Attributor::isAssumedDead(const Instruction &I, + const AbstractAttribute *QueryingAA, + const AAIsDead *FnLivenessAA, + bool CheckBBLivenessOnly, DepClassTy DepClass) { + if (!FnLivenessAA) + FnLivenessAA = lookupAAFor<AAIsDead>(IRPosition::function(*I.getFunction()), + QueryingAA, + /* TrackDependence */ false); + + // If we have a context instruction and a liveness AA we use it. + if (FnLivenessAA && + FnLivenessAA->getIRPosition().getAnchorScope() == I.getFunction() && + FnLivenessAA->isAssumedDead(&I)) { + if (QueryingAA) + recordDependence(*FnLivenessAA, *QueryingAA, DepClass); return true; - }; - - // Helper method to invoke the generic value traversal. - auto VisitReturnedValue = [&](Value &RV, RVState &RVS) { - IRPosition RetValPos = IRPosition::value(RV); - return genericValueTraversal<AAReturnedValues, RVState>(A, RetValPos, *this, - RVS, VisitValueCB); - }; - - // Callback for all "return intructions" live in the associated function. - auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) { - ReturnInst &Ret = cast<ReturnInst>(I); - RVState RVS({ReturnedValues, Changed, {}}); - RVS.RetInsts.insert(&Ret); - return VisitReturnedValue(*Ret.getReturnValue(), RVS); - }; - - // Start by discovering returned values from all live returned instructions in - // the associated function. - if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret})) - return indicatePessimisticFixpoint(); - - // Once returned values "directly" present in the code are handled we try to - // resolve returned calls. 
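The Attributor::isAssumedDead overload for uses added above dispatches on the kind of user (call-site argument, returned value, PHI incoming edge, or plain instruction). A hedged sketch of the call shape a client loop might use; the surrounding names A, QueryingAA and V are placeholders, not code from the patch:

// Illustrative fragment: skip uses that the Attributor already assumes dead.
for (const Use &U : V.uses()) {
  if (A.isAssumedDead(U, &QueryingAA, /* FnLivenessAA */ nullptr,
                      /* CheckBBLivenessOnly */ false, DepClassTy::OPTIONAL))
    continue; // dead use; the needed liveness dependence is recorded for us
  // ... process the live use ...
}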
- decltype(ReturnedValues) NewRVsMap; - for (auto &It : ReturnedValues) { - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *It.first - << " by #" << It.second.size() << " RIs\n"); - CallBase *CB = dyn_cast<CallBase>(It.first); - if (!CB || UnresolvedCalls.count(CB)) - continue; - - if (!CB->getCalledFunction()) { - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB - << "\n"); - UnresolvedCalls.insert(CB); - continue; - } - - // TODO: use the function scope once we have call site AAReturnedValues. - const auto &RetValAA = A.getAAFor<AAReturnedValues>( - *this, IRPosition::function(*CB->getCalledFunction())); - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: " - << RetValAA << "\n"); - - // Skip dead ends, thus if we do not know anything about the returned - // call we mark it as unresolved and it will stay that way. - if (!RetValAA.getState().isValidState()) { - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB - << "\n"); - UnresolvedCalls.insert(CB); - continue; - } - - // Do not try to learn partial information. If the callee has unresolved - // return values we will treat the call as unresolved/opaque. - auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls(); - if (!RetValAAUnresolvedCalls.empty()) { - UnresolvedCalls.insert(CB); - continue; - } - - // Now check if we can track transitively returned values. If possible, thus - // if all return value can be represented in the current scope, do so. - bool Unresolved = false; - for (auto &RetValAAIt : RetValAA.returned_values()) { - Value *RetVal = RetValAAIt.first; - if (isa<Argument>(RetVal) || isa<CallBase>(RetVal) || - isa<Constant>(RetVal)) - continue; - // Anything that did not fit in the above categories cannot be resolved, - // mark the call as unresolved. - LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value " - "cannot be translated: " - << *RetVal << "\n"); - UnresolvedCalls.insert(CB); - Unresolved = true; - break; - } - - if (Unresolved) - continue; - - // Now track transitively returned values. - unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB]; - if (NumRetAA == RetValAA.getNumReturnValues()) { - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not " - "changed since it was seen last\n"); - continue; - } - NumRetAA = RetValAA.getNumReturnValues(); - - for (auto &RetValAAIt : RetValAA.returned_values()) { - Value *RetVal = RetValAAIt.first; - if (Argument *Arg = dyn_cast<Argument>(RetVal)) { - // Arguments are mapped to call site operands and we begin the traversal - // again. - bool Unused = false; - RVState RVS({NewRVsMap, Unused, RetValAAIt.second}); - VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS); - continue; - } else if (isa<CallBase>(RetVal)) { - // Call sites are resolved by the callee attribute over time, no need to - // do anything for us. - continue; - } else if (isa<Constant>(RetVal)) { - // Constants are valid everywhere, we can simply take them. - NewRVsMap[RetVal].insert(It.second.begin(), It.second.end()); - continue; - } - } - } - - // To avoid modifications to the ReturnedValues map while we iterate over it - // we kept record of potential new entries in a copy map, NewRVsMap. 
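The translation step in the loop above is worth spelling out: when a callee is known to return one of its own arguments, the value produced at a particular call site is simply the corresponding call-site operand, so the traversal restarts from that operand. As a one-line helper (a sketch, not code from the patch):

// If the callee returns its I-th argument, call site CB returns its I-th operand.
static Value *translateReturnedArgument(CallBase &CB, Argument &Arg) {
  return CB.getArgOperand(Arg.getArgNo());
}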
- for (auto &It : NewRVsMap) { - assert(!It.second.empty() && "Entry does not add anything."); - auto &ReturnInsts = ReturnedValues[It.first]; - for (ReturnInst *RI : It.second) - if (ReturnInsts.insert(RI)) { - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value " - << *It.first << " => " << *RI << "\n"); - Changed = true; - } - } - - Changed |= (NumUnresolvedCalls != UnresolvedCalls.size()); - return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; -} - -struct AAReturnedValuesFunction final : public AAReturnedValuesImpl { - AAReturnedValuesFunction(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) } -}; - -/// Returned values information for a call sites. -struct AAReturnedValuesCallSite final : AAReturnedValuesImpl { - AAReturnedValuesCallSite(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites instead of - // redirecting requests to the callee. - llvm_unreachable("Abstract attributes for returned values are not " - "supported for call sites yet!"); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - return indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} -}; - -/// ------------------------ NoSync Function Attribute ------------------------- - -struct AANoSyncImpl : AANoSync { - AANoSyncImpl(const IRPosition &IRP) : AANoSync(IRP) {} - - const std::string getAsStr() const override { - return getAssumed() ? "nosync" : "may-sync"; - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; - - /// Helper function used to determine whether an instruction is non-relaxed - /// atomic. In other words, if an atomic instruction does not have unordered - /// or monotonic ordering - static bool isNonRelaxedAtomic(Instruction *I); - - /// Helper function used to determine whether an instruction is volatile. - static bool isVolatile(Instruction *I); - - /// Helper function uset to check if intrinsic is volatile (memcpy, memmove, - /// memset). - static bool isNoSyncIntrinsic(Instruction *I); -}; - -bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) { - if (!I->isAtomic()) - return false; - - AtomicOrdering Ordering; - switch (I->getOpcode()) { - case Instruction::AtomicRMW: - Ordering = cast<AtomicRMWInst>(I)->getOrdering(); - break; - case Instruction::Store: - Ordering = cast<StoreInst>(I)->getOrdering(); - break; - case Instruction::Load: - Ordering = cast<LoadInst>(I)->getOrdering(); - break; - case Instruction::Fence: { - auto *FI = cast<FenceInst>(I); - if (FI->getSyncScopeID() == SyncScope::SingleThread) - return false; - Ordering = FI->getOrdering(); - break; - } - case Instruction::AtomicCmpXchg: { - AtomicOrdering Success = cast<AtomicCmpXchgInst>(I)->getSuccessOrdering(); - AtomicOrdering Failure = cast<AtomicCmpXchgInst>(I)->getFailureOrdering(); - // Only if both are relaxed, than it can be treated as relaxed. - // Otherwise it is non-relaxed. 
- if (Success != AtomicOrdering::Unordered && - Success != AtomicOrdering::Monotonic) - return true; - if (Failure != AtomicOrdering::Unordered && - Failure != AtomicOrdering::Monotonic) - return true; - return false; - } - default: - llvm_unreachable( - "New atomic operations need to be known in the attributor."); - } - - // Relaxed. - if (Ordering == AtomicOrdering::Unordered || - Ordering == AtomicOrdering::Monotonic) - return false; - return true; -} - -/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics. -/// FIXME: We should ipmrove the handling of intrinsics. -bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) { - if (auto *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - /// Element wise atomic memory intrinsics are can only be unordered, - /// therefore nosync. - case Intrinsic::memset_element_unordered_atomic: - case Intrinsic::memmove_element_unordered_atomic: - case Intrinsic::memcpy_element_unordered_atomic: - return true; - case Intrinsic::memset: - case Intrinsic::memmove: - case Intrinsic::memcpy: - if (!cast<MemIntrinsic>(II)->isVolatile()) - return true; - return false; - default: - return false; - } } - return false; -} - -bool AANoSyncImpl::isVolatile(Instruction *I) { - assert(!ImmutableCallSite(I) && !isa<CallBase>(I) && - "Calls should not be checked here"); - - switch (I->getOpcode()) { - case Instruction::AtomicRMW: - return cast<AtomicRMWInst>(I)->isVolatile(); - case Instruction::Store: - return cast<StoreInst>(I)->isVolatile(); - case Instruction::Load: - return cast<LoadInst>(I)->isVolatile(); - case Instruction::AtomicCmpXchg: - return cast<AtomicCmpXchgInst>(I)->isVolatile(); - default: - return false; - } -} - -ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { - - auto CheckRWInstForNoSync = [&](Instruction &I) { - /// We are looking for volatile instructions or Non-Relaxed atomics. - /// FIXME: We should improve the handling of intrinsics. - - if (isa<IntrinsicInst>(&I) && isNoSyncIntrinsic(&I)) - return true; - - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { - if (ICS.hasFnAttr(Attribute::NoSync)) - return true; - - const auto &NoSyncAA = - A.getAAFor<AANoSync>(*this, IRPosition::callsite_function(ICS)); - if (NoSyncAA.isAssumedNoSync()) - return true; - return false; - } - - if (!isVolatile(&I) && !isNonRelaxedAtomic(&I)) - return true; + if (CheckBBLivenessOnly) return false; - }; - - auto CheckForNoSync = [&](Instruction &I) { - // At this point we handled all read/write effects and they are all - // nosync, so they can be skipped. - if (I.mayReadOrWriteMemory()) - return true; - - // non-convergent and readnone imply nosync. - return !ImmutableCallSite(&I).isConvergent(); - }; - - if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) || - !A.checkForAllCallLikeInstructions(CheckForNoSync, *this)) - return indicatePessimisticFixpoint(); - - return ChangeStatus::UNCHANGED; -} - -struct AANoSyncFunction final : public AANoSyncImpl { - AANoSyncFunction(const IRPosition &IRP) : AANoSyncImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) } -}; - -/// NoSync attribute deduction for a call sites. -struct AANoSyncCallSite final : AANoSyncImpl { - AANoSyncCallSite(const IRPosition &IRP) : AANoSyncImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). 
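The ordering checks above reduce to a simple rule: unordered and monotonic are the only "relaxed" orderings, and a cmpxchg counts as relaxed only if both its success and failure orderings are. A self-contained analogue, using a local enum rather than llvm::AtomicOrdering:

#include <cassert>

enum class Ordering {
  Unordered, Monotonic, Acquire, Release, AcquireRelease, SequentiallyConsistent
};

static bool isRelaxed(Ordering O) {
  return O == Ordering::Unordered || O == Ordering::Monotonic;
}

// A cmpxchg-like operation is relaxed only if both of its orderings are.
static bool isRelaxedCmpXchg(Ordering Success, Ordering Failure) {
  return isRelaxed(Success) && isRelaxed(Failure);
}

int main() {
  assert(isRelaxedCmpXchg(Ordering::Monotonic, Ordering::Monotonic));
  assert(!isRelaxedCmpXchg(Ordering::Acquire, Ordering::Monotonic));
  return 0;
}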
- void initialize(Attributor &A) override { - AANoSyncImpl::initialize(A); - Function *F = getAssociatedFunction(); - if (!F) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::function(*F); - auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos); - return clampStateAndIndicateChange( - getState(), static_cast<const AANoSync::StateType &>(FnAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); } -}; - -/// ------------------------ No-Free Attributes ---------------------------- - -struct AANoFreeImpl : public AANoFree { - AANoFreeImpl(const IRPosition &IRP) : AANoFree(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - auto CheckForNoFree = [&](Instruction &I) { - ImmutableCallSite ICS(&I); - if (ICS.hasFnAttr(Attribute::NoFree)) - return true; - - const auto &NoFreeAA = - A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(ICS)); - return NoFreeAA.isAssumedNoFree(); - }; - - if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this)) - return indicatePessimisticFixpoint(); - return ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - return getAssumed() ? "nofree" : "may-free"; - } -}; - -struct AANoFreeFunction final : public AANoFreeImpl { - AANoFreeFunction(const IRPosition &IRP) : AANoFreeImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) } -}; - -/// NoFree attribute deduction for a call sites. -struct AANoFreeCallSite final : AANoFreeImpl { - AANoFreeCallSite(const IRPosition &IRP) : AANoFreeImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AANoFreeImpl::initialize(A); - Function *F = getAssociatedFunction(); - if (!F) - indicatePessimisticFixpoint(); - } - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::function(*F); - auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos); - return clampStateAndIndicateChange( - getState(), static_cast<const AANoFree::StateType &>(FnAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); } -}; - -/// NoFree attribute for floating values. -struct AANoFreeFloating : AANoFreeImpl { - AANoFreeFloating(const IRPosition &IRP) : AANoFreeImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override{STATS_DECLTRACK_FLOATING_ATTR(nofree)} - - /// See Abstract Attribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override { - const IRPosition &IRP = getIRPosition(); - - const auto &NoFreeAA = - A.getAAFor<AANoFree>(*this, IRPosition::function_scope(IRP)); - if (NoFreeAA.isAssumedNoFree()) - return ChangeStatus::UNCHANGED; - - Value &AssociatedValue = getIRPosition().getAssociatedValue(); - auto Pred = [&](const Use &U, bool &Follow) -> bool { - Instruction *UserI = cast<Instruction>(U.getUser()); - if (auto *CB = dyn_cast<CallBase>(UserI)) { - if (CB->isBundleOperand(&U)) - return false; - if (!CB->isArgOperand(&U)) - return true; - unsigned ArgNo = CB->getArgOperandNo(&U); - - const auto &NoFreeArg = A.getAAFor<AANoFree>( - *this, IRPosition::callsite_argument(*CB, ArgNo)); - return NoFreeArg.isAssumedNoFree(); - } - - if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) || - isa<PHINode>(UserI) || isa<SelectInst>(UserI)) { - Follow = true; - return true; - } - - // Unknown user. - return false; - }; - if (!A.checkForAllUses(Pred, *this, AssociatedValue)) - return indicatePessimisticFixpoint(); - - return ChangeStatus::UNCHANGED; - } -}; - -/// NoFree attribute for a call site argument. -struct AANoFreeArgument final : AANoFreeFloating { - AANoFreeArgument(const IRPosition &IRP) : AANoFreeFloating(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nofree) } -}; - -/// NoFree attribute for call site arguments. -struct AANoFreeCallSiteArgument final : AANoFreeFloating { - AANoFreeCallSiteArgument(const IRPosition &IRP) : AANoFreeFloating(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Argument *Arg = getAssociatedArgument(); - if (!Arg) - return indicatePessimisticFixpoint(); - const IRPosition &ArgPos = IRPosition::argument(*Arg); - auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos); - return clampStateAndIndicateChange( - getState(), static_cast<const AANoFree::StateType &>(ArgAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nofree)}; -}; - -/// NoFree attribute for function return value. -struct AANoFreeReturned final : AANoFreeFloating { - AANoFreeReturned(const IRPosition &IRP) : AANoFreeFloating(IRP) { - llvm_unreachable("NoFree is not applicable to function returns!"); - } - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - llvm_unreachable("NoFree is not applicable to function returns!"); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - llvm_unreachable("NoFree is not applicable to function returns!"); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} -}; - -/// NoFree attribute deduction for a call site return value. 
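AANoFreeFloating::updateImpl above relies on the Attributor's checkForAllUses callback, where the predicate decides per use whether to reject, accept, or accept-and-follow (through GEPs, bitcasts, PHIs and selects). A self-contained worklist analogue of that pattern:

#include <cstdio>
#include <set>
#include <vector>

// Toy user graph: "Transparent" stands in for GEP/bitcast/PHI/select users
// whose own users must also be inspected.
struct Node {
  const char *Name;
  bool Transparent;
  std::vector<Node *> Users;
};

template <typename PredT> static bool checkForAllUsers(Node &Root, PredT Pred) {
  std::vector<Node *> Worklist(Root.Users.begin(), Root.Users.end());
  std::set<Node *> Visited;
  while (!Worklist.empty()) {
    Node *U = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(U).second)
      continue;
    bool Follow = false;
    if (!Pred(*U, Follow))
      return false; // a single offending user kills the deduction
    if (Follow)
      Worklist.insert(Worklist.end(), U->Users.begin(), U->Users.end());
  }
  return true;
}

int main() {
  Node Store{"store", false, {}};
  Node Gep{"gep", true, {&Store}};
  Node Alloca{"alloca", false, {&Gep}};
  bool AllOk = checkForAllUsers(Alloca, [](Node &N, bool &Follow) {
    Follow = N.Transparent; // keep walking through transparent users
    return true;            // this toy predicate accepts every user
  });
  std::printf("all users ok: %d\n", AllOk ? 1 : 0);
  return 0;
}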
-struct AANoFreeCallSiteReturned final : AANoFreeFloating { - AANoFreeCallSiteReturned(const IRPosition &IRP) : AANoFreeFloating(IRP) {} - - ChangeStatus manifest(Attributor &A) override { - return ChangeStatus::UNCHANGED; - } - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) } -}; - -/// ------------------------ NonNull Argument Attribute ------------------------ -static int64_t getKnownNonNullAndDerefBytesForUse( - Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue, - const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) { - TrackUse = false; - - const Value *UseV = U->get(); - if (!UseV->getType()->isPointerTy()) - return 0; - - Type *PtrTy = UseV->getType(); - const Function *F = I->getFunction(); - bool NullPointerIsDefined = - F ? llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true; - const DataLayout &DL = A.getInfoCache().getDL(); - if (ImmutableCallSite ICS = ImmutableCallSite(I)) { - if (ICS.isBundleOperand(U)) - return 0; - - if (ICS.isCallee(U)) { - IsNonNull |= !NullPointerIsDefined; - return 0; - } - - unsigned ArgNo = ICS.getArgumentNo(U); - IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); - // As long as we only use known information there is no need to track - // dependences here. - auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP, - /* TrackDependence */ false); - IsNonNull |= DerefAA.isKnownNonNull(); - return DerefAA.getKnownDereferenceableBytes(); - } - - // We need to follow common pointer manipulation uses to the accesses they - // feed into. We can try to be smart to avoid looking through things we do not - // like for now, e.g., non-inbounds GEPs. - if (isa<CastInst>(I)) { - TrackUse = true; - return 0; - } - if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) - if (GEP->hasAllConstantIndices()) { - TrackUse = true; - return 0; - } - - int64_t Offset; - if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) { - if (Base == &AssociatedValue && - Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) { - int64_t DerefBytes = - (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()) + Offset; - - IsNonNull |= !NullPointerIsDefined; - return std::max(int64_t(0), DerefBytes); - } - } - - /// Corner case when an offset is 0. - if (const Value *Base = getBasePointerOfAccessPointerOperand( - I, Offset, DL, /*AllowNonInbounds*/ true)) { - if (Offset == 0 && Base == &AssociatedValue && - Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) { - int64_t DerefBytes = - (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()); - IsNonNull |= !NullPointerIsDefined; - return std::max(int64_t(0), DerefBytes); - } - } - - return 0; -} - -struct AANonNullImpl : AANonNull { - AANonNullImpl(const IRPosition &IRP) - : AANonNull(IRP), - NullIsDefined(NullPointerIsDefined( - getAnchorScope(), - getAssociatedValue().getType()->getPointerAddressSpace())) {} - - /// See AbstractAttribute::initialize(...). 
- void initialize(Attributor &A) override { - if (!NullIsDefined && - hasAttr({Attribute::NonNull, Attribute::Dereferenceable})) - indicateOptimisticFixpoint(); - else if (isa<ConstantPointerNull>(getAssociatedValue())) - indicatePessimisticFixpoint(); - else - AANonNull::initialize(A); - } - - /// See AAFromMustBeExecutedContext - bool followUse(Attributor &A, const Use *U, const Instruction *I) { - bool IsNonNull = false; - bool TrackUse = false; - getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I, - IsNonNull, TrackUse); - setKnown(IsNonNull); - return TrackUse; - } - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - return getAssumed() ? "nonnull" : "may-null"; - } - - /// Flag to determine if the underlying value can be null and still allow - /// valid accesses. - const bool NullIsDefined; -}; - -/// NonNull attribute for a floating value. -struct AANonNullFloating - : AAFromMustBeExecutedContext<AANonNull, AANonNullImpl> { - using Base = AAFromMustBeExecutedContext<AANonNull, AANonNullImpl>; - AANonNullFloating(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus Change = Base::updateImpl(A); - if (isKnownNonNull()) - return Change; - - if (!NullIsDefined) { - const auto &DerefAA = - A.getAAFor<AADereferenceable>(*this, getIRPosition()); - if (DerefAA.getAssumedDereferenceableBytes()) - return Change; - } - - const DataLayout &DL = A.getDataLayout(); - - DominatorTree *DT = nullptr; - InformationCache &InfoCache = A.getInfoCache(); - if (const Function *Fn = getAnchorScope()) - DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Fn); - - auto VisitValueCB = [&](Value &V, AANonNull::StateType &T, - bool Stripped) -> bool { - const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V)); - if (!Stripped && this == &AA) { - if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, getCtxI(), DT)) - T.indicatePessimisticFixpoint(); - } else { - // Use abstract attribute information. - const AANonNull::StateType &NS = - static_cast<const AANonNull::StateType &>(AA.getState()); - T ^= NS; - } - return T.isValidState(); - }; - - StateType T; - if (!genericValueTraversal<AANonNull, StateType>(A, getIRPosition(), *this, - T, VisitValueCB)) - return indicatePessimisticFixpoint(); - - return clampStateAndIndicateChange(getState(), T); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } -}; - -/// NonNull attribute for function return value. -struct AANonNullReturned final - : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> { - AANonNullReturned(const IRPosition &IRP) - : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } -}; - -/// NonNull attribute for function argument. 
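The removed getKnownNonNullAndDerefBytesForUse helper derives dereferenceable bytes from a concrete access: the bytes known dereferenceable from the base pointer are the store size of the accessed type plus the constant offset of the access, clamped at zero. A tiny self-contained illustration of that arithmetic:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// bytes = max(0, store size of the accessed type + constant access offset)
static int64_t derefBytesFromAccess(int64_t StoreSize, int64_t Offset) {
  return std::max<int64_t>(0, StoreSize + Offset);
}

int main() {
  // An 8-byte (i64) load at offset 8 from %p implies %p is dereferenceable(16).
  std::printf("%lld\n", (long long)derefBytesFromAccess(8, 8));   // 16
  // An access behind the base pointer never contributes negative bytes.
  std::printf("%lld\n", (long long)derefBytesFromAccess(8, -16)); // 0
  return 0;
}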
-struct AANonNullArgument final - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull, - AANonNullImpl> { - AANonNullArgument(const IRPosition &IRP) - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull, - AANonNullImpl>( - IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) } -}; - -struct AANonNullCallSiteArgument final : AANonNullFloating { - AANonNullCallSiteArgument(const IRPosition &IRP) : AANonNullFloating(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) } -}; - -/// NonNull attribute for a call site return position. -struct AANonNullCallSiteReturned final - : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull, - AANonNullImpl> { - AANonNullCallSiteReturned(const IRPosition &IRP) - : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull, - AANonNullImpl>( - IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } -}; - -/// ------------------------ No-Recurse Attributes ---------------------------- - -struct AANoRecurseImpl : public AANoRecurse { - AANoRecurseImpl(const IRPosition &IRP) : AANoRecurse(IRP) {} - - /// See AbstractAttribute::getAsStr() - const std::string getAsStr() const override { - return getAssumed() ? "norecurse" : "may-recurse"; - } -}; - -struct AANoRecurseFunction final : AANoRecurseImpl { - AANoRecurseFunction(const IRPosition &IRP) : AANoRecurseImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AANoRecurseImpl::initialize(A); - if (const Function *F = getAnchorScope()) - if (A.getInfoCache().getSccSize(*F) == 1) - return; - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - - auto CheckForNoRecurse = [&](Instruction &I) { - ImmutableCallSite ICS(&I); - if (ICS.hasFnAttr(Attribute::NoRecurse)) - return true; - - const auto &NoRecurseAA = - A.getAAFor<AANoRecurse>(*this, IRPosition::callsite_function(ICS)); - if (!NoRecurseAA.isAssumedNoRecurse()) - return false; - - // Recursion to the same function - if (ICS.getCalledFunction() == getAnchorScope()) - return false; - - return true; - }; - - if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this)) - return indicatePessimisticFixpoint(); - return ChangeStatus::UNCHANGED; - } - - void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) } -}; - -/// NoRecurse attribute deduction for a call sites. -struct AANoRecurseCallSite final : AANoRecurseImpl { - AANoRecurseCallSite(const IRPosition &IRP) : AANoRecurseImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AANoRecurseImpl::initialize(A); - Function *F = getAssociatedFunction(); - if (!F) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. 
- Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::function(*F); - auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos); - return clampStateAndIndicateChange( - getState(), - static_cast<const AANoRecurse::StateType &>(FnAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); } -}; - -/// -------------------- Undefined-Behavior Attributes ------------------------ - -struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { - AAUndefinedBehaviorImpl(const IRPosition &IRP) : AAUndefinedBehavior(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - // through a pointer (i.e. also branches etc.) - ChangeStatus updateImpl(Attributor &A) override { - const size_t UBPrevSize = KnownUBInsts.size(); - const size_t NoUBPrevSize = AssumedNoUBInsts.size(); - - auto InspectMemAccessInstForUB = [&](Instruction &I) { - // Skip instructions that are already saved. - if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I)) - return true; - - // If we reach here, we know we have an instruction - // that accesses memory through a pointer operand, - // for which getPointerOperand() should give it to us. - const Value *PtrOp = - Attributor::getPointerOperand(&I, /* AllowVolatile */ true); - assert(PtrOp && - "Expected pointer operand of memory accessing instruction"); - - // A memory access through a pointer is considered UB - // only if the pointer has constant null value. - // TODO: Expand it to not only check constant values. - if (!isa<ConstantPointerNull>(PtrOp)) { - AssumedNoUBInsts.insert(&I); - return true; - } - const Type *PtrTy = PtrOp->getType(); - - // Because we only consider instructions inside functions, - // assume that a parent function exists. - const Function *F = I.getFunction(); - - // A memory access using constant null pointer is only considered UB - // if null pointer is _not_ defined for the target platform. - if (llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace())) - AssumedNoUBInsts.insert(&I); - else - KnownUBInsts.insert(&I); - return true; - }; - - auto InspectBrInstForUB = [&](Instruction &I) { - // A conditional branch instruction is considered UB if it has `undef` - // condition. - - // Skip instructions that are already saved. - if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I)) - return true; - - // We know we have a branch instruction. - auto BrInst = cast<BranchInst>(&I); - - // Unconditional branches are never considered UB. - if (BrInst->isUnconditional()) - return true; - - // Either we stopped and the appropriate action was taken, - // or we got back a simplified value to continue. 
- Optional<Value *> SimplifiedCond = - stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst); - if (!SimplifiedCond.hasValue()) - return true; - AssumedNoUBInsts.insert(&I); - return true; - }; - - A.checkForAllInstructions(InspectMemAccessInstForUB, *this, - {Instruction::Load, Instruction::Store, - Instruction::AtomicCmpXchg, - Instruction::AtomicRMW}); - A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br}); - if (NoUBPrevSize != AssumedNoUBInsts.size() || - UBPrevSize != KnownUBInsts.size()) - return ChangeStatus::CHANGED; - return ChangeStatus::UNCHANGED; - } - - bool isKnownToCauseUB(Instruction *I) const override { - return KnownUBInsts.count(I); - } - - bool isAssumedToCauseUB(Instruction *I) const override { - // In simple words, if an instruction is not in the assumed to _not_ - // cause UB, then it is assumed UB (that includes those - // in the KnownUBInsts set). The rest is boilerplate - // is to ensure that it is one of the instructions we test - // for UB. - - switch (I->getOpcode()) { - case Instruction::Load: - case Instruction::Store: - case Instruction::AtomicCmpXchg: - case Instruction::AtomicRMW: - return !AssumedNoUBInsts.count(I); - case Instruction::Br: { - auto BrInst = cast<BranchInst>(I); - if (BrInst->isUnconditional()) - return false; - return !AssumedNoUBInsts.count(I); - } break; - default: - return false; - } + const AAIsDead &IsDeadAA = getOrCreateAAFor<AAIsDead>( + IRPosition::value(I), QueryingAA, /* TrackDependence */ false); + // Don't check liveness for AAIsDead. + if (QueryingAA == &IsDeadAA) return false; - } - ChangeStatus manifest(Attributor &A) override { - if (KnownUBInsts.empty()) - return ChangeStatus::UNCHANGED; - for (Instruction *I : KnownUBInsts) - A.changeToUnreachableAfterManifest(I); - return ChangeStatus::CHANGED; - } - - /// See AbstractAttribute::getAsStr() - const std::string getAsStr() const override { - return getAssumed() ? "undefined-behavior" : "no-ub"; - } - - /// Note: The correctness of this analysis depends on the fact that the - /// following 2 sets will stop changing after some point. - /// "Change" here means that their size changes. - /// The size of each set is monotonically increasing - /// (we only add items to them) and it is upper bounded by the number of - /// instructions in the processed function (we can never save more - /// elements in either set than this number). Hence, at some point, - /// they will stop increasing. - /// Consequently, at some point, both sets will have stopped - /// changing, effectively making the analysis reach a fixpoint. - - /// Note: These 2 sets are disjoint and an instruction can be considered - /// one of 3 things: - /// 1) Known to cause UB (AAUndefinedBehavior could prove it) and put it in - /// the KnownUBInsts set. - /// 2) Assumed to cause UB (in every updateImpl, AAUndefinedBehavior - /// has a reason to assume it). - /// 3) Assumed to not cause UB. very other instruction - AAUndefinedBehavior - /// could not find a reason to assume or prove that it can cause UB, - /// hence it assumes it doesn't. We have a set for these instructions - /// so that we don't reprocess them in every update. - /// Note however that instructions in this set may cause UB. - -protected: - /// A set of all live instructions _known_ to cause UB. - SmallPtrSet<Instruction *, 8> KnownUBInsts; - -private: - /// A set of all the (live) instructions that are assumed to _not_ cause UB. 
- SmallPtrSet<Instruction *, 8> AssumedNoUBInsts; - - // Should be called on updates in which if we're processing an instruction - // \p I that depends on a value \p V, one of the following has to happen: - // - If the value is assumed, then stop. - // - If the value is known but undef, then consider it UB. - // - Otherwise, do specific processing with the simplified value. - // We return None in the first 2 cases to signify that an appropriate - // action was taken and the caller should stop. - // Otherwise, we return the simplified value that the caller should - // use for specific processing. - Optional<Value *> stopOnUndefOrAssumed(Attributor &A, const Value *V, - Instruction *I) { - const auto &ValueSimplifyAA = - A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*V)); - Optional<Value *> SimplifiedV = - ValueSimplifyAA.getAssumedSimplifiedValue(A); - if (!ValueSimplifyAA.isKnown()) { - // Don't depend on assumed values. - return llvm::None; - } - if (!SimplifiedV.hasValue()) { - // If it is known (which we tested above) but it doesn't have a value, - // then we can assume `undef` and hence the instruction is UB. - KnownUBInsts.insert(I); - return llvm::None; - } - Value *Val = SimplifiedV.getValue(); - if (isa<UndefValue>(Val)) { - KnownUBInsts.insert(I); - return llvm::None; - } - return Val; - } -}; - -struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl { - AAUndefinedBehaviorFunction(const IRPosition &IRP) - : AAUndefinedBehaviorImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECL(UndefinedBehaviorInstruction, Instruction, - "Number of instructions known to have UB"); - BUILD_STAT_NAME(UndefinedBehaviorInstruction, Instruction) += - KnownUBInsts.size(); + if (IsDeadAA.isAssumedDead()) { + if (QueryingAA) + recordDependence(IsDeadAA, *QueryingAA, DepClass); + return true; } -}; -/// ------------------------ Will-Return Attributes ---------------------------- - -// Helper function that checks whether a function has any cycle. -// TODO: Replace with more efficent code -static bool containsCycle(Function &F) { - SmallPtrSet<BasicBlock *, 32> Visited; - - // Traverse BB by dfs and check whether successor is already visited. - for (BasicBlock *BB : depth_first(&F)) { - Visited.insert(BB); - for (auto *SuccBB : successors(BB)) { - if (Visited.count(SuccBB)) - return true; - } - } return false; } -// Helper function that checks the function have a loop which might become an -// endless loop -// FIXME: Any cycle is regarded as endless loop for now. -// We have to allow some patterns. -static bool containsPossiblyEndlessLoop(Function *F) { - return !F || !F->hasExactDefinition() || containsCycle(*F); -} - -struct AAWillReturnImpl : public AAWillReturn { - AAWillReturnImpl(const IRPosition &IRP) : AAWillReturn(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AAWillReturn::initialize(A); - - Function *F = getAssociatedFunction(); - if (containsPossiblyEndlessLoop(F)) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). 
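The removed containsCycle is intentionally coarse: it returns true whenever the DFS reaches an already-visited successor, so it also fires on acyclic diamonds, which is safe for the "possibly endless loop" question but conservative, as its FIXME notes. For comparison only (not a proposed replacement), a standard back-edge check that reports true cycles exclusively:

#include <cstdio>
#include <vector>

// Three-color DFS: a real cycle exists iff we reach a node that is still on
// the current DFS stack (a back edge).
enum Color { White, Grey, Black };

static bool hasCycleFrom(int N, const std::vector<std::vector<int>> &Succ,
                         std::vector<Color> &C) {
  C[N] = Grey;
  for (int S : Succ[N]) {
    if (C[S] == Grey)
      return true; // back edge
    if (C[S] == White && hasCycleFrom(S, Succ, C))
      return true;
  }
  C[N] = Black;
  return false;
}

int main() {
  // Diamond: 0 -> {1,2}, 1 -> 3, 2 -> 3. Acyclic, so no cycle is reported here.
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}};
  std::vector<Color> C(Succ.size(), White);
  std::printf("cycle: %d\n", hasCycleFrom(0, Succ, C) ? 1 : 0); // 0
  return 0;
}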
- ChangeStatus updateImpl(Attributor &A) override { - auto CheckForWillReturn = [&](Instruction &I) { - IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I)); - const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, IPos); - if (WillReturnAA.isKnownWillReturn()) - return true; - if (!WillReturnAA.isAssumedWillReturn()) - return false; - const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(*this, IPos); - return NoRecurseAA.isAssumedNoRecurse(); - }; - - if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this)) - return indicatePessimisticFixpoint(); - - return ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::getAsStr() - const std::string getAsStr() const override { - return getAssumed() ? "willreturn" : "may-noreturn"; - } -}; - -struct AAWillReturnFunction final : AAWillReturnImpl { - AAWillReturnFunction(const IRPosition &IRP) : AAWillReturnImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) } -}; - -/// WillReturn attribute deduction for a call sites. -struct AAWillReturnCallSite final : AAWillReturnImpl { - AAWillReturnCallSite(const IRPosition &IRP) : AAWillReturnImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AAWillReturnImpl::initialize(A); - Function *F = getAssociatedFunction(); - if (!F) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::function(*F); - auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos); - return clampStateAndIndicateChange( - getState(), - static_cast<const AAWillReturn::StateType &>(FnAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); } -}; - -/// -------------------AAReachability Attribute-------------------------- - -struct AAReachabilityImpl : AAReachability { - AAReachabilityImpl(const IRPosition &IRP) : AAReachability(IRP) {} - - const std::string getAsStr() const override { - // TODO: Return the number of reachable queries. - return "reachable"; - } - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { indicatePessimisticFixpoint(); } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - return indicatePessimisticFixpoint(); - } -}; - -struct AAReachabilityFunction final : public AAReachabilityImpl { - AAReachabilityFunction(const IRPosition &IRP) : AAReachabilityImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); } -}; - -/// ------------------------ NoAlias Argument Attribute ------------------------ - -struct AANoAliasImpl : AANoAlias { - AANoAliasImpl(const IRPosition &IRP) : AANoAlias(IRP) {} - - const std::string getAsStr() const override { - return getAssumed() ? "noalias" : "may-alias"; - } -}; - -/// NoAlias attribute for a floating value. 
-struct AANoAliasFloating final : AANoAliasImpl { - AANoAliasFloating(const IRPosition &IRP) : AANoAliasImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AANoAliasImpl::initialize(A); - Value &Val = getAssociatedValue(); - if (isa<AllocaInst>(Val)) - indicateOptimisticFixpoint(); - if (isa<ConstantPointerNull>(Val) && - Val.getType()->getPointerAddressSpace() == 0) - indicateOptimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Implement this. - return indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FLOATING_ATTR(noalias) - } -}; - -/// NoAlias attribute for an argument. -struct AANoAliasArgument final - : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> { - using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>; - AANoAliasArgument(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::update(...). - ChangeStatus updateImpl(Attributor &A) override { - // We have to make sure no-alias on the argument does not break - // synchronization when this is a callback argument, see also [1] below. - // If synchronization cannot be affected, we delegate to the base updateImpl - // function, otherwise we give up for now. - - // If the function is no-sync, no-alias cannot break synchronization. - const auto &NoSyncAA = A.getAAFor<AANoSync>( - *this, IRPosition::function_scope(getIRPosition())); - if (NoSyncAA.isAssumedNoSync()) - return Base::updateImpl(A); - - // If the argument is read-only, no-alias cannot break synchronization. - const auto &MemBehaviorAA = - A.getAAFor<AAMemoryBehavior>(*this, getIRPosition()); - if (MemBehaviorAA.isAssumedReadOnly()) - return Base::updateImpl(A); - - // If the argument is never passed through callbacks, no-alias cannot break - // synchronization. - if (A.checkForAllCallSites( - [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this, - true)) - return Base::updateImpl(A); - - // TODO: add no-alias but make sure it doesn't break synchronization by - // introducing fake uses. See: - // [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel, - // International Workshop on OpenMP 2018, - // http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf - - return indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) } -}; - -struct AANoAliasCallSiteArgument final : AANoAliasImpl { - AANoAliasCallSiteArgument(const IRPosition &IRP) : AANoAliasImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - // See callsite argument attribute and callee argument attribute. - ImmutableCallSite ICS(&getAnchorValue()); - if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias)) - indicateOptimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // We can deduce "noalias" if the following conditions hold. - // (i) Associated value is assumed to be noalias in the definition. - // (ii) Associated value is assumed to be no-capture in all the uses - // possibly executed before this callsite. - // (iii) There is no other pointer argument which could alias with the - // value. 
- - const Value &V = getAssociatedValue(); - const IRPosition IRP = IRPosition::value(V); - - // (i) Check whether noalias holds in the definition. - - auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP); - LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] check definition: " << V - << " :: " << NoAliasAA << "\n"); - - if (!NoAliasAA.isAssumedNoAlias()) - return indicatePessimisticFixpoint(); - - LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] " << V - << " is assumed NoAlias in the definition\n"); - - // (ii) Check whether the value is captured in the scope using AANoCapture. - // FIXME: This is conservative though, it is better to look at CFG and - // check only uses possibly executed before this callsite. - - auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP); - if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) { - LLVM_DEBUG( - dbgs() << "[Attributor][AANoAliasCSArg] " << V - << " cannot be noalias as it is potentially captured\n"); - return indicatePessimisticFixpoint(); - } - - // (iii) Check there is no other pointer argument which could alias with the - // value. - // TODO: AbstractCallSite - ImmutableCallSite ICS(&getAnchorValue()); - for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) { - if (getArgNo() == (int)i) - continue; - const Value *ArgOp = ICS.getArgOperand(i); - if (!ArgOp->getType()->isPointerTy()) - continue; - - if (const Function *F = getAnchorScope()) { - if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) { - bool IsAliasing = !AAR->isNoAlias(&getAssociatedValue(), ArgOp); - LLVM_DEBUG(dbgs() - << "[Attributor][NoAliasCSArg] Check alias between " - "callsite arguments " - << AAR->isNoAlias(&getAssociatedValue(), ArgOp) << " " - << getAssociatedValue() << " " << *ArgOp << " => " - << (IsAliasing ? "" : "no-") << "alias \n"); - - if (!IsAliasing) - continue; - } - } - return indicatePessimisticFixpoint(); - } - - return ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) } -}; - -/// NoAlias attribute for function return value. -struct AANoAliasReturned final : AANoAliasImpl { - AANoAliasReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - virtual ChangeStatus updateImpl(Attributor &A) override { - - auto CheckReturnValue = [&](Value &RV) -> bool { - if (Constant *C = dyn_cast<Constant>(&RV)) - if (C->isNullValue() || isa<UndefValue>(C)) - return true; - - /// For now, we can only deduce noalias if we have call sites. - /// FIXME: add more support. - ImmutableCallSite ICS(&RV); - if (!ICS) - return false; - - const IRPosition &RVPos = IRPosition::value(RV); - const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, RVPos); - if (!NoAliasAA.isAssumedNoAlias()) - return false; - - const auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, RVPos); - return NoCaptureAA.isAssumedNoCaptureMaybeReturned(); - }; - - if (!A.checkForAllReturnedValues(CheckReturnValue, *this)) - return indicatePessimisticFixpoint(); - - return ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) } -}; - -/// NoAlias attribute deduction for a call site return value. -struct AANoAliasCallSiteReturned final : AANoAliasImpl { - AANoAliasCallSiteReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). 
- void initialize(Attributor &A) override { - AANoAliasImpl::initialize(A); - Function *F = getAssociatedFunction(); - if (!F) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::returned(*F); - auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos); - return clampStateAndIndicateChange( - getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); } -}; - -/// -------------------AAIsDead Function Attribute----------------------- - -struct AAIsDeadValueImpl : public AAIsDead { - AAIsDeadValueImpl(const IRPosition &IRP) : AAIsDead(IRP) {} - - /// See AAIsDead::isAssumedDead(). - bool isAssumedDead() const override { return getAssumed(); } - - /// See AAIsDead::isAssumedDead(BasicBlock *). - bool isAssumedDead(const BasicBlock *BB) const override { return false; } - - /// See AAIsDead::isKnownDead(BasicBlock *). - bool isKnownDead(const BasicBlock *BB) const override { return false; } - - /// See AAIsDead::isAssumedDead(Instruction *I). - bool isAssumedDead(const Instruction *I) const override { - return I == getCtxI() && isAssumedDead(); - } - - /// See AAIsDead::isKnownDead(Instruction *I). - bool isKnownDead(const Instruction *I) const override { - return I == getCtxI() && getKnown(); - } - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - return isAssumedDead() ? "assumed-dead" : "assumed-live"; - } -}; - -struct AAIsDeadFloating : public AAIsDeadValueImpl { - AAIsDeadFloating(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - if (Instruction *I = dyn_cast<Instruction>(&getAssociatedValue())) - if (!wouldInstructionBeTriviallyDead(I)) - indicatePessimisticFixpoint(); - if (isa<UndefValue>(getAssociatedValue())) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - auto UsePred = [&](const Use &U, bool &Follow) { - Instruction *UserI = cast<Instruction>(U.getUser()); - if (CallSite CS = CallSite(UserI)) { - if (!CS.isArgOperand(&U)) - return false; - const IRPosition &CSArgPos = - IRPosition::callsite_argument(CS, CS.getArgumentNo(&U)); - const auto &CSArgIsDead = A.getAAFor<AAIsDead>(*this, CSArgPos); - return CSArgIsDead.isAssumedDead(); - } - if (ReturnInst *RI = dyn_cast<ReturnInst>(UserI)) { - const IRPosition &RetPos = IRPosition::returned(*RI->getFunction()); - const auto &RetIsDeadAA = A.getAAFor<AAIsDead>(*this, RetPos); - return RetIsDeadAA.isAssumedDead(); - } - Follow = true; - return wouldInstructionBeTriviallyDead(UserI); - }; - - if (!A.checkForAllUses(UsePred, *this, getAssociatedValue())) - return indicatePessimisticFixpoint(); - return ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::manifest(...). 
- ChangeStatus manifest(Attributor &A) override { - Value &V = getAssociatedValue(); - if (auto *I = dyn_cast<Instruction>(&V)) - if (wouldInstructionBeTriviallyDead(I)) { - A.deleteAfterManifest(*I); - return ChangeStatus::CHANGED; - } - - if (V.use_empty()) - return ChangeStatus::UNCHANGED; - - UndefValue &UV = *UndefValue::get(V.getType()); - bool AnyChange = A.changeValueAfterManifest(V, UV); - return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FLOATING_ATTR(IsDead) - } -}; - -struct AAIsDeadArgument : public AAIsDeadFloating { - AAIsDeadArgument(const IRPosition &IRP) : AAIsDeadFloating(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - if (!getAssociatedFunction()->hasExactDefinition()) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - ChangeStatus Changed = AAIsDeadFloating::manifest(A); - Argument &Arg = *getAssociatedArgument(); - if (Arg.getParent()->hasLocalLinkage()) - if (A.registerFunctionSignatureRewrite( - Arg, /* ReplacementTypes */ {}, - Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{}, - Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) - return ChangeStatus::CHANGED; - return Changed; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(IsDead) } -}; - -struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl { - AAIsDeadCallSiteArgument(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - if (isa<UndefValue>(getAssociatedValue())) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Argument *Arg = getAssociatedArgument(); - if (!Arg) - return indicatePessimisticFixpoint(); - const IRPosition &ArgPos = IRPosition::argument(*Arg); - auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos); - return clampStateAndIndicateChange( - getState(), static_cast<const AAIsDead::StateType &>(ArgAA.getState())); - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - CallBase &CB = cast<CallBase>(getAnchorValue()); - Use &U = CB.getArgOperandUse(getArgNo()); - assert(!isa<UndefValue>(U.get()) && - "Expected undef values to be filtered out!"); - UndefValue &UV = *UndefValue::get(U->getType()); - if (A.changeUseAfterManifest(U, UV)) - return ChangeStatus::CHANGED; - return ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(IsDead) } -}; - -struct AAIsDeadReturned : public AAIsDeadValueImpl { - AAIsDeadReturned(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {} - - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override { - - auto PredForCallSite = [&](AbstractCallSite ACS) { - if (ACS.isCallbackCall()) - return false; - const IRPosition &CSRetPos = - IRPosition::callsite_returned(ACS.getCallSite()); - const auto &RetIsDeadAA = A.getAAFor<AAIsDead>(*this, CSRetPos); - return RetIsDeadAA.isAssumedDead(); - }; - - if (!A.checkForAllCallSites(PredForCallSite, *this, true)) - return indicatePessimisticFixpoint(); - - return ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - // TODO: Rewrite the signature to return void? - bool AnyChange = false; - UndefValue &UV = *UndefValue::get(getAssociatedFunction()->getReturnType()); - auto RetInstPred = [&](Instruction &I) { - ReturnInst &RI = cast<ReturnInst>(I); - if (!isa<UndefValue>(RI.getReturnValue())) - AnyChange |= A.changeUseAfterManifest(RI.getOperandUse(0), UV); - return true; - }; - A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret}); - return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(IsDead) } -}; - -struct AAIsDeadCallSiteReturned : public AAIsDeadFloating { - AAIsDeadCallSiteReturned(const IRPosition &IRP) : AAIsDeadFloating(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(IsDead) } -}; - -struct AAIsDeadFunction : public AAIsDead { - AAIsDeadFunction(const IRPosition &IRP) : AAIsDead(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - const Function *F = getAssociatedFunction(); - if (F && !F->isDeclaration()) { - ToBeExploredFrom.insert(&F->getEntryBlock().front()); - assumeLive(A, F->getEntryBlock()); - } - } - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" + - std::to_string(getAssociatedFunction()->size()) + "][#TBEP " + - std::to_string(ToBeExploredFrom.size()) + "][#KDE " + - std::to_string(KnownDeadEnds.size()) + "]"; - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - assert(getState().isValidState() && - "Attempted to manifest an invalid state!"); - - ChangeStatus HasChanged = ChangeStatus::UNCHANGED; - Function &F = *getAssociatedFunction(); - - if (AssumedLiveBlocks.empty()) { - A.deleteAfterManifest(F); - return ChangeStatus::CHANGED; - } - - // Flag to determine if we can change an invoke to a call assuming the - // callee is nounwind. This is not possible if the personality of the - // function allows to catch asynchronous exceptions. 
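// Editor's note (illustrative aside, not part of this patch): the flag
// computed below and the loop over KnownDeadEnds implement a small decision:
// a dead-end call can be acted upon if its callee is assumed to never return,
// or, for an invoke whose unwind edge was already found dead (callee assumed
// nounwind), if the personality cannot catch asynchronous exceptions. A
// standalone model of that predicate, with hypothetical parameter names:
static bool canActOnDeadEnd(bool CalleeMayReturn, bool IsInvoke,
                            bool PersonalityCatchesAsyncExceptions) {
  if (!CalleeMayReturn)
    return true;                       // noreturn: everything after it is dead
  return IsInvoke && !PersonalityCatchesAsyncExceptions; // invoke -> call candidate
}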
- bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F); - - KnownDeadEnds.set_union(ToBeExploredFrom); - for (const Instruction *DeadEndI : KnownDeadEnds) { - auto *CB = dyn_cast<CallBase>(DeadEndI); - if (!CB) - continue; - const auto &NoReturnAA = - A.getAAFor<AANoReturn>(*this, IRPosition::callsite_function(*CB)); - bool MayReturn = !NoReturnAA.isAssumedNoReturn(); - if (MayReturn && (!Invoke2CallAllowed || !isa<InvokeInst>(CB))) - continue; - - if (auto *II = dyn_cast<InvokeInst>(DeadEndI)) - A.registerInvokeWithDeadSuccessor(const_cast<InvokeInst &>(*II)); - else - A.changeToUnreachableAfterManifest( - const_cast<Instruction *>(DeadEndI->getNextNode())); - HasChanged = ChangeStatus::CHANGED; - } - - for (BasicBlock &BB : F) - if (!AssumedLiveBlocks.count(&BB)) - A.deleteAfterManifest(BB); - - return HasChanged; - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} - - /// Returns true if the function is assumed dead. - bool isAssumedDead() const override { return false; } - - /// See AAIsDead::isAssumedDead(BasicBlock *). - bool isAssumedDead(const BasicBlock *BB) const override { - assert(BB->getParent() == getAssociatedFunction() && - "BB must be in the same anchor scope function."); - - if (!getAssumed()) - return false; - return !AssumedLiveBlocks.count(BB); - } - - /// See AAIsDead::isKnownDead(BasicBlock *). - bool isKnownDead(const BasicBlock *BB) const override { - return getKnown() && isAssumedDead(BB); - } - - /// See AAIsDead::isAssumed(Instruction *I). - bool isAssumedDead(const Instruction *I) const override { - assert(I->getParent()->getParent() == getAssociatedFunction() && - "Instruction must be in the same anchor scope function."); - - if (!getAssumed()) - return false; - - // If it is not in AssumedLiveBlocks then it for sure dead. - // Otherwise, it can still be after noreturn call in a live block. - if (!AssumedLiveBlocks.count(I->getParent())) - return true; - - // If it is not after a liveness barrier it is live. - const Instruction *PrevI = I->getPrevNode(); - while (PrevI) { - if (KnownDeadEnds.count(PrevI) || ToBeExploredFrom.count(PrevI)) - return true; - PrevI = PrevI->getPrevNode(); - } - return false; - } - - /// See AAIsDead::isKnownDead(Instruction *I). - bool isKnownDead(const Instruction *I) const override { - return getKnown() && isAssumedDead(I); - } - - /// Determine if \p F might catch asynchronous exceptions. - static bool mayCatchAsynchronousExceptions(const Function &F) { - return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); - } - - /// Assume \p BB is (partially) live now and indicate to the Attributor \p A - /// that internal function called from \p BB should now be looked at. - bool assumeLive(Attributor &A, const BasicBlock &BB) { - if (!AssumedLiveBlocks.insert(&BB).second) - return false; - - // We assume that all of BB is (probably) live now and if there are calls to - // internal functions we will assume that those are now live as well. This - // is a performance optimization for blocks with calls to a lot of internal - // functions. It can however cause dead functions to be treated as live. 
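// Editor's note (standalone sketch, not part of this patch): the loop below
// over-approximates liveness by treating every internal function called from
// a newly-live block as live itself, as the preceding comment explains. A
// minimal model, loosely mirroring the assumeLive() helper above, with
// hypothetical types:
#include <set>
#include <string>
#include <vector>

struct ToyBlock {
  std::vector<std::string> InternalCallees; // internal functions called here
};

// Returns false if the block was already assumed live; otherwise records it
// and conservatively marks all of its internal callees live as well.
static bool assumeBlockLive(const ToyBlock &BB, const std::string &Name,
                            std::set<std::string> &LiveBlocks,
                            std::set<std::string> &LiveInternalFunctions) {
  if (!LiveBlocks.insert(Name).second)
    return false;                                // already known live
  for (const std::string &Callee : BB.InternalCallees)
    LiveInternalFunctions.insert(Callee);        // may keep dead functions alive
  return true;
}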
- for (const Instruction &I : BB) - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) - if (const Function *F = ICS.getCalledFunction()) - if (F->hasLocalLinkage()) - A.markLiveInternalFunction(*F); +bool Attributor::isAssumedDead(const IRPosition &IRP, + const AbstractAttribute *QueryingAA, + const AAIsDead *FnLivenessAA, + bool CheckBBLivenessOnly, DepClassTy DepClass) { + Instruction *CtxI = IRP.getCtxI(); + if (CtxI && + isAssumedDead(*CtxI, QueryingAA, FnLivenessAA, + /* CheckBBLivenessOnly */ true, + CheckBBLivenessOnly ? DepClass : DepClassTy::OPTIONAL)) return true; - } - /// Collection of instructions that need to be explored again, e.g., we - /// did assume they do not transfer control to (one of their) successors. - SmallSetVector<const Instruction *, 8> ToBeExploredFrom; - - /// Collection of instructions that are known to not transfer control. - SmallSetVector<const Instruction *, 8> KnownDeadEnds; - - /// Collection of all assumed live BasicBlocks. - DenseSet<const BasicBlock *> AssumedLiveBlocks; -}; + if (CheckBBLivenessOnly) + return false; -static bool -identifyAliveSuccessors(Attributor &A, const CallBase &CB, - AbstractAttribute &AA, - SmallVectorImpl<const Instruction *> &AliveSuccessors) { - const IRPosition &IPos = IRPosition::callsite_function(CB); - - const auto &NoReturnAA = A.getAAFor<AANoReturn>(AA, IPos); - if (NoReturnAA.isAssumedNoReturn()) - return !NoReturnAA.isKnownNoReturn(); - if (CB.isTerminator()) - AliveSuccessors.push_back(&CB.getSuccessor(0)->front()); + // If we haven't succeeded we query the specific liveness info for the IRP. + const AAIsDead *IsDeadAA; + if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE) + IsDeadAA = &getOrCreateAAFor<AAIsDead>( + IRPosition::callsite_returned(cast<CallBase>(IRP.getAssociatedValue())), + QueryingAA, /* TrackDependence */ false); else - AliveSuccessors.push_back(CB.getNextNode()); - return false; -} - -static bool -identifyAliveSuccessors(Attributor &A, const InvokeInst &II, - AbstractAttribute &AA, - SmallVectorImpl<const Instruction *> &AliveSuccessors) { - bool UsedAssumedInformation = - identifyAliveSuccessors(A, cast<CallBase>(II), AA, AliveSuccessors); - - // First, determine if we can change an invoke to a call assuming the - // callee is nounwind. This is not possible if the personality of the - // function allows to catch asynchronous exceptions. 
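// Editor's note (standalone sketch, not LLVM code): the new
// Attributor::isAssumedDead() in the '+' lines above answers the query with
// *assumed* information and, when it does, records a dependence so the asking
// attribute is revisited if the liveness assumption is later retracted. A toy
// model of that bookkeeping, with hypothetical names:
#include <map>
#include <set>
#include <string>

struct ToyDependenceGraph {
  // For each attribute, the attributes that relied on its assumed state.
  std::map<std::string, std::set<std::string>> Dependents;

  void recordDependence(const std::string &DependedOn,
                        const std::string &Depender) {
    Dependents[DependedOn].insert(Depender);
  }

  // When an assumption changes, everything that depended on it must be
  // scheduled for another update.
  std::set<std::string> invalidated(const std::string &Changed) const {
    auto It = Dependents.find(Changed);
    return It == Dependents.end() ? std::set<std::string>{} : It->second;
  }
};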
- if (AAIsDeadFunction::mayCatchAsynchronousExceptions(*II.getFunction())) { - AliveSuccessors.push_back(&II.getUnwindDest()->front()); - } else { - const IRPosition &IPos = IRPosition::callsite_function(II); - const auto &AANoUnw = A.getAAFor<AANoUnwind>(AA, IPos); - if (AANoUnw.isAssumedNoUnwind()) { - UsedAssumedInformation |= !AANoUnw.isKnownNoUnwind(); - } else { - AliveSuccessors.push_back(&II.getUnwindDest()->front()); - } - } - return UsedAssumedInformation; -} - -static Optional<ConstantInt *> -getAssumedConstant(Attributor &A, const Value &V, AbstractAttribute &AA, - bool &UsedAssumedInformation) { - const auto &ValueSimplifyAA = - A.getAAFor<AAValueSimplify>(AA, IRPosition::value(V)); - Optional<Value *> SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(A); - UsedAssumedInformation |= !ValueSimplifyAA.isKnown(); - if (!SimplifiedV.hasValue()) - return llvm::None; - if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue())) - return llvm::None; - return dyn_cast_or_null<ConstantInt>(SimplifiedV.getValue()); -} - -static bool -identifyAliveSuccessors(Attributor &A, const BranchInst &BI, - AbstractAttribute &AA, - SmallVectorImpl<const Instruction *> &AliveSuccessors) { - bool UsedAssumedInformation = false; - if (BI.getNumSuccessors() == 1) { - AliveSuccessors.push_back(&BI.getSuccessor(0)->front()); - } else { - Optional<ConstantInt *> CI = - getAssumedConstant(A, *BI.getCondition(), AA, UsedAssumedInformation); - if (!CI.hasValue()) { - // No value yet, assume both edges are dead. - } else if (CI.getValue()) { - const BasicBlock *SuccBB = - BI.getSuccessor(1 - CI.getValue()->getZExtValue()); - AliveSuccessors.push_back(&SuccBB->front()); - } else { - AliveSuccessors.push_back(&BI.getSuccessor(0)->front()); - AliveSuccessors.push_back(&BI.getSuccessor(1)->front()); - UsedAssumedInformation = false; - } - } - return UsedAssumedInformation; -} - -static bool -identifyAliveSuccessors(Attributor &A, const SwitchInst &SI, - AbstractAttribute &AA, - SmallVectorImpl<const Instruction *> &AliveSuccessors) { - bool UsedAssumedInformation = false; - Optional<ConstantInt *> CI = - getAssumedConstant(A, *SI.getCondition(), AA, UsedAssumedInformation); - if (!CI.hasValue()) { - // No value yet, assume all edges are dead. - } else if (CI.getValue()) { - for (auto &CaseIt : SI.cases()) { - if (CaseIt.getCaseValue() == CI.getValue()) { - AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front()); - return UsedAssumedInformation; - } - } - AliveSuccessors.push_back(&SI.getDefaultDest()->front()); - return UsedAssumedInformation; - } else { - for (const BasicBlock *SuccBB : successors(SI.getParent())) - AliveSuccessors.push_back(&SuccBB->front()); - } - return UsedAssumedInformation; -} - -ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) { - ChangeStatus Change = ChangeStatus::UNCHANGED; - - LLVM_DEBUG(dbgs() << "[AAIsDead] Live [" << AssumedLiveBlocks.size() << "/" - << getAssociatedFunction()->size() << "] BBs and " - << ToBeExploredFrom.size() << " exploration points and " - << KnownDeadEnds.size() << " known dead ends\n"); - - // Copy and clear the list of instructions we need to explore from. It is - // refilled with instructions the next update has to look at. 
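// Editor's note (illustrative aside, not part of this patch): the
// identifyAliveSuccessors() overload for conditional branches above prunes
// edges based on what is currently known about the condition. A standalone
// sketch of that case analysis (successor 0 is the "true" edge):
#include <vector>

enum class CondInfo { Unknown, KnownTrue, KnownFalse, NotConstant };

static std::vector<int> aliveBranchSuccessors(CondInfo C) {
  switch (C) {
  case CondInfo::Unknown:
    return {};          // nothing known yet: optimistically assume no live edge
  case CondInfo::KnownTrue:
    return {0};         // only the true successor stays live
  case CondInfo::KnownFalse:
    return {1};         // only the false successor stays live
  case CondInfo::NotConstant:
    return {0, 1};      // both edges must be considered live
  }
  return {0, 1};
}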
- SmallVector<const Instruction *, 8> Worklist(ToBeExploredFrom.begin(), - ToBeExploredFrom.end()); - decltype(ToBeExploredFrom) NewToBeExploredFrom; - - SmallVector<const Instruction *, 8> AliveSuccessors; - while (!Worklist.empty()) { - const Instruction *I = Worklist.pop_back_val(); - LLVM_DEBUG(dbgs() << "[AAIsDead] Exploration inst: " << *I << "\n"); - - AliveSuccessors.clear(); - - bool UsedAssumedInformation = false; - switch (I->getOpcode()) { - // TODO: look for (assumed) UB to backwards propagate "deadness". - default: - if (I->isTerminator()) { - for (const BasicBlock *SuccBB : successors(I->getParent())) - AliveSuccessors.push_back(&SuccBB->front()); - } else { - AliveSuccessors.push_back(I->getNextNode()); - } - break; - case Instruction::Call: - UsedAssumedInformation = identifyAliveSuccessors(A, cast<CallInst>(*I), - *this, AliveSuccessors); - break; - case Instruction::Invoke: - UsedAssumedInformation = identifyAliveSuccessors(A, cast<InvokeInst>(*I), - *this, AliveSuccessors); - break; - case Instruction::Br: - UsedAssumedInformation = identifyAliveSuccessors(A, cast<BranchInst>(*I), - *this, AliveSuccessors); - break; - case Instruction::Switch: - UsedAssumedInformation = identifyAliveSuccessors(A, cast<SwitchInst>(*I), - *this, AliveSuccessors); - break; - } - - if (UsedAssumedInformation) { - NewToBeExploredFrom.insert(I); - } else { - Change = ChangeStatus::CHANGED; - if (AliveSuccessors.empty() || - (I->isTerminator() && AliveSuccessors.size() < I->getNumSuccessors())) - KnownDeadEnds.insert(I); - } - - LLVM_DEBUG(dbgs() << "[AAIsDead] #AliveSuccessors: " - << AliveSuccessors.size() << " UsedAssumedInformation: " - << UsedAssumedInformation << "\n"); - - for (const Instruction *AliveSuccessor : AliveSuccessors) { - if (!I->isTerminator()) { - assert(AliveSuccessors.size() == 1 && - "Non-terminator expected to have a single successor!"); - Worklist.push_back(AliveSuccessor); - } else { - if (assumeLive(A, *AliveSuccessor->getParent())) - Worklist.push_back(AliveSuccessor); - } - } - } - - ToBeExploredFrom = std::move(NewToBeExploredFrom); - - // If we know everything is live there is no need to query for liveness. - // Instead, indicating a pessimistic fixpoint will cause the state to be - // "invalid" and all queries to be answered conservatively without lookups. - // To be in this state we have to (1) finished the exploration and (3) not - // discovered any non-trivial dead end and (2) not ruled unreachable code - // dead. - if (ToBeExploredFrom.empty() && - getAssociatedFunction()->size() == AssumedLiveBlocks.size() && - llvm::all_of(KnownDeadEnds, [](const Instruction *DeadEndI) { - return DeadEndI->isTerminator() && DeadEndI->getNumSuccessors() == 0; - })) - return indicatePessimisticFixpoint(); - return Change; -} - -/// Liveness information for a call sites. -struct AAIsDeadCallSite final : AAIsDeadFunction { - AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadFunction(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites instead of - // redirecting requests to the callee. - llvm_unreachable("Abstract attributes for liveness are not " - "supported for call sites yet!"); - } - - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override { - return indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} -}; - -/// -------------------- Dereferenceable Argument Attribute -------------------- - -template <> -ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S, - const DerefState &R) { - ChangeStatus CS0 = - clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState); - ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState); - return CS0 | CS1; -} - -struct AADereferenceableImpl : AADereferenceable { - AADereferenceableImpl(const IRPosition &IRP) : AADereferenceable(IRP) {} - using StateType = DerefState; - - void initialize(Attributor &A) override { - SmallVector<Attribute, 4> Attrs; - getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull}, - Attrs); - for (const Attribute &Attr : Attrs) - takeKnownDerefBytesMaximum(Attr.getValueAsInt()); - - NonNullAA = &A.getAAFor<AANonNull>(*this, getIRPosition()); - - const IRPosition &IRP = this->getIRPosition(); - bool IsFnInterface = IRP.isFnInterfaceKind(); - const Function *FnScope = IRP.getAnchorScope(); - if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::getState() - /// { - StateType &getState() override { return *this; } - const StateType &getState() const override { return *this; } - /// } - - /// Helper function for collecting accessed bytes in must-be-executed-context - void addAccessedBytesForUse(Attributor &A, const Use *U, - const Instruction *I) { - const Value *UseV = U->get(); - if (!UseV->getType()->isPointerTy()) - return; - - Type *PtrTy = UseV->getType(); - const DataLayout &DL = A.getDataLayout(); - int64_t Offset; - if (const Value *Base = getBasePointerOfAccessPointerOperand( - I, Offset, DL, /*AllowNonInbounds*/ true)) { - if (Base == &getAssociatedValue() && - Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) { - uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType()); - addAccessedBytes(Offset, Size); - } - } - return; - } - - /// See AAFromMustBeExecutedContext - bool followUse(Attributor &A, const Use *U, const Instruction *I) { - bool IsNonNull = false; - bool TrackUse = false; - int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( - A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse); - - addAccessedBytesForUse(A, U, I); - takeKnownDerefBytesMaximum(DerefBytes); - return TrackUse; - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - ChangeStatus Change = AADereferenceable::manifest(A); - if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) { - removeAttrs({Attribute::DereferenceableOrNull}); - return ChangeStatus::CHANGED; - } - return Change; - } - - void getDeducedAttributes(LLVMContext &Ctx, - SmallVectorImpl<Attribute> &Attrs) const override { - // TODO: Add *_globally support - if (isAssumedNonNull()) - Attrs.emplace_back(Attribute::getWithDereferenceableBytes( - Ctx, getAssumedDereferenceableBytes())); - else - Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes( - Ctx, getAssumedDereferenceableBytes())); - } - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - if (!getAssumedDereferenceableBytes()) - return "unknown-dereferenceable"; - return std::string("dereferenceable") + - (isAssumedNonNull() ? 
"" : "_or_null") + - (isAssumedGlobal() ? "_globally" : "") + "<" + - std::to_string(getKnownDereferenceableBytes()) + "-" + - std::to_string(getAssumedDereferenceableBytes()) + ">"; - } -}; - -/// Dereferenceable attribute for a floating value. -struct AADereferenceableFloating - : AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl> { - using Base = - AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl>; - AADereferenceableFloating(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus Change = Base::updateImpl(A); - - const DataLayout &DL = A.getDataLayout(); - - auto VisitValueCB = [&](Value &V, DerefState &T, bool Stripped) -> bool { - unsigned IdxWidth = - DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); - APInt Offset(IdxWidth, 0); - const Value *Base = - V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset); - - const auto &AA = - A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base)); - int64_t DerefBytes = 0; - if (!Stripped && this == &AA) { - // Use IR information if we did not strip anything. - // TODO: track globally. - bool CanBeNull; - DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull); - T.GlobalState.indicatePessimisticFixpoint(); - } else { - const DerefState &DS = static_cast<const DerefState &>(AA.getState()); - DerefBytes = DS.DerefBytesState.getAssumed(); - T.GlobalState &= DS.GlobalState; - } - - // TODO: Use `AAConstantRange` to infer dereferenceable bytes. - - // For now we do not try to "increase" dereferenceability due to negative - // indices as we first have to come up with code to deal with loops and - // for overflows of the dereferenceable bytes. - int64_t OffsetSExt = Offset.getSExtValue(); - if (OffsetSExt < 0) - OffsetSExt = 0; - - T.takeAssumedDerefBytesMinimum( - std::max(int64_t(0), DerefBytes - OffsetSExt)); - - if (this == &AA) { - if (!Stripped) { - // If nothing was stripped IR information is all we got. - T.takeKnownDerefBytesMaximum( - std::max(int64_t(0), DerefBytes - OffsetSExt)); - T.indicatePessimisticFixpoint(); - } else if (OffsetSExt > 0) { - // If something was stripped but there is circular reasoning we look - // for the offset. If it is positive we basically decrease the - // dereferenceable bytes in a circluar loop now, which will simply - // drive them down to the known value in a very slow way which we - // can accelerate. - T.indicatePessimisticFixpoint(); - } - } - - return T.isValidState(); - }; - - DerefState T; - if (!genericValueTraversal<AADereferenceable, DerefState>( - A, getIRPosition(), *this, T, VisitValueCB)) - return indicatePessimisticFixpoint(); - - return Change | clampStateAndIndicateChange(getState(), T); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FLOATING_ATTR(dereferenceable) - } -}; - -/// Dereferenceable attribute for a return value. 
-struct AADereferenceableReturned final - : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl, - DerefState> { - AADereferenceableReturned(const IRPosition &IRP) - : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl, - DerefState>(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FNRET_ATTR(dereferenceable) - } -}; - -/// Dereferenceable attribute for an argument -struct AADereferenceableArgument final - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< - AADereferenceable, AADereferenceableImpl, DerefState> { - using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< - AADereferenceable, AADereferenceableImpl, DerefState>; - AADereferenceableArgument(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_ARG_ATTR(dereferenceable) - } -}; - -/// Dereferenceable attribute for a call site argument. -struct AADereferenceableCallSiteArgument final : AADereferenceableFloating { - AADereferenceableCallSiteArgument(const IRPosition &IRP) - : AADereferenceableFloating(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CSARG_ATTR(dereferenceable) - } -}; - -/// Dereferenceable attribute deduction for a call site return value. -struct AADereferenceableCallSiteReturned final - : AACallSiteReturnedFromReturnedAndMustBeExecutedContext< - AADereferenceable, AADereferenceableImpl> { - using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext< - AADereferenceable, AADereferenceableImpl>; - AADereferenceableCallSiteReturned(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CS_ATTR(dereferenceable); - } -}; - -// ------------------------ Align Argument Attribute ------------------------ - -static unsigned int getKnownAlignForUse(Attributor &A, - AbstractAttribute &QueryingAA, - Value &AssociatedValue, const Use *U, - const Instruction *I, bool &TrackUse) { - // We need to follow common pointer manipulation uses to the accesses they - // feed into. - if (isa<CastInst>(I)) { - // Follow all but ptr2int casts. - TrackUse = !isa<PtrToIntInst>(I); - return 0; - } - if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { - if (GEP->hasAllConstantIndices()) { - TrackUse = true; - return 0; - } - } - - unsigned Alignment = 0; - if (ImmutableCallSite ICS = ImmutableCallSite(I)) { - if (ICS.isBundleOperand(U) || ICS.isCallee(U)) - return 0; - - unsigned ArgNo = ICS.getArgumentNo(U); - IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); - // As long as we only use known information there is no need to track - // dependences here. - auto &AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP, - /* TrackDependence */ false); - Alignment = AlignAA.getKnownAlign(); - } - - const Value *UseV = U->get(); - if (auto *SI = dyn_cast<StoreInst>(I)) - Alignment = SI->getAlignment(); - else if (auto *LI = dyn_cast<LoadInst>(I)) - Alignment = LI->getAlignment(); - - if (Alignment <= 1) - return 0; - - auto &DL = A.getDataLayout(); - int64_t Offset; - - if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) { - if (Base == &AssociatedValue) { - // BasePointerAddr + Offset = Alignment * Q for some integer Q. - // So we can say that the maximum power of two which is a divisor of - // gcd(Offset, Alignment) is an alignment. 
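// Editor's note (worked example, not part of this patch): with a base pointer
// known to be 16-byte aligned and a constant offset of 20, gcd(20, 16) = 4,
// so the access address is 16*q + 20 = 4*(4*q + 5) and can be treated as
// 4-byte aligned (but not 8). Because the base alignment is a power of two,
// the gcd is itself a power of two, so rounding down below is a no-op; the
// patch uses greatestCommonDivisor/PowerOf2Floor for the same computation.
#include <cstdint>
#include <numeric>

static uint64_t impliedAlignment(int64_t Offset, uint64_t BaseAlign) {
  // std::gcd works on the absolute values of its operands.
  uint64_t G = static_cast<uint64_t>(
      std::gcd(Offset, static_cast<int64_t>(BaseAlign)));
  uint64_t P = 1;
  while (P * 2 <= G && P * 2 != 0)
    P *= 2;             // round down to a power of two (mirrors PowerOf2Floor)
  return P;
}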
- - uint32_t gcd = - greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), Alignment); - Alignment = llvm::PowerOf2Floor(gcd); - } - } - - return Alignment; -} -struct AAAlignImpl : AAAlign { - AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - SmallVector<Attribute, 4> Attrs; - getAttrs({Attribute::Alignment}, Attrs); - for (const Attribute &Attr : Attrs) - takeKnownMaximum(Attr.getValueAsInt()); - - if (getIRPosition().isFnInterfaceKind() && - (!getAssociatedFunction() || - !getAssociatedFunction()->hasExactDefinition())) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - ChangeStatus Changed = ChangeStatus::UNCHANGED; - - // Check for users that allow alignment annotations. - Value &AnchorVal = getIRPosition().getAnchorValue(); - for (const Use &U : AnchorVal.uses()) { - if (auto *SI = dyn_cast<StoreInst>(U.getUser())) { - if (SI->getPointerOperand() == &AnchorVal) - if (SI->getAlignment() < getAssumedAlign()) { - STATS_DECLTRACK(AAAlign, Store, - "Number of times alignment added to a store"); - SI->setAlignment(Align(getAssumedAlign())); - Changed = ChangeStatus::CHANGED; - } - } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) { - if (LI->getPointerOperand() == &AnchorVal) - if (LI->getAlignment() < getAssumedAlign()) { - LI->setAlignment(Align(getAssumedAlign())); - STATS_DECLTRACK(AAAlign, Load, - "Number of times alignment added to a load"); - Changed = ChangeStatus::CHANGED; - } - } - } - - return AAAlign::manifest(A) | Changed; - } - - // TODO: Provide a helper to determine the implied ABI alignment and check in - // the existing manifest method and a new one for AAAlignImpl that value - // to avoid making the alignment explicit if it did not improve. - - /// See AbstractAttribute::getDeducedAttributes - virtual void - getDeducedAttributes(LLVMContext &Ctx, - SmallVectorImpl<Attribute> &Attrs) const override { - if (getAssumedAlign() > 1) - Attrs.emplace_back( - Attribute::getWithAlignment(Ctx, Align(getAssumedAlign()))); - } - /// See AAFromMustBeExecutedContext - bool followUse(Attributor &A, const Use *U, const Instruction *I) { - bool TrackUse = false; - - unsigned int KnownAlign = - getKnownAlignForUse(A, *this, getAssociatedValue(), U, I, TrackUse); - takeKnownMaximum(KnownAlign); - - return TrackUse; - } - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) + - "-" + std::to_string(getAssumedAlign()) + ">") - : "unknown-align"; - } -}; - -/// Align attribute for a floating value. -struct AAAlignFloating : AAFromMustBeExecutedContext<AAAlign, AAAlignImpl> { - using Base = AAFromMustBeExecutedContext<AAAlign, AAAlignImpl>; - AAAlignFloating(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - Base::updateImpl(A); - - const DataLayout &DL = A.getDataLayout(); - - auto VisitValueCB = [&](Value &V, AAAlign::StateType &T, - bool Stripped) -> bool { - const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V)); - if (!Stripped && this == &AA) { - // Use only IR information if we did not strip anything. - const MaybeAlign PA = V.getPointerAlignment(DL); - T.takeKnownMaximum(PA ? PA->value() : 0); - T.indicatePessimisticFixpoint(); - } else { - // Use abstract attribute information. 
- const AAAlign::StateType &DS = - static_cast<const AAAlign::StateType &>(AA.getState()); - T ^= DS; - } - return T.isValidState(); - }; - - StateType T; - if (!genericValueTraversal<AAAlign, StateType>(A, getIRPosition(), *this, T, - VisitValueCB)) - return indicatePessimisticFixpoint(); - - // TODO: If we know we visited all incoming values, thus no are assumed - // dead, we can take the known information from the state T. - return clampStateAndIndicateChange(getState(), T); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) } -}; - -/// Align attribute for function return value. -struct AAAlignReturned final - : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> { - AAAlignReturned(const IRPosition &IRP) - : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) } -}; - -/// Align attribute for function argument. -struct AAAlignArgument final - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AAAlign, - AAAlignImpl> { - AAAlignArgument(const IRPosition &IRP) - : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AAAlign, - AAAlignImpl>( - IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) } -}; - -struct AAAlignCallSiteArgument final : AAAlignFloating { - AAAlignCallSiteArgument(const IRPosition &IRP) : AAAlignFloating(IRP) {} - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - return AAAlignImpl::manifest(A); - } - - /// See AbstractAttribute::updateImpl(Attributor &A). - ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus Changed = AAAlignFloating::updateImpl(A); - if (Argument *Arg = getAssociatedArgument()) { - const auto &ArgAlignAA = A.getAAFor<AAAlign>( - *this, IRPosition::argument(*Arg), /* TrackDependence */ false, - DepClassTy::OPTIONAL); - takeKnownMaximum(ArgAlignAA.getKnownAlign()); - } - return Changed; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) } -}; - -/// Align attribute deduction for a call site return value. -struct AAAlignCallSiteReturned final - : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AAAlign, - AAAlignImpl> { - using Base = - AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AAAlign, - AAAlignImpl>; - AAAlignCallSiteReturned(const IRPosition &IRP) : Base(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - Base::initialize(A); - Function *F = getAssociatedFunction(); - if (!F) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); } -}; - -/// ------------------ Function No-Return Attribute ---------------------------- -struct AANoReturnImpl : public AANoReturn { - AANoReturnImpl(const IRPosition &IRP) : AANoReturn(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AANoReturn::initialize(A); - Function *F = getAssociatedFunction(); - if (!F) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - return getAssumed() ? "noreturn" : "may-return"; - } - - /// See AbstractAttribute::updateImpl(Attributor &A). 
- virtual ChangeStatus updateImpl(Attributor &A) override { - auto CheckForNoReturn = [](Instruction &) { return false; }; - if (!A.checkForAllInstructions(CheckForNoReturn, *this, - {(unsigned)Instruction::Ret})) - return indicatePessimisticFixpoint(); - return ChangeStatus::UNCHANGED; - } -}; - -struct AANoReturnFunction final : AANoReturnImpl { - AANoReturnFunction(const IRPosition &IRP) : AANoReturnImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) } -}; - -/// NoReturn attribute deduction for a call sites. -struct AANoReturnCallSite final : AANoReturnImpl { - AANoReturnCallSite(const IRPosition &IRP) : AANoReturnImpl(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::function(*F); - auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos); - return clampStateAndIndicateChange( - getState(), - static_cast<const AANoReturn::StateType &>(FnAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); } -}; - -/// ----------------------- Variable Capturing --------------------------------- - -/// A class to hold the state of for no-capture attributes. -struct AANoCaptureImpl : public AANoCapture { - AANoCaptureImpl(const IRPosition &IRP) : AANoCapture(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - if (hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ true)) { - indicateOptimisticFixpoint(); - return; - } - Function *AnchorScope = getAnchorScope(); - if (isFnInterfaceKind() && - (!AnchorScope || !AnchorScope->hasExactDefinition())) { - indicatePessimisticFixpoint(); - return; - } - - // You cannot "capture" null in the default address space. - if (isa<ConstantPointerNull>(getAssociatedValue()) && - getAssociatedValue().getType()->getPointerAddressSpace() == 0) { - indicateOptimisticFixpoint(); - return; - } - - const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope; - - // Check what state the associated function can actually capture. - if (F) - determineFunctionCaptureCapabilities(getIRPosition(), *F, *this); - else - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; - - /// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...). - virtual void - getDeducedAttributes(LLVMContext &Ctx, - SmallVectorImpl<Attribute> &Attrs) const override { - if (!isAssumedNoCaptureMaybeReturned()) - return; - - if (getArgNo() >= 0) { - if (isAssumedNoCapture()) - Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture)); - else if (ManifestInternal) - Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned")); - } - } - - /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known - /// depending on the ability of the function associated with \p IRP to capture - /// state in memory and through "returning/throwing", respectively. 
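// Editor's note (standalone sketch, not the LLVM definitions): the no-capture
// state is a small bit lattice over three escape channels. The helper below
// mirrors the reasoning of determineFunctionCaptureCapabilities() defined just
// after this note, using hypothetical bit values: a readonly function cannot
// capture through memory, and a nothrow function returning void cannot
// communicate the pointer back to its caller.
#include <cstdint>

enum : uint8_t {
  TOY_NOT_CAPTURED_IN_MEM = 1 << 0,
  TOY_NOT_CAPTURED_IN_INT = 1 << 1,
  TOY_NOT_CAPTURED_IN_RET = 1 << 2,
  TOY_NO_CAPTURE = TOY_NOT_CAPTURED_IN_MEM | TOY_NOT_CAPTURED_IN_INT |
                   TOY_NOT_CAPTURED_IN_RET
};

static uint8_t knownCaptureBits(bool OnlyReadsMemory, bool DoesNotThrow,
                                bool ReturnsVoid) {
  if (OnlyReadsMemory && DoesNotThrow && ReturnsVoid)
    return TOY_NO_CAPTURE;      // nothing to write to, nothing escapes, ptr2int moot
  uint8_t Known = 0;
  if (OnlyReadsMemory)
    Known |= TOY_NOT_CAPTURED_IN_MEM;
  if (DoesNotThrow && ReturnsVoid)
    Known |= TOY_NOT_CAPTURED_IN_RET;
  return Known;
}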
- static void determineFunctionCaptureCapabilities(const IRPosition &IRP, - const Function &F, - BitIntegerState &State) { - // TODO: Once we have memory behavior attributes we should use them here. - - // If we know we cannot communicate or write to memory, we do not care about - // ptr2int anymore. - if (F.onlyReadsMemory() && F.doesNotThrow() && - F.getReturnType()->isVoidTy()) { - State.addKnownBits(NO_CAPTURE); - return; - } - - // A function cannot capture state in memory if it only reads memory, it can - // however return/throw state and the state might be influenced by the - // pointer value, e.g., loading from a returned pointer might reveal a bit. - if (F.onlyReadsMemory()) - State.addKnownBits(NOT_CAPTURED_IN_MEM); - - // A function cannot communicate state back if it does not through - // exceptions and doesn not return values. - if (F.doesNotThrow() && F.getReturnType()->isVoidTy()) - State.addKnownBits(NOT_CAPTURED_IN_RET); - - // Check existing "returned" attributes. - int ArgNo = IRP.getArgNo(); - if (F.doesNotThrow() && ArgNo >= 0) { - for (unsigned u = 0, e = F.arg_size(); u < e; ++u) - if (F.hasParamAttribute(u, Attribute::Returned)) { - if (u == unsigned(ArgNo)) - State.removeAssumedBits(NOT_CAPTURED_IN_RET); - else if (F.onlyReadsMemory()) - State.addKnownBits(NO_CAPTURE); - else - State.addKnownBits(NOT_CAPTURED_IN_RET); - break; - } - } - } - - /// See AbstractState::getAsStr(). - const std::string getAsStr() const override { - if (isKnownNoCapture()) - return "known not-captured"; - if (isAssumedNoCapture()) - return "assumed not-captured"; - if (isKnownNoCaptureMaybeReturned()) - return "known not-captured-maybe-returned"; - if (isAssumedNoCaptureMaybeReturned()) - return "assumed not-captured-maybe-returned"; - return "assumed-captured"; - } -}; - -/// Attributor-aware capture tracker. -struct AACaptureUseTracker final : public CaptureTracker { - - /// Create a capture tracker that can lookup in-flight abstract attributes - /// through the Attributor \p A. - /// - /// If a use leads to a potential capture, \p CapturedInMemory is set and the - /// search is stopped. If a use leads to a return instruction, - /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed. - /// If a use leads to a ptr2int which may capture the value, - /// \p CapturedInInteger is set. If a use is found that is currently assumed - /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies - /// set. All values in \p PotentialCopies are later tracked as well. For every - /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0, - /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger - /// conservatively set to true. - AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA, - const AAIsDead &IsDeadAA, AANoCapture::StateType &State, - SmallVectorImpl<const Value *> &PotentialCopies, - unsigned &RemainingUsesToExplore) - : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State), - PotentialCopies(PotentialCopies), - RemainingUsesToExplore(RemainingUsesToExplore) {} - - /// Determine if \p V maybe captured. *Also updates the state!* - bool valueMayBeCaptured(const Value *V) { - if (V->getType()->isPointerTy()) { - PointerMayBeCaptured(V, this); - } else { - State.indicatePessimisticFixpoint(); - } - return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); - } - - /// See CaptureTracker::tooManyUses(). 
- void tooManyUses() override { - State.removeAssumedBits(AANoCapture::NO_CAPTURE); - } - - bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override { - if (CaptureTracker::isDereferenceableOrNull(O, DL)) - return true; - const auto &DerefAA = - A.getAAFor<AADereferenceable>(NoCaptureAA, IRPosition::value(*O)); - return DerefAA.getAssumedDereferenceableBytes(); - } - - /// See CaptureTracker::captured(...). - bool captured(const Use *U) override { - Instruction *UInst = cast<Instruction>(U->getUser()); - LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst - << "\n"); - - // Because we may reuse the tracker multiple times we keep track of the - // number of explored uses ourselves as well. - if (RemainingUsesToExplore-- == 0) { - LLVM_DEBUG(dbgs() << " - too many uses to explore!\n"); - return isCapturedIn(/* Memory */ true, /* Integer */ true, - /* Return */ true); - } - - // Deal with ptr2int by following uses. - if (isa<PtrToIntInst>(UInst)) { - LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n"); - return valueMayBeCaptured(UInst); - } - - // Explicitly catch return instructions. - if (isa<ReturnInst>(UInst)) - return isCapturedIn(/* Memory */ false, /* Integer */ false, - /* Return */ true); - - // For now we only use special logic for call sites. However, the tracker - // itself knows about a lot of other non-capturing cases already. - CallSite CS(UInst); - if (!CS || !CS.isArgOperand(U)) - return isCapturedIn(/* Memory */ true, /* Integer */ true, - /* Return */ true); - - unsigned ArgNo = CS.getArgumentNo(U); - const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo); - // If we have a abstract no-capture attribute for the argument we can use - // it to justify a non-capture attribute here. This allows recursion! - auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos); - if (ArgNoCaptureAA.isAssumedNoCapture()) - return isCapturedIn(/* Memory */ false, /* Integer */ false, - /* Return */ false); - if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { - addPotentialCopy(CS); - return isCapturedIn(/* Memory */ false, /* Integer */ false, - /* Return */ false); - } - - // Lastly, we could not find a reason no-capture can be assumed so we don't. - return isCapturedIn(/* Memory */ true, /* Integer */ true, - /* Return */ true); - } - - /// Register \p CS as potential copy of the value we are checking. - void addPotentialCopy(CallSite CS) { - PotentialCopies.push_back(CS.getInstruction()); - } - - /// See CaptureTracker::shouldExplore(...). - bool shouldExplore(const Use *U) override { - // Check liveness. - return !IsDeadAA.isAssumedDead(cast<Instruction>(U->getUser())); - } - - /// Update the state according to \p CapturedInMem, \p CapturedInInt, and - /// \p CapturedInRet, then return the appropriate value for use in the - /// CaptureTracker::captured() interface. - bool isCapturedIn(bool CapturedInMem, bool CapturedInInt, - bool CapturedInRet) { - LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int " - << CapturedInInt << "|Ret " << CapturedInRet << "]\n"); - if (CapturedInMem) - State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM); - if (CapturedInInt) - State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT); - if (CapturedInRet) - State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET); - return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); - } - -private: - /// The attributor providing in-flight abstract attributes. - Attributor &A; - - /// The abstract attribute currently updated. 
- AANoCapture &NoCaptureAA; - - /// The abstract liveness state. - const AAIsDead &IsDeadAA; - - /// The state currently updated. - AANoCapture::StateType &State; - - /// Set of potential copies of the tracked value. - SmallVectorImpl<const Value *> &PotentialCopies; - - /// Global counter to limit the number of explored uses. - unsigned &RemainingUsesToExplore; -}; - -ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { - const IRPosition &IRP = getIRPosition(); - const Value *V = - getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue(); - if (!V) - return indicatePessimisticFixpoint(); - - const Function *F = - getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); - assert(F && "Expected a function!"); - const IRPosition &FnPos = IRPosition::function(*F); - const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos); - - AANoCapture::StateType T; - - // Readonly means we cannot capture through memory. - const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos); - if (FnMemAA.isAssumedReadOnly()) { - T.addKnownBits(NOT_CAPTURED_IN_MEM); - if (FnMemAA.isKnownReadOnly()) - addKnownBits(NOT_CAPTURED_IN_MEM); - } + IsDeadAA = &getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, + /* TrackDependence */ false); + // Don't check liveness for AAIsDead. + if (QueryingAA == IsDeadAA) + return false; - // Make sure all returned values are different than the underlying value. - // TODO: we could do this in a more sophisticated way inside - // AAReturnedValues, e.g., track all values that escape through returns - // directly somehow. - auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) { - bool SeenConstant = false; - for (auto &It : RVAA.returned_values()) { - if (isa<Constant>(It.first)) { - if (SeenConstant) - return false; - SeenConstant = true; - } else if (!isa<Argument>(It.first) || - It.first == getAssociatedArgument()) - return false; - } + if (IsDeadAA->isAssumedDead()) { + if (QueryingAA) + recordDependence(*IsDeadAA, *QueryingAA, DepClass); return true; - }; - - const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(*this, FnPos); - if (NoUnwindAA.isAssumedNoUnwind()) { - bool IsVoidTy = F->getReturnType()->isVoidTy(); - const AAReturnedValues *RVAA = - IsVoidTy ? nullptr : &A.getAAFor<AAReturnedValues>(*this, FnPos); - if (IsVoidTy || CheckReturnedArgs(*RVAA)) { - T.addKnownBits(NOT_CAPTURED_IN_RET); - if (T.isKnown(NOT_CAPTURED_IN_MEM)) - return ChangeStatus::UNCHANGED; - if (NoUnwindAA.isKnownNoUnwind() && - (IsVoidTy || RVAA->getState().isAtFixpoint())) { - addKnownBits(NOT_CAPTURED_IN_RET); - if (isKnown(NOT_CAPTURED_IN_MEM)) - return indicateOptimisticFixpoint(); - } - } } - // Use the CaptureTracker interface and logic with the specialized tracker, - // defined in AACaptureUseTracker, that can look at in-flight abstract - // attributes and directly updates the assumed state. - SmallVector<const Value *, 4> PotentialCopies; - unsigned RemainingUsesToExplore = DefaultMaxUsesToExplore; - AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies, - RemainingUsesToExplore); - - // Check all potential copies of the associated value until we can assume - // none will be captured or we have to assume at least one might be. 
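// Editor's note (illustrative aside, not part of this patch): the loop below
// is a plain index-based worklist over potential copies of the tracked
// pointer; the tracker may append further copies while the walk is ongoing.
// A standalone model with a hypothetical MayCapture callback:
#include <cstddef>
#include <vector>

static bool noneCaptured(std::vector<int> PotentialCopies,
                         bool (*MayCapture)(int, std::vector<int> &)) {
  for (std::size_t Idx = 0; Idx < PotentialCopies.size(); ++Idx)
    if (MayCapture(PotentialCopies[Idx], PotentialCopies))
      return false;             // some copy may be captured: stop early
  return true;                  // no copy was found to be captured
}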
- unsigned Idx = 0; - PotentialCopies.push_back(V); - while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size()) - Tracker.valueMayBeCaptured(PotentialCopies[Idx++]); - - AANoCapture::StateType &S = getState(); - auto Assumed = S.getAssumed(); - S.intersectAssumedBits(T.getAssumed()); - if (!isAssumedNoCaptureMaybeReturned()) - return indicatePessimisticFixpoint(); - return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED - : ChangeStatus::CHANGED; + return false; } -/// NoCapture attribute for function arguments. -struct AANoCaptureArgument final : AANoCaptureImpl { - AANoCaptureArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) } -}; - -/// NoCapture attribute for call site arguments. -struct AANoCaptureCallSiteArgument final : AANoCaptureImpl { - AANoCaptureCallSiteArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - if (Argument *Arg = getAssociatedArgument()) - if (Arg->hasByValAttr()) - indicateOptimisticFixpoint(); - AANoCaptureImpl::initialize(A); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. - Argument *Arg = getAssociatedArgument(); - if (!Arg) - return indicatePessimisticFixpoint(); - const IRPosition &ArgPos = IRPosition::argument(*Arg); - auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos); - return clampStateAndIndicateChange( - getState(), - static_cast<const AANoCapture::StateType &>(ArgAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)}; -}; - -/// NoCapture attribute for floating values. -struct AANoCaptureFloating final : AANoCaptureImpl { - AANoCaptureFloating(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FLOATING_ATTR(nocapture) - } -}; - -/// NoCapture attribute for function return value. -struct AANoCaptureReturned final : AANoCaptureImpl { - AANoCaptureReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) { - llvm_unreachable("NoCapture is not applicable to function returns!"); - } - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - llvm_unreachable("NoCapture is not applicable to function returns!"); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - llvm_unreachable("NoCapture is not applicable to function returns!"); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} -}; - -/// NoCapture attribute deduction for a call site return value. 
-struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { - AANoCaptureCallSiteReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CSRET_ATTR(nocapture) - } -}; - -/// ------------------ Value Simplify Attribute ---------------------------- -struct AAValueSimplifyImpl : AAValueSimplify { - AAValueSimplifyImpl(const IRPosition &IRP) : AAValueSimplify(IRP) {} - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - return getAssumed() ? (getKnown() ? "simplified" : "maybe-simple") - : "not-simple"; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} - - /// See AAValueSimplify::getAssumedSimplifiedValue() - Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override { - if (!getAssumed()) - return const_cast<Value *>(&getAssociatedValue()); - return SimplifiedAssociatedValue; - } - void initialize(Attributor &A) override {} - - /// Helper function for querying AAValueSimplify and updating candicate. - /// \param QueryingValue Value trying to unify with SimplifiedValue - /// \param AccumulatedSimplifiedValue Current simplification result. - static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA, - Value &QueryingValue, - Optional<Value *> &AccumulatedSimplifiedValue) { - // FIXME: Add a typecast support. - - auto &ValueSimpifyAA = A.getAAFor<AAValueSimplify>( - QueryingAA, IRPosition::value(QueryingValue)); - - Optional<Value *> QueryingValueSimplified = - ValueSimpifyAA.getAssumedSimplifiedValue(A); - - if (!QueryingValueSimplified.hasValue()) - return true; - - if (!QueryingValueSimplified.getValue()) - return false; - - Value &QueryingValueSimplifiedUnwrapped = - *QueryingValueSimplified.getValue(); +bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred, + const AbstractAttribute &QueryingAA, + const Value &V, DepClassTy LivenessDepClass) { - if (isa<UndefValue>(QueryingValueSimplifiedUnwrapped)) - return true; - - if (AccumulatedSimplifiedValue.hasValue()) - return AccumulatedSimplifiedValue == QueryingValueSimplified; - - LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << QueryingValue - << " is assumed to be " - << QueryingValueSimplifiedUnwrapped << "\n"); - - AccumulatedSimplifiedValue = QueryingValueSimplified; - return true; - } - - bool askSimplifiedValueForAAValueConstantRange(Attributor &A) { - if (!getAssociatedValue().getType()->isIntegerTy()) - return false; - - const auto &ValueConstantRangeAA = - A.getAAFor<AAValueConstantRange>(*this, getIRPosition()); - - Optional<ConstantInt *> COpt = - ValueConstantRangeAA.getAssumedConstantInt(A); - if (COpt.hasValue()) { - if (auto *C = COpt.getValue()) - SimplifiedAssociatedValue = C; - else - return false; - } else { - // FIXME: It should be llvm::None but if you set llvm::None, - // values are mistakenly infered as `undef` now. - SimplifiedAssociatedValue = &getAssociatedValue(); - } + // Check the trivial case first as it catches void values. + if (V.use_empty()) return true; - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - ChangeStatus Changed = ChangeStatus::UNCHANGED; - - if (!SimplifiedAssociatedValue.hasValue() || - !SimplifiedAssociatedValue.getValue()) - return Changed; - - if (auto *C = dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())) { - // We can replace the AssociatedValue with the constant. 
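// Editor's note (standalone sketch, not part of this patch): the candidate
// handling in checkAndUpdate() above behaves like a small merge on optional
// values: "no candidate yet" accepts anything, undef is compatible with every
// candidate, and two different concrete candidates make simplification fail.
// A toy model with a hypothetical value type:
#include <optional>

enum class ToyValue { Undef, Zero, One };

static bool mergeSimplifyCandidate(ToyValue New,
                                   std::optional<ToyValue> &Accumulated) {
  if (New == ToyValue::Undef)
    return true;                 // undef unifies with whatever we already have
  if (!Accumulated) {
    Accumulated = New;           // first concrete candidate
    return true;
  }
  return *Accumulated == New;    // all call sites / returns must agree
}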
- Value &V = getAssociatedValue(); - if (!V.user_empty() && &V != C && V.getType() == C->getType()) { - LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << V << " -> " << *C - << "\n"); - A.changeValueAfterManifest(V, *C); - Changed = ChangeStatus::CHANGED; - } - } - - return Changed | AAValueSimplify::manifest(A); - } - - /// See AbstractState::indicatePessimisticFixpoint(...). - ChangeStatus indicatePessimisticFixpoint() override { - // NOTE: Associated value will be returned in a pessimistic fixpoint and is - // regarded as known. That's why`indicateOptimisticFixpoint` is called. - SimplifiedAssociatedValue = &getAssociatedValue(); - indicateOptimisticFixpoint(); - return ChangeStatus::CHANGED; - } - -protected: - // An assumed simplified value. Initially, it is set to Optional::None, which - // means that the value is not clear under current assumption. If in the - // pessimistic state, getAssumedSimplifiedValue doesn't return this value but - // returns orignal associated value. - Optional<Value *> SimplifiedAssociatedValue; -}; - -struct AAValueSimplifyArgument final : AAValueSimplifyImpl { - AAValueSimplifyArgument(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} - - void initialize(Attributor &A) override { - AAValueSimplifyImpl::initialize(A); - if (!getAssociatedFunction() || getAssociatedFunction()->isDeclaration()) - indicatePessimisticFixpoint(); - if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest}, - /* IgnoreSubsumingPositions */ true)) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // Byval is only replacable if it is readonly otherwise we would write into - // the replaced value and not the copy that byval creates implicitly. - Argument *Arg = getAssociatedArgument(); - if (Arg->hasByValAttr()) { - const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition()); - if (!MemAA.isAssumedReadOnly()) - return indicatePessimisticFixpoint(); - } - - bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); - - auto PredForCallSite = [&](AbstractCallSite ACS) { - // Check if we have an associated argument or not (which can happen for - // callback calls). - Value *ArgOp = ACS.getCallArgOperand(getArgNo()); - if (!ArgOp) - return false; - // We can only propagate thread independent values through callbacks. - // This is different to direct/indirect call sites because for them we - // know the thread executing the caller and callee is the same. For - // callbacks this is not guaranteed, thus a thread dependent value could - // be different for the caller and callee, making it invalid to propagate. - if (ACS.isCallbackCall()) - if (auto *C = dyn_cast<Constant>(ArgOp)) - if (C->isThreadDependent()) - return false; - return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue); - }; - - if (!A.checkForAllCallSites(PredForCallSite, *this, true)) - if (!askSimplifiedValueForAAValueConstantRange(A)) - return indicatePessimisticFixpoint(); - - // If a candicate was found in this update, return CHANGED. - return HasValueBefore == SimplifiedAssociatedValue.hasValue() - ? ChangeStatus::UNCHANGED - : ChangeStatus ::CHANGED; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_ARG_ATTR(value_simplify) - } -}; - -struct AAValueSimplifyReturned : AAValueSimplifyImpl { - AAValueSimplifyReturned(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} - - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override { - bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); - - auto PredForReturned = [&](Value &V) { - return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue); - }; - - if (!A.checkForAllReturnedValues(PredForReturned, *this)) - if (!askSimplifiedValueForAAValueConstantRange(A)) - return indicatePessimisticFixpoint(); - - // If a candicate was found in this update, return CHANGED. - return HasValueBefore == SimplifiedAssociatedValue.hasValue() - ? ChangeStatus::UNCHANGED - : ChangeStatus ::CHANGED; - } - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FNRET_ATTR(value_simplify) - } -}; - -struct AAValueSimplifyFloating : AAValueSimplifyImpl { - AAValueSimplifyFloating(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - Value &V = getAnchorValue(); - - // TODO: add other stuffs - if (isa<Constant>(V)) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); - - auto VisitValueCB = [&](Value &V, BooleanState, bool Stripped) -> bool { - auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V)); - if (!Stripped && this == &AA) { - // TODO: Look the instruction and check recursively. - - LLVM_DEBUG( - dbgs() << "[Attributor][ValueSimplify] Can't be stripped more : " - << V << "\n"); - return false; - } - return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue); - }; - - if (!genericValueTraversal<AAValueSimplify, BooleanState>( - A, getIRPosition(), *this, static_cast<BooleanState &>(*this), - VisitValueCB)) - if (!askSimplifiedValueForAAValueConstantRange(A)) - return indicatePessimisticFixpoint(); - - // If a candicate was found in this update, return CHANGED. - - return HasValueBefore == SimplifiedAssociatedValue.hasValue() - ? ChangeStatus::UNCHANGED - : ChangeStatus ::CHANGED; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FLOATING_ATTR(value_simplify) - } -}; - -struct AAValueSimplifyFunction : AAValueSimplifyImpl { - AAValueSimplifyFunction(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - SimplifiedAssociatedValue = &getAnchorValue(); - indicateOptimisticFixpoint(); - } - /// See AbstractAttribute::initialize(...). 
- ChangeStatus updateImpl(Attributor &A) override { - llvm_unreachable( - "AAValueSimplify(Function|CallSite)::updateImpl will not be called"); - } - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FN_ATTR(value_simplify) - } -}; - -struct AAValueSimplifyCallSite : AAValueSimplifyFunction { - AAValueSimplifyCallSite(const IRPosition &IRP) - : AAValueSimplifyFunction(IRP) {} - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CS_ATTR(value_simplify) - } -}; - -struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned { - AAValueSimplifyCallSiteReturned(const IRPosition &IRP) - : AAValueSimplifyReturned(IRP) {} - - void trackStatistics() const override { - STATS_DECLTRACK_CSRET_ATTR(value_simplify) - } -}; -struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { - AAValueSimplifyCallSiteArgument(const IRPosition &IRP) - : AAValueSimplifyFloating(IRP) {} - - void trackStatistics() const override { - STATS_DECLTRACK_CSARG_ATTR(value_simplify) - } -}; - -/// ----------------------- Heap-To-Stack Conversion --------------------------- -struct AAHeapToStackImpl : public AAHeapToStack { - AAHeapToStackImpl(const IRPosition &IRP) : AAHeapToStack(IRP) {} - - const std::string getAsStr() const override { - return "[H2S] Mallocs: " + std::to_string(MallocCalls.size()); - } - - ChangeStatus manifest(Attributor &A) override { - assert(getState().isValidState() && - "Attempted to manifest an invalid state!"); - - ChangeStatus HasChanged = ChangeStatus::UNCHANGED; - Function *F = getAssociatedFunction(); - const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); - - for (Instruction *MallocCall : MallocCalls) { - // This malloc cannot be replaced. 
- if (BadMallocCalls.count(MallocCall)) - continue; - - for (Instruction *FreeCall : FreesForMalloc[MallocCall]) { - LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n"); - A.deleteAfterManifest(*FreeCall); - HasChanged = ChangeStatus::CHANGED; - } - - LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall - << "\n"); - - Constant *Size; - if (isCallocLikeFn(MallocCall, TLI)) { - auto *Num = cast<ConstantInt>(MallocCall->getOperand(0)); - auto *SizeT = dyn_cast<ConstantInt>(MallocCall->getOperand(1)); - APInt TotalSize = SizeT->getValue() * Num->getValue(); - Size = - ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize); - } else { - Size = cast<ConstantInt>(MallocCall->getOperand(0)); - } - - unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace(); - Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, - Size, "", MallocCall->getNextNode()); - - if (AI->getType() != MallocCall->getType()) - AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc", - AI->getNextNode()); - - replaceAllInstructionUsesWith(*MallocCall, *AI); - - if (auto *II = dyn_cast<InvokeInst>(MallocCall)) { - auto *NBB = II->getNormalDest(); - BranchInst::Create(NBB, MallocCall->getParent()); - A.deleteAfterManifest(*MallocCall); - } else { - A.deleteAfterManifest(*MallocCall); - } - - if (isCallocLikeFn(MallocCall, TLI)) { - auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc", - AI->getNextNode()); - Value *Ops[] = { - BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size, - ConstantInt::get(Type::getInt1Ty(F->getContext()), false)}; - - Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()}; - Module *M = F->getParent(); - Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); - CallInst::Create(Fn, Ops, "", BI->getNextNode()); - } - HasChanged = ChangeStatus::CHANGED; - } - - return HasChanged; - } - - /// Collection of all malloc calls in a function. - SmallSetVector<Instruction *, 4> MallocCalls; - - /// Collection of malloc calls that cannot be converted. - DenseSet<const Instruction *> BadMallocCalls; - - /// A map for each malloc call to the set of associated free calls. - DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc; - - ChangeStatus updateImpl(Attributor &A) override; -}; - -ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) { - const Function *F = getAssociatedFunction(); - const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); - - MustBeExecutedContextExplorer &Explorer = - A.getInfoCache().getMustBeExecutedContextExplorer(); - - auto FreeCheck = [&](Instruction &I) { - const auto &Frees = FreesForMalloc.lookup(&I); - if (Frees.size() != 1) - return false; - Instruction *UniqueFree = *Frees.begin(); - return Explorer.findInContextOf(UniqueFree, I.getNextNode()); - }; - - auto UsesCheck = [&](Instruction &I) { - bool ValidUsesOnly = true; - bool MustUse = true; - auto Pred = [&](const Use &U, bool &Follow) -> bool { - Instruction *UserI = cast<Instruction>(U.getUser()); - if (isa<LoadInst>(UserI)) - return true; - if (auto *SI = dyn_cast<StoreInst>(UserI)) { - if (SI->getValueOperand() == U.get()) { - LLVM_DEBUG(dbgs() - << "[H2S] escaping store to memory: " << *UserI << "\n"); - ValidUsesOnly = false; - } else { - // A store into the malloc'ed memory is fine. - } - return true; - } - if (auto *CB = dyn_cast<CallBase>(UserI)) { - if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd()) - return true; - // Record malloc. 
- if (isFreeCall(UserI, TLI)) { - if (MustUse) { - FreesForMalloc[&I].insert(UserI); - } else { - LLVM_DEBUG(dbgs() << "[H2S] free potentially on different mallocs: " - << *UserI << "\n"); - ValidUsesOnly = false; - } - return true; - } - - unsigned ArgNo = CB->getArgOperandNo(&U); - - const auto &NoCaptureAA = A.getAAFor<AANoCapture>( - *this, IRPosition::callsite_argument(*CB, ArgNo)); - - // If a callsite argument use is nofree, we are fine. - const auto &ArgNoFreeAA = A.getAAFor<AANoFree>( - *this, IRPosition::callsite_argument(*CB, ArgNo)); - - if (!NoCaptureAA.isAssumedNoCapture() || - !ArgNoFreeAA.isAssumedNoFree()) { - LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n"); - ValidUsesOnly = false; - } - return true; - } - - if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) || - isa<PHINode>(UserI) || isa<SelectInst>(UserI)) { - MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI)); - Follow = true; - return true; - } - // Unknown user for which we can not track uses further (in a way that - // makes sense). - LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n"); - ValidUsesOnly = false; - return true; - }; - A.checkForAllUses(Pred, *this, I); - return ValidUsesOnly; - }; - - auto MallocCallocCheck = [&](Instruction &I) { - if (BadMallocCalls.count(&I)) - return true; - - bool IsMalloc = isMallocLikeFn(&I, TLI); - bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI); - if (!IsMalloc && !IsCalloc) { - BadMallocCalls.insert(&I); - return true; - } - - if (IsMalloc) { - if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0))) - if (Size->getValue().ule(MaxHeapToStackSize)) - if (UsesCheck(I) || FreeCheck(I)) { - MallocCalls.insert(&I); - return true; - } - } else if (IsCalloc) { - bool Overflow = false; - if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0))) - if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1))) - if ((Size->getValue().umul_ov(Num->getValue(), Overflow)) - .ule(MaxHeapToStackSize)) - if (!Overflow && (UsesCheck(I) || FreeCheck(I))) { - MallocCalls.insert(&I); - return true; - } - } - - BadMallocCalls.insert(&I); - return true; - }; - - size_t NumBadMallocs = BadMallocCalls.size(); - - A.checkForAllCallLikeInstructions(MallocCallocCheck, *this); - - if (NumBadMallocs != BadMallocCalls.size()) - return ChangeStatus::CHANGED; - - return ChangeStatus::UNCHANGED; -} - -struct AAHeapToStackFunction final : public AAHeapToStackImpl { - AAHeapToStackFunction(const IRPosition &IRP) : AAHeapToStackImpl(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECL(MallocCalls, Function, - "Number of malloc calls converted to allocas"); - for (auto *C : MallocCalls) - if (!BadMallocCalls.count(C)) - ++BUILD_STAT_NAME(MallocCalls, Function); - } -}; - -/// -------------------- Memory Behavior Attributes ---------------------------- -/// Includes read-none, read-only, and write-only. -/// ---------------------------------------------------------------------------- -struct AAMemoryBehaviorImpl : public AAMemoryBehavior { - AAMemoryBehaviorImpl(const IRPosition &IRP) : AAMemoryBehavior(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - intersectAssumedBits(BEST_STATE); - getKnownStateFromValue(getIRPosition(), getState()); - IRAttribute::initialize(A); - } - - /// Return the memory behavior information encoded in the IR for \p IRP. 
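// A standalone sketch (illustrative names, not the LLVM API) of the size gate
// MallocCallocCheck applies above: an allocation is a heap-to-stack candidate
// only if its constant size is at or below a threshold, and for calloc the
// nmemb * size product has to be checked for overflow, mirroring APInt::umul_ov.
#include <cstdint>
#include <iostream>

constexpr uint64_t MaxHeapToStackSizeSketch = 128; // assumed threshold

static bool mallocFitsOnStack(uint64_t Size) {
  return Size <= MaxHeapToStackSizeSketch;
}

static bool callocFitsOnStack(uint64_t Num, uint64_t Size) {
  uint64_t Total = 0;
  if (__builtin_mul_overflow(Num, Size, &Total)) // GCC/Clang builtin
    return false;                                // overflow: never convert
  return mallocFitsOnStack(Total);
}

int main() {
  std::cout << mallocFitsOnStack(64) << "\n";                     // 1
  std::cout << callocFitsOnStack(8, 8) << "\n";                   // 1
  std::cout << callocFitsOnStack(1ull << 33, 1ull << 33) << "\n"; // 0 (overflow)
  return 0;
}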
- static void getKnownStateFromValue(const IRPosition &IRP, - BitIntegerState &State, - bool IgnoreSubsumingPositions = false) { - SmallVector<Attribute, 2> Attrs; - IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions); - for (const Attribute &Attr : Attrs) { - switch (Attr.getKindAsEnum()) { - case Attribute::ReadNone: - State.addKnownBits(NO_ACCESSES); - break; - case Attribute::ReadOnly: - State.addKnownBits(NO_WRITES); - break; - case Attribute::WriteOnly: - State.addKnownBits(NO_READS); - break; - default: - llvm_unreachable("Unexpcted attribute!"); - } - } - - if (auto *I = dyn_cast<Instruction>(&IRP.getAnchorValue())) { - if (!I->mayReadFromMemory()) - State.addKnownBits(NO_READS); - if (!I->mayWriteToMemory()) - State.addKnownBits(NO_WRITES); - } - } - - /// See AbstractAttribute::getDeducedAttributes(...). - void getDeducedAttributes(LLVMContext &Ctx, - SmallVectorImpl<Attribute> &Attrs) const override { - assert(Attrs.size() == 0); - if (isAssumedReadNone()) - Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone)); - else if (isAssumedReadOnly()) - Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly)); - else if (isAssumedWriteOnly()) - Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly)); - assert(Attrs.size() <= 1); - } - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - const IRPosition &IRP = getIRPosition(); - - // Check if we would improve the existing attributes first. - SmallVector<Attribute, 4> DeducedAttrs; - getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs); - if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) { - return IRP.hasAttr(Attr.getKindAsEnum(), - /* IgnoreSubsumingPositions */ true); - })) - return ChangeStatus::UNCHANGED; - - // Clear existing attributes. - IRP.removeAttrs(AttrKinds); - - // Use the generic manifest method. - return IRAttribute::manifest(A); - } - - /// See AbstractState::getAsStr(). - const std::string getAsStr() const override { - if (isAssumedReadNone()) - return "readnone"; - if (isAssumedReadOnly()) - return "readonly"; - if (isAssumedWriteOnly()) - return "writeonly"; - return "may-read/write"; - } - - /// The set of IR attributes AAMemoryBehavior deals with. - static const Attribute::AttrKind AttrKinds[3]; -}; - -const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = { - Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly}; - -/// Memory behavior attribute for a floating value. -struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl { - AAMemoryBehaviorFloating(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AAMemoryBehaviorImpl::initialize(A); - // Initialize the use vector with all direct uses of the associated value. - for (const Use &U : getAssociatedValue().uses()) - Uses.insert(&U); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - if (isAssumedReadNone()) - STATS_DECLTRACK_FLOATING_ATTR(readnone) - else if (isAssumedReadOnly()) - STATS_DECLTRACK_FLOATING_ATTR(readonly) - else if (isAssumedWriteOnly()) - STATS_DECLTRACK_FLOATING_ATTR(writeonly) - } - -private: - /// Return true if users of \p UserI might access the underlying - /// variable/location described by \p U and should therefore be analyzed. 
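// A standalone model (plain C++, not the LLVM classes) of the bit lattice
// behind AAMemoryBehavior above: both "no access" bits start out assumed, are
// removed as evidence of reads or writes shows up, and the surviving bits map
// back to readnone / readonly / writeonly the same way getDeducedAttributes
// does.
#include <cstdint>
#include <iostream>
#include <string>

enum : uint8_t {
  NO_READS = 1 << 0,
  NO_WRITES = 1 << 1,
  NO_ACCESSES = NO_READS | NO_WRITES
};

struct MemBehaviorState {
  uint8_t Assumed = NO_ACCESSES; // best (most optimistic) state
  void removeAssumedBits(uint8_t Bits) { Assumed &= ~Bits; }
  std::string deducedAttribute() const {
    if (Assumed == NO_ACCESSES)
      return "readnone";
    if (Assumed & NO_WRITES)
      return "readonly";
    if (Assumed & NO_READS)
      return "writeonly";
    return "may-read/write";
  }
};

int main() {
  MemBehaviorState S;
  std::cout << S.deducedAttribute() << "\n"; // readnone
  S.removeAssumedBits(NO_READS);             // a load of the pointer was seen
  std::cout << S.deducedAttribute() << "\n"; // readonly
  S.removeAssumedBits(NO_WRITES);            // a store through it was seen too
  std::cout << S.deducedAttribute() << "\n"; // may-read/write
  return 0;
}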
- bool followUsersOfUseIn(Attributor &A, const Use *U, - const Instruction *UserI); - - /// Update the state according to the effect of use \p U in \p UserI. - void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI); - -protected: - /// Container for (transitive) uses of the associated argument. - SetVector<const Use *> Uses; -}; - -/// Memory behavior attribute for function argument. -struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating { - AAMemoryBehaviorArgument(const IRPosition &IRP) - : AAMemoryBehaviorFloating(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - intersectAssumedBits(BEST_STATE); - const IRPosition &IRP = getIRPosition(); - // TODO: Make IgnoreSubsumingPositions a property of an IRAttribute so we - // can query it when we use has/getAttr. That would allow us to reuse the - // initialize of the base class here. - bool HasByVal = - IRP.hasAttr({Attribute::ByVal}, /* IgnoreSubsumingPositions */ true); - getKnownStateFromValue(IRP, getState(), - /* IgnoreSubsumingPositions */ HasByVal); - - // Initialize the use vector with all direct uses of the associated value. - Argument *Arg = getAssociatedArgument(); - if (!Arg || !Arg->getParent()->hasExactDefinition()) { - indicatePessimisticFixpoint(); - } else { - // Initialize the use vector with all direct uses of the associated value. - for (const Use &U : Arg->uses()) - Uses.insert(&U); - } - } - - ChangeStatus manifest(Attributor &A) override { - // TODO: From readattrs.ll: "inalloca parameters are always - // considered written" - if (hasAttr({Attribute::InAlloca})) { - removeKnownBits(NO_WRITES); - removeAssumedBits(NO_WRITES); - } - return AAMemoryBehaviorFloating::manifest(A); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - if (isAssumedReadNone()) - STATS_DECLTRACK_ARG_ATTR(readnone) - else if (isAssumedReadOnly()) - STATS_DECLTRACK_ARG_ATTR(readonly) - else if (isAssumedWriteOnly()) - STATS_DECLTRACK_ARG_ATTR(writeonly) - } -}; - -struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument { - AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP) - : AAMemoryBehaviorArgument(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - if (Argument *Arg = getAssociatedArgument()) { - if (Arg->hasByValAttr()) { - addKnownBits(NO_WRITES); - removeKnownBits(NO_READS); - removeAssumedBits(NO_READS); - } - } else { - } - AAMemoryBehaviorArgument::initialize(A); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. 
- Argument *Arg = getAssociatedArgument(); - const IRPosition &ArgPos = IRPosition::argument(*Arg); - auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos); - return clampStateAndIndicateChange( - getState(), - static_cast<const AAMemoryBehavior::StateType &>(ArgAA.getState())); - } - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - if (isAssumedReadNone()) - STATS_DECLTRACK_CSARG_ATTR(readnone) - else if (isAssumedReadOnly()) - STATS_DECLTRACK_CSARG_ATTR(readonly) - else if (isAssumedWriteOnly()) - STATS_DECLTRACK_CSARG_ATTR(writeonly) - } -}; - -/// Memory behavior attribute for a call site return position. -struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating { - AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP) - : AAMemoryBehaviorFloating(IRP) {} - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - // We do not annotate returned values. - return ChangeStatus::UNCHANGED; - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} -}; - -/// An AA to represent the memory behavior function attributes. -struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl { - AAMemoryBehaviorFunction(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} - - /// See AbstractAttribute::updateImpl(Attributor &A). - virtual ChangeStatus updateImpl(Attributor &A) override; - - /// See AbstractAttribute::manifest(...). - ChangeStatus manifest(Attributor &A) override { - Function &F = cast<Function>(getAnchorValue()); - if (isAssumedReadNone()) { - F.removeFnAttr(Attribute::ArgMemOnly); - F.removeFnAttr(Attribute::InaccessibleMemOnly); - F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } - return AAMemoryBehaviorImpl::manifest(A); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - if (isAssumedReadNone()) - STATS_DECLTRACK_FN_ATTR(readnone) - else if (isAssumedReadOnly()) - STATS_DECLTRACK_FN_ATTR(readonly) - else if (isAssumedWriteOnly()) - STATS_DECLTRACK_FN_ATTR(writeonly) - } -}; - -/// AAMemoryBehavior attribute for call sites. -struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { - AAMemoryBehaviorCallSite(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AAMemoryBehaviorImpl::initialize(A); - Function *F = getAssociatedFunction(); - if (!F || !F->hasExactDefinition()) - indicatePessimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Once we have call site specific value information we can provide - // call site specific liveness liveness information and then it makes - // sense to specialize attributes for call sites arguments instead of - // redirecting requests to the callee argument. 
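// The call-site variants above and below forward to the callee/function
// position through clampStateAndIndicateChange; a standalone sketch of that
// helper's basic contract (simplified bit state, known bits omitted):
// intersect the local assumed bits with the callee's and report CHANGED only
// if something actually shrank.
#include <cstdint>
#include <iostream>

enum class ChangeStatus { UNCHANGED, CHANGED };

struct BitState {
  uint8_t Assumed = 0xFF; // fully optimistic by default
};

static ChangeStatus clampStateAndIndicateChange(BitState &S,
                                                const BitState &CalleeState) {
  uint8_t OldAssumed = S.Assumed;
  S.Assumed &= CalleeState.Assumed; // never more optimistic than the callee
  return S.Assumed == OldAssumed ? ChangeStatus::UNCHANGED
                                 : ChangeStatus::CHANGED;
}

int main() {
  BitState CallSiteState;                // fully optimistic
  BitState CalleeState{/*Assumed=*/0x3}; // callee already settled on fewer bits
  bool Changed = clampStateAndIndicateChange(CallSiteState, CalleeState) ==
                 ChangeStatus::CHANGED;
  std::cout << Changed << "\n"; // 1: the assumed bits shrank from 0xFF to 0x3
  return 0;
}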
- Function *F = getAssociatedFunction(); - const IRPosition &FnPos = IRPosition::function(*F); - auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos); - return clampStateAndIndicateChange( - getState(), - static_cast<const AAMemoryBehavior::StateType &>(FnAA.getState())); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - if (isAssumedReadNone()) - STATS_DECLTRACK_CS_ATTR(readnone) - else if (isAssumedReadOnly()) - STATS_DECLTRACK_CS_ATTR(readonly) - else if (isAssumedWriteOnly()) - STATS_DECLTRACK_CS_ATTR(writeonly) - } -}; -} // namespace - -ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) { - - // The current assumed state used to determine a change. - auto AssumedState = getAssumed(); - - auto CheckRWInst = [&](Instruction &I) { - // If the instruction has an own memory behavior state, use it to restrict - // the local state. No further analysis is required as the other memory - // state is as optimistic as it gets. - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { - const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>( - *this, IRPosition::callsite_function(ICS)); - intersectAssumedBits(MemBehaviorAA.getAssumed()); - return !isAtFixpoint(); - } - - // Remove access kind modifiers if necessary. - if (I.mayReadFromMemory()) - removeAssumedBits(NO_READS); - if (I.mayWriteToMemory()) - removeAssumedBits(NO_WRITES); - return !isAtFixpoint(); - }; - - if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this)) - return indicatePessimisticFixpoint(); - - return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED - : ChangeStatus::UNCHANGED; -} - -ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) { - - const IRPosition &IRP = getIRPosition(); - const IRPosition &FnPos = IRPosition::function_scope(IRP); - AAMemoryBehavior::StateType &S = getState(); - - // First, check the function scope. We take the known information and we avoid - // work if the assumed information implies the current assumed information for - // this attribute. This is a valid for all but byval arguments. - Argument *Arg = IRP.getAssociatedArgument(); - AAMemoryBehavior::base_t FnMemAssumedState = - AAMemoryBehavior::StateType::getWorstState(); - if (!Arg || !Arg->hasByValAttr()) { - const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos); - FnMemAssumedState = FnMemAA.getAssumed(); - S.addKnownBits(FnMemAA.getKnown()); - if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed()) - return ChangeStatus::UNCHANGED; - } - - // Make sure the value is not captured (except through "return"), if - // it is, any information derived would be irrelevant anyway as we cannot - // check the potential aliases introduced by the capture. However, no need - // to fall back to anythign less optimistic than the function state. - const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>( - *this, IRP, /* TrackDependence */ true, DepClassTy::OPTIONAL); - if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { - S.intersectAssumedBits(FnMemAssumedState); - return ChangeStatus::CHANGED; - } - - // The current assumed state used to determine a change. - auto AssumedState = S.getAssumed(); - - // Liveness information to exclude dead users. - // TODO: Take the FnPos once we have call site specific liveness information. - const auto &LivenessAA = A.getAAFor<AAIsDead>( - *this, IRPosition::function(*IRP.getAssociatedFunction())); - - // Visit and expand uses until all are analyzed or a fixpoint is reached. 
- for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) { - const Use *U = Uses[i]; - Instruction *UserI = cast<Instruction>(U->getUser()); - LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI - << " [Dead: " << (LivenessAA.isAssumedDead(UserI)) - << "]\n"); - if (LivenessAA.isAssumedDead(UserI)) - continue; - - // Check if the users of UserI should also be visited. - if (followUsersOfUseIn(A, U, UserI)) - for (const Use &UserIUse : UserI->uses()) - Uses.insert(&UserIUse); - - // If UserI might touch memory we analyze the use in detail. - if (UserI->mayReadOrWriteMemory()) - analyzeUseIn(A, U, UserI); - } - - return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED - : ChangeStatus::UNCHANGED; -} - -bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, - const Instruction *UserI) { - // The loaded value is unrelated to the pointer argument, no need to - // follow the users of the load. - if (isa<LoadInst>(UserI)) - return false; - - // By default we follow all uses assuming UserI might leak information on U, - // we have special handling for call sites operands though. - ImmutableCallSite ICS(UserI); - if (!ICS || !ICS.isArgOperand(U)) + // If the value is replaced by another one, for now a constant, we do not have + // uses. Note that this requires users of `checkForAllUses` to not recurse but + // instead use the `follow` callback argument to look at transitive users, + // however, that should be clear from the presence of the argument. + bool UsedAssumedInformation = false; + Optional<Constant *> C = + getAssumedConstant(V, QueryingAA, UsedAssumedInformation); + if (C.hasValue() && C.getValue()) { + LLVM_DEBUG(dbgs() << "[Attributor] Value is simplified, uses skipped: " << V + << " -> " << *C.getValue() << "\n"); return true; - - // If the use is a call argument known not to be captured, the users of - // the call do not need to be visited because they have to be unrelated to - // the input. Note that this check is not trivial even though we disallow - // general capturing of the underlying argument. The reason is that the - // call might the argument "through return", which we allow and for which we - // need to check call users. - unsigned ArgNo = ICS.getArgumentNo(U); - const auto &ArgNoCaptureAA = - A.getAAFor<AANoCapture>(*this, IRPosition::callsite_argument(ICS, ArgNo)); - return !ArgNoCaptureAA.isAssumedNoCapture(); -} - -void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, - const Instruction *UserI) { - assert(UserI->mayReadOrWriteMemory()); - - switch (UserI->getOpcode()) { - default: - // TODO: Handle all atomics and other side-effect operations we know of. - break; - case Instruction::Load: - // Loads cause the NO_READS property to disappear. - removeAssumedBits(NO_READS); - return; - - case Instruction::Store: - // Stores cause the NO_WRITES property to disappear if the use is the - // pointer operand. Note that we do assume that capturing was taken care of - // somewhere else. - if (cast<StoreInst>(UserI)->getPointerOperand() == U->get()) - removeAssumedBits(NO_WRITES); - return; - - case Instruction::Call: - case Instruction::CallBr: - case Instruction::Invoke: { - // For call sites we look at the argument memory behavior attribute (this - // could be recursive!) in order to restrict our own state. - ImmutableCallSite ICS(UserI); - - // Give up on operand bundles. 
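// A standalone sketch of the worklist pattern used by the use walk above (and
// by Attributor::checkForAllUses): deduplicate with a visited set and let the
// callback decide, per use, whether to keep following the user's own uses.
// Types and names here are illustrative only.
#include <functional>
#include <unordered_set>
#include <vector>

struct UseNode {
  int Id;
  std::vector<UseNode *> UserUses; // the uses of this use's user
};

// Returns false as soon as the predicate rejects a use.
static bool forAllTransitiveUses(
    std::vector<UseNode *> Worklist,
    const std::function<bool(UseNode &, bool &Follow)> &Pred) {
  std::unordered_set<UseNode *> Visited;
  while (!Worklist.empty()) {
    UseNode *U = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(U).second)
      continue;                 // already analyzed
    bool Follow = false;
    if (!Pred(*U, Follow))
      return false;
    if (Follow)                 // e.g. for GEPs, bitcasts, phis, selects
      for (UseNode *UU : U->UserUses)
        Worklist.push_back(UU);
  }
  return true;
}

int main() {
  UseNode Leaf{2, {}}, Root{1, {&Leaf}};
  bool AllBenign = forAllTransitiveUses({&Root}, [](UseNode &U, bool &Follow) {
    Follow = true;              // follow everything in this toy example
    return U.Id != 3;           // pretend use #3 would be the "bad" one
  });
  return AllBenign ? 0 : 1;
}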
- if (ICS.isBundleOperand(U)) { - indicatePessimisticFixpoint(); - return; - } - - // Calling a function does read the function pointer, maybe write it if the - // function is self-modifying. - if (ICS.isCallee(U)) { - removeAssumedBits(NO_READS); - break; - } - - // Adjust the possible access behavior based on the information on the - // argument. - unsigned ArgNo = ICS.getArgumentNo(U); - const IRPosition &ArgPos = IRPosition::callsite_argument(ICS, ArgNo); - const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos); - // "assumed" has at most the same bits as the MemBehaviorAA assumed - // and at least "known". - intersectAssumedBits(MemBehaviorAA.getAssumed()); - return; - } - }; - - // Generally, look at the "may-properties" and adjust the assumed state if we - // did not trigger special handling before. - if (UserI->mayReadFromMemory()) - removeAssumedBits(NO_READS); - if (UserI->mayWriteToMemory()) - removeAssumedBits(NO_WRITES); -} -/// ------------------ Value Constant Range Attribute ------------------------- - -struct AAValueConstantRangeImpl : AAValueConstantRange { - using StateType = IntegerRangeState; - AAValueConstantRangeImpl(const IRPosition &IRP) : AAValueConstantRange(IRP) {} - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - std::string Str; - llvm::raw_string_ostream OS(Str); - OS << "range(" << getBitWidth() << ")<"; - getKnown().print(OS); - OS << " / "; - getAssumed().print(OS); - OS << ">"; - return OS.str(); - } - - /// Helper function to get a SCEV expr for the associated value at program - /// point \p I. - const SCEV *getSCEV(Attributor &A, const Instruction *I = nullptr) const { - if (!getAnchorScope()) - return nullptr; - - ScalarEvolution *SE = - A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>( - *getAnchorScope()); - - LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>( - *getAnchorScope()); - - if (!SE || !LI) - return nullptr; - - const SCEV *S = SE->getSCEV(&getAssociatedValue()); - if (!I) - return S; - - return SE->getSCEVAtScope(S, LI->getLoopFor(I->getParent())); - } - - /// Helper function to get a range from SCEV for the associated value at - /// program point \p I. - ConstantRange getConstantRangeFromSCEV(Attributor &A, - const Instruction *I = nullptr) const { - if (!getAnchorScope()) - return getWorstState(getBitWidth()); - - ScalarEvolution *SE = - A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>( - *getAnchorScope()); - - const SCEV *S = getSCEV(A, I); - if (!SE || !S) - return getWorstState(getBitWidth()); - - return SE->getUnsignedRange(S); - } - - /// Helper function to get a range from LVI for the associated value at - /// program point \p I. - ConstantRange - getConstantRangeFromLVI(Attributor &A, - const Instruction *CtxI = nullptr) const { - if (!getAnchorScope()) - return getWorstState(getBitWidth()); - - LazyValueInfo *LVI = - A.getInfoCache().getAnalysisResultForFunction<LazyValueAnalysis>( - *getAnchorScope()); - - if (!LVI || !CtxI) - return getWorstState(getBitWidth()); - return LVI->getConstantRange(&getAssociatedValue(), - const_cast<BasicBlock *>(CtxI->getParent()), - const_cast<Instruction *>(CtxI)); - } - - /// See AAValueConstantRange::getKnownConstantRange(..). 
- ConstantRange - getKnownConstantRange(Attributor &A, - const Instruction *CtxI = nullptr) const override { - if (!CtxI || CtxI == getCtxI()) - return getKnown(); - - ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI); - ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI); - return getKnown().intersectWith(SCEVR).intersectWith(LVIR); } - /// See AAValueConstantRange::getAssumedConstantRange(..). - ConstantRange - getAssumedConstantRange(Attributor &A, - const Instruction *CtxI = nullptr) const override { - // TODO: Make SCEV use Attributor assumption. - // We may be able to bound a variable range via assumptions in - // Attributor. ex.) If x is assumed to be in [1, 3] and y is known to - // evolve to x^2 + x, then we can say that y is in [2, 12]. - - if (!CtxI || CtxI == getCtxI()) - return getAssumed(); - - ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI); - ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI); - return getAssumed().intersectWith(SCEVR).intersectWith(LVIR); - } - - /// See AbstractAttribute::initialize(..). - void initialize(Attributor &A) override { - // Intersect a range given by SCEV. - intersectKnown(getConstantRangeFromSCEV(A, getCtxI())); - - // Intersect a range given by LVI. - intersectKnown(getConstantRangeFromLVI(A, getCtxI())); - } - - /// Helper function to create MDNode for range metadata. - static MDNode * - getMDNodeForConstantRange(Type *Ty, LLVMContext &Ctx, - const ConstantRange &AssumedConstantRange) { - Metadata *LowAndHigh[] = {ConstantAsMetadata::get(ConstantInt::get( - Ty, AssumedConstantRange.getLower())), - ConstantAsMetadata::get(ConstantInt::get( - Ty, AssumedConstantRange.getUpper()))}; - return MDNode::get(Ctx, LowAndHigh); - } - - /// Return true if \p Assumed is included in \p KnownRanges. - static bool isBetterRange(const ConstantRange &Assumed, MDNode *KnownRanges) { - - if (Assumed.isFullSet()) - return false; - - if (!KnownRanges) - return true; - - // If multiple ranges are annotated in IR, we give up to annotate assumed - // range for now. - - // TODO: If there exists a known range which containts assumed range, we - // can say assumed range is better. - if (KnownRanges->getNumOperands() > 2) - return false; - - ConstantInt *Lower = - mdconst::extract<ConstantInt>(KnownRanges->getOperand(0)); - ConstantInt *Upper = - mdconst::extract<ConstantInt>(KnownRanges->getOperand(1)); - - ConstantRange Known(Lower->getValue(), Upper->getValue()); - return Known.contains(Assumed) && Known != Assumed; - } - - /// Helper function to set range metadata. 
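// A standalone sketch (simple half-open-interval struct instead of
// llvm::ConstantRange) of the isBetterRange test above: a newly assumed range
// is worth writing as !range metadata only if it is strictly tighter than the
// single range that is already annotated, if any. The real code additionally
// bails out when the assumed range is the full set; that case is omitted here.
#include <cstdint>
#include <iostream>
#include <optional>

struct Range {
  uint64_t Lo, Hi; // half-open [Lo, Hi)
  bool contains(const Range &R) const { return Lo <= R.Lo && R.Hi <= Hi; }
  bool operator==(const Range &R) const { return Lo == R.Lo && Hi == R.Hi; }
};

static bool isBetterRange(const Range &Assumed,
                          const std::optional<Range> &Known) {
  if (!Known)
    return true;                                  // nothing annotated yet
  return Known->contains(Assumed) && !(*Known == Assumed); // strictly tighter
}

int main() {
  Range Old{0, 100}, Tighter{10, 20};
  std::cout << isBetterRange(Tighter, Old) << "\n";          // 1
  std::cout << isBetterRange(Old, Old) << "\n";              // 0: not strictly tighter
  std::cout << isBetterRange(Tighter, std::nullopt) << "\n"; // 1
  return 0;
}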
- static bool - setRangeMetadataIfisBetterRange(Instruction *I, - const ConstantRange &AssumedConstantRange) { - auto *OldRangeMD = I->getMetadata(LLVMContext::MD_range); - if (isBetterRange(AssumedConstantRange, OldRangeMD)) { - if (!AssumedConstantRange.isEmptySet()) { - I->setMetadata(LLVMContext::MD_range, - getMDNodeForConstantRange(I->getType(), I->getContext(), - AssumedConstantRange)); - return true; - } - } - return false; - } - - /// See AbstractAttribute::manifest() - ChangeStatus manifest(Attributor &A) override { - ChangeStatus Changed = ChangeStatus::UNCHANGED; - ConstantRange AssumedConstantRange = getAssumedConstantRange(A); - assert(!AssumedConstantRange.isFullSet() && "Invalid state"); - - auto &V = getAssociatedValue(); - if (!AssumedConstantRange.isEmptySet() && - !AssumedConstantRange.isSingleElement()) { - if (Instruction *I = dyn_cast<Instruction>(&V)) - if (isa<CallInst>(I) || isa<LoadInst>(I)) - if (setRangeMetadataIfisBetterRange(I, AssumedConstantRange)) - Changed = ChangeStatus::CHANGED; - } - - return Changed; - } -}; - -struct AAValueConstantRangeArgument final : public AAValueConstantRangeImpl { - - AAValueConstantRangeArgument(const IRPosition &IRP) - : AAValueConstantRangeImpl(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Use AAArgumentFromCallSiteArguments - - IntegerRangeState S(getBitWidth()); - clampCallSiteArgumentStates<AAValueConstantRange, IntegerRangeState>( - A, *this, S); - - // TODO: If we know we visited all incoming values, thus no are assumed - // dead, we can take the known information from the state T. - return clampStateAndIndicateChange<IntegerRangeState>(this->getState(), S); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_ARG_ATTR(value_range) - } -}; - -struct AAValueConstantRangeReturned : AAValueConstantRangeImpl { - AAValueConstantRangeReturned(const IRPosition &IRP) - : AAValueConstantRangeImpl(IRP) {} - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - // TODO: Use AAReturnedFromReturnedValues - - // TODO: If we know we visited all returned values, thus no are assumed - // dead, we can take the known information from the state T. - - IntegerRangeState S(getBitWidth()); - - clampReturnedValueStates<AAValueConstantRange, IntegerRangeState>(A, *this, - S); - return clampStateAndIndicateChange<StateType>(this->getState(), S); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FNRET_ATTR(value_range) - } -}; - -struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { - AAValueConstantRangeFloating(const IRPosition &IRP) - : AAValueConstantRangeImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AAValueConstantRange::initialize(A); - Value &V = getAssociatedValue(); - - if (auto *C = dyn_cast<ConstantInt>(&V)) { - unionAssumed(ConstantRange(C->getValue())); - indicateOptimisticFixpoint(); - return; - } - - if (isa<UndefValue>(&V)) { - indicateOptimisticFixpoint(); - return; - } - - if (auto *I = dyn_cast<Instruction>(&V)) - if (isa<BinaryOperator>(I) || isa<CmpInst>(I)) { - Value *LHS = I->getOperand(0); - Value *RHS = I->getOperand(1); - - if (LHS->getType()->isIntegerTy() && RHS->getType()->isIntegerTy()) - return; - } - - // If it is a load instruction with range metadata, use it. 
- if (LoadInst *LI = dyn_cast<LoadInst>(&V)) - if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range)) { - intersectKnown(getConstantRangeFromMetadata(*RangeMD)); - return; - } - - // Otherwise we give up. - indicatePessimisticFixpoint(); - - LLVM_DEBUG(dbgs() << "[Attributor][AAValueConstantRange] We give up: " - << getAssociatedValue()); - } - - bool calculateBinaryOperator(Attributor &A, BinaryOperator *BinOp, - IntegerRangeState &T, Instruction *CtxI) { - Value *LHS = BinOp->getOperand(0); - Value *RHS = BinOp->getOperand(1); - - auto &LHSAA = - A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS)); - auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI); - - auto &RHSAA = - A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS)); - auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI); - - auto AssumedRange = LHSAARange.binaryOp(BinOp->getOpcode(), RHSAARange); - - T.unionAssumed(AssumedRange); - - // TODO: Track a known state too. - - return T.isValidState(); - } - - bool calculateCmpInst(Attributor &A, CmpInst *CmpI, IntegerRangeState &T, - Instruction *CtxI) { - Value *LHS = CmpI->getOperand(0); - Value *RHS = CmpI->getOperand(1); - - auto &LHSAA = - A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS)); - auto &RHSAA = - A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS)); - - auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI); - auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI); - - // If one of them is empty set, we can't decide. - if (LHSAARange.isEmptySet() || RHSAARange.isEmptySet()) - return true; - - bool MustTrue = false, MustFalse = false; - - auto AllowedRegion = - ConstantRange::makeAllowedICmpRegion(CmpI->getPredicate(), RHSAARange); - - auto SatisfyingRegion = ConstantRange::makeSatisfyingICmpRegion( - CmpI->getPredicate(), RHSAARange); - - if (AllowedRegion.intersectWith(LHSAARange).isEmptySet()) - MustFalse = true; - - if (SatisfyingRegion.contains(LHSAARange)) - MustTrue = true; - - assert((!MustTrue || !MustFalse) && - "Either MustTrue or MustFalse should be false!"); - - if (MustTrue) - T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 1))); - else if (MustFalse) - T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 0))); - else - T.unionAssumed(ConstantRange(/* BitWidth */ 1, /* isFullSet */ true)); - - LLVM_DEBUG(dbgs() << "[AAValueConstantRange] " << *CmpI << " " << LHSAA - << " " << RHSAA << "\n"); - - // TODO: Track a known state too. - return T.isValidState(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - Instruction *CtxI = getCtxI(); - auto VisitValueCB = [&](Value &V, IntegerRangeState &T, - bool Stripped) -> bool { - Instruction *I = dyn_cast<Instruction>(&V); - if (!I) { - - // If the value is not instruction, we query AA to Attributor. - const auto &AA = - A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(V)); - - // Clamp operator is not used to utilize a program point CtxI. - T.unionAssumed(AA.getAssumedConstantRange(A, CtxI)); - - return T.isValidState(); - } - - if (auto *BinOp = dyn_cast<BinaryOperator>(I)) - return calculateBinaryOperator(A, BinOp, T, CtxI); - else if (auto *CmpI = dyn_cast<CmpInst>(I)) - return calculateCmpInst(A, CmpI, T, CtxI); - else { - // Give up with other instructions. 
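// A standalone sketch of the idea behind calculateCmpInst above, using closed
// unsigned intervals instead of ConstantRange's ICmp region helpers: an
// unsigned '<' folds to true when every LHS value is below every RHS value,
// to false when no LHS value can be below any RHS value, and otherwise stays
// unknown. Names are illustrative.
#include <cstdint>
#include <iostream>
#include <optional>

struct Interval {
  uint64_t Min, Max; // closed [Min, Max]
};

static std::optional<bool> foldULT(Interval LHS, Interval RHS) {
  if (LHS.Max < RHS.Min)
    return true;         // even the largest LHS is below the smallest RHS
  if (LHS.Min >= RHS.Max)
    return false;        // even the smallest LHS is not below the largest RHS
  return std::nullopt;   // the ranges overlap, cannot decide
}

int main() {
  auto Print = [](std::optional<bool> R) {
    std::cout << (R ? (*R ? "true" : "false") : "unknown") << "\n";
  };
  Print(foldULT({0, 5}, {10, 20}));  // true
  Print(foldULT({10, 20}, {0, 5}));  // false
  Print(foldULT({0, 15}, {10, 20})); // unknown
  return 0;
}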
- // TODO: Add other instructions - - T.indicatePessimisticFixpoint(); - return false; - } - }; - - IntegerRangeState T(getBitWidth()); - - if (!genericValueTraversal<AAValueConstantRange, IntegerRangeState>( - A, getIRPosition(), *this, T, VisitValueCB)) - return indicatePessimisticFixpoint(); - - return clampStateAndIndicateChange(getState(), T); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FLOATING_ATTR(value_range) - } -}; - -struct AAValueConstantRangeFunction : AAValueConstantRangeImpl { - AAValueConstantRangeFunction(const IRPosition &IRP) - : AAValueConstantRangeImpl(IRP) {} - - /// See AbstractAttribute::initialize(...). - ChangeStatus updateImpl(Attributor &A) override { - llvm_unreachable("AAValueConstantRange(Function|CallSite)::updateImpl will " - "not be called"); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(value_range) } -}; - -struct AAValueConstantRangeCallSite : AAValueConstantRangeFunction { - AAValueConstantRangeCallSite(const IRPosition &IRP) - : AAValueConstantRangeFunction(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(value_range) } -}; - -struct AAValueConstantRangeCallSiteReturned : AAValueConstantRangeReturned { - AAValueConstantRangeCallSiteReturned(const IRPosition &IRP) - : AAValueConstantRangeReturned(IRP) {} - - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - // If it is a load instruction with range metadata, use the metadata. - if (CallInst *CI = dyn_cast<CallInst>(&getAssociatedValue())) - if (auto *RangeMD = CI->getMetadata(LLVMContext::MD_range)) - intersectKnown(getConstantRangeFromMetadata(*RangeMD)); - - AAValueConstantRangeReturned::initialize(A); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CSRET_ATTR(value_range) - } -}; -struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating { - AAValueConstantRangeCallSiteArgument(const IRPosition &IRP) - : AAValueConstantRangeFloating(IRP) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CSARG_ATTR(value_range) - } -}; -/// ---------------------------------------------------------------------------- -/// Attributor -/// ---------------------------------------------------------------------------- - -bool Attributor::isAssumedDead(const AbstractAttribute &AA, - const AAIsDead *LivenessAA) { - const Instruction *CtxI = AA.getIRPosition().getCtxI(); - if (!CtxI) - return false; - - // TODO: Find a good way to utilize fine and coarse grained liveness - // information. - if (!LivenessAA) - LivenessAA = - &getAAFor<AAIsDead>(AA, IRPosition::function(*CtxI->getFunction()), - /* TrackDependence */ false); - - // Don't check liveness for AAIsDead. - if (&AA == LivenessAA) - return false; - - if (!LivenessAA->isAssumedDead(CtxI)) - return false; - - // We actually used liveness information so we have to record a dependence. 
- recordDependence(*LivenessAA, AA, DepClassTy::OPTIONAL); - - return true; -} - -bool Attributor::checkForAllUses( - const function_ref<bool(const Use &, bool &)> &Pred, - const AbstractAttribute &QueryingAA, const Value &V) { const IRPosition &IRP = QueryingAA.getIRPosition(); SmallVector<const Use *, 16> Worklist; SmallPtrSet<const Use *, 16> Visited; @@ -5601,10 +646,6 @@ bool Attributor::checkForAllUses( LLVM_DEBUG(dbgs() << "[Attributor] Got " << Worklist.size() << " initial uses to check\n"); - if (Worklist.empty()) - return true; - - bool AnyDead = false; const Function *ScopeFn = IRP.getAnchorScope(); const auto *LivenessAA = ScopeFn ? &getAAFor<AAIsDead>(QueryingAA, IRPosition::function(*ScopeFn), @@ -5615,14 +656,17 @@ bool Attributor::checkForAllUses( const Use *U = Worklist.pop_back_val(); if (!Visited.insert(U).second) continue; - LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << "\n"); - if (Instruction *UserI = dyn_cast<Instruction>(U->getUser())) - if (LivenessAA && LivenessAA->isAssumedDead(UserI)) { - LLVM_DEBUG(dbgs() << "[Attributor] Dead user: " << *UserI << ": " - << *LivenessAA << "\n"); - AnyDead = true; - continue; - } + LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in " + << *U->getUser() << "\n"); + if (isAssumedDead(*U, &QueryingAA, LivenessAA, + /* CheckBBLivenessOnly */ false, LivenessDepClass)) { + LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n"); + continue; + } + if (U->getUser()->isDroppable()) { + LLVM_DEBUG(dbgs() << "[Attributor] Droppable user, skip!\n"); + continue; + } bool Follow = false; if (!Pred(*U, Follow)) @@ -5633,15 +677,13 @@ bool Attributor::checkForAllUses( Worklist.push_back(&UU); } - if (AnyDead) - recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL); - return true; } -bool Attributor::checkForAllCallSites( - const function_ref<bool(AbstractCallSite)> &Pred, - const AbstractAttribute &QueryingAA, bool RequireAllCallSites) { +bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, + const AbstractAttribute &QueryingAA, + bool RequireAllCallSites, + bool &AllCallSitesKnown) { // We can try to determine information from // the call sites. However, this is only possible all call sites are known, // hence the function has internal linkage. @@ -5650,25 +692,49 @@ bool Attributor::checkForAllCallSites( if (!AssociatedFunction) { LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP << "\n"); + AllCallSitesKnown = false; return false; } return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites, - &QueryingAA); + &QueryingAA, AllCallSitesKnown); } -bool Attributor::checkForAllCallSites( - const function_ref<bool(AbstractCallSite)> &Pred, const Function &Fn, - bool RequireAllCallSites, const AbstractAttribute *QueryingAA) { +bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, + const Function &Fn, + bool RequireAllCallSites, + const AbstractAttribute *QueryingAA, + bool &AllCallSitesKnown) { if (RequireAllCallSites && !Fn.hasLocalLinkage()) { LLVM_DEBUG( dbgs() << "[Attributor] Function " << Fn.getName() << " has no internal linkage, hence not all call sites are known\n"); + AllCallSitesKnown = false; return false; } - for (const Use &U : Fn.uses()) { + // If we do not require all call sites we might not see all. 
+ AllCallSitesKnown = RequireAllCallSites; + + SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses())); + for (unsigned u = 0; u < Uses.size(); ++u) { + const Use &U = *Uses[u]; + LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in " + << *U.getUser() << "\n"); + if (isAssumedDead(U, QueryingAA, nullptr, /* CheckBBLivenessOnly */ true)) { + LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n"); + continue; + } + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) { + if (CE->isCast() && CE->getType()->isPointerTy() && + CE->getType()->getPointerElementType()->isFunctionTy()) { + for (const Use &CEU : CE->uses()) + Uses.push_back(&CEU); + continue; + } + } + AbstractCallSite ACS(&U); if (!ACS) { LLVM_DEBUG(dbgs() << "[Attributor] Function " << Fn.getName() @@ -5680,22 +746,6 @@ bool Attributor::checkForAllCallSites( return false; } - Instruction *I = ACS.getInstruction(); - Function *Caller = I->getFunction(); - - const auto *LivenessAA = - lookupAAFor<AAIsDead>(IRPosition::function(*Caller), QueryingAA, - /* TrackDependence */ false); - - // Skip dead calls. - if (LivenessAA && LivenessAA->isAssumedDead(I)) { - // We actually used liveness information so we have to record a - // dependence. - if (QueryingAA) - recordDependence(*LivenessAA, *QueryingAA, DepClassTy::OPTIONAL); - continue; - } - const Use *EffectiveUse = ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U; if (!ACS.isCallee(EffectiveUse)) { @@ -5706,6 +756,24 @@ bool Attributor::checkForAllCallSites( return false; } + // Make sure the arguments that can be matched between the call site and the + // callee argee on their type. It is unlikely they do not and it doesn't + // make sense for all attributes to know/care about this. + assert(&Fn == ACS.getCalledFunction() && "Expected known callee"); + unsigned MinArgsParams = + std::min(size_t(ACS.getNumArgOperands()), Fn.arg_size()); + for (unsigned u = 0; u < MinArgsParams; ++u) { + Value *CSArgOp = ACS.getCallArgOperand(u); + if (CSArgOp && Fn.getArg(u)->getType() != CSArgOp->getType()) { + LLVM_DEBUG( + dbgs() << "[Attributor] Call site / callee argument type mismatch [" + << u << "@" << Fn.getName() << ": " + << *Fn.getArg(u)->getType() << " vs. 
" + << *ACS.getCallArgOperand(u)->getType() << "\n"); + return false; + } + } + if (Pred(ACS)) continue; @@ -5718,8 +786,7 @@ bool Attributor::checkForAllCallSites( } bool Attributor::checkForAllReturnedValuesAndReturnInsts( - const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> - &Pred, + function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred, const AbstractAttribute &QueryingAA) { const IRPosition &IRP = QueryingAA.getIRPosition(); @@ -5741,8 +808,7 @@ bool Attributor::checkForAllReturnedValuesAndReturnInsts( } bool Attributor::checkForAllReturnedValues( - const function_ref<bool(Value &)> &Pred, - const AbstractAttribute &QueryingAA) { + function_ref<bool(Value &)> Pred, const AbstractAttribute &QueryingAA) { const IRPosition &IRP = QueryingAA.getIRPosition(); const Function *AssociatedFunction = IRP.getAssociatedFunction(); @@ -5761,18 +827,22 @@ bool Attributor::checkForAllReturnedValues( }); } -static bool -checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap, - const function_ref<bool(Instruction &)> &Pred, - const AAIsDead *LivenessAA, bool &AnyDead, - const ArrayRef<unsigned> &Opcodes) { +static bool checkForAllInstructionsImpl( + Attributor *A, InformationCache::OpcodeInstMapTy &OpcodeInstMap, + function_ref<bool(Instruction &)> Pred, const AbstractAttribute *QueryingAA, + const AAIsDead *LivenessAA, const ArrayRef<unsigned> &Opcodes, + bool CheckBBLivenessOnly = false) { for (unsigned Opcode : Opcodes) { - for (Instruction *I : OpcodeInstMap[Opcode]) { + // Check if we have instructions with this opcode at all first. + auto *Insts = OpcodeInstMap.lookup(Opcode); + if (!Insts) + continue; + + for (Instruction *I : *Insts) { // Skip dead instructions. - if (LivenessAA && LivenessAA->isAssumedDead(I)) { - AnyDead = true; + if (A && A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA, + CheckBBLivenessOnly)) continue; - } if (!Pred(*I)) return false; @@ -5781,9 +851,10 @@ checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap, return true; } -bool Attributor::checkForAllInstructions( - const llvm::function_ref<bool(Instruction &)> &Pred, - const AbstractAttribute &QueryingAA, const ArrayRef<unsigned> &Opcodes) { +bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred, + const AbstractAttribute &QueryingAA, + const ArrayRef<unsigned> &Opcodes, + bool CheckBBLivenessOnly) { const IRPosition &IRP = QueryingAA.getIRPosition(); // Since we need to provide instructions we have to have an exact definition. @@ -5795,24 +866,18 @@ bool Attributor::checkForAllInstructions( const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); const auto &LivenessAA = getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false); - bool AnyDead = false; auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); - if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead, - Opcodes)) + if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA, + &LivenessAA, Opcodes, CheckBBLivenessOnly)) return false; - // If we actually used liveness information so we have to record a dependence. 
- if (AnyDead) - recordDependence(LivenessAA, QueryingAA, DepClassTy::OPTIONAL); - return true; } bool Attributor::checkForAllReadWriteInstructions( - const llvm::function_ref<bool(Instruction &)> &Pred, - AbstractAttribute &QueryingAA) { + function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA) { const Function *AssociatedFunction = QueryingAA.getIRPosition().getAssociatedFunction(); @@ -5823,28 +888,21 @@ bool Attributor::checkForAllReadWriteInstructions( const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); const auto &LivenessAA = getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false); - bool AnyDead = false; for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) { // Skip dead instructions. - if (LivenessAA.isAssumedDead(I)) { - AnyDead = true; + if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA)) continue; - } if (!Pred(*I)) return false; } - // If we actually used liveness information so we have to record a dependence. - if (AnyDead) - recordDependence(LivenessAA, QueryingAA, DepClassTy::OPTIONAL); - return true; } -ChangeStatus Attributor::run(Module &M) { +void Attributor::runTillFixpoint() { LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized " << AllAbstractAttributes.size() << " abstract attributes.\n"); @@ -5854,12 +912,10 @@ ChangeStatus Attributor::run(Module &M) { unsigned IterationCounter = 1; - SmallVector<AbstractAttribute *, 64> ChangedAAs; + SmallVector<AbstractAttribute *, 32> ChangedAAs; SetVector<AbstractAttribute *> Worklist, InvalidAAs; Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end()); - bool RecomputeDependences = false; - do { // Remember the size to determine new attributes. size_t NumAAs = AllAbstractAttributes.size(); @@ -5871,44 +927,35 @@ ChangeStatus Attributor::run(Module &M) { // to run updates. for (unsigned u = 0; u < InvalidAAs.size(); ++u) { AbstractAttribute *InvalidAA = InvalidAAs[u]; - auto &QuerriedAAs = QueryMap[InvalidAA]; + + // Check the dependences to fast track invalidation. LLVM_DEBUG(dbgs() << "[Attributor] InvalidAA: " << *InvalidAA << " has " - << QuerriedAAs.RequiredAAs.size() << "/" - << QuerriedAAs.OptionalAAs.size() - << " required/optional dependences\n"); - for (AbstractAttribute *DepOnInvalidAA : QuerriedAAs.RequiredAAs) { - AbstractState &DOIAAState = DepOnInvalidAA->getState(); - DOIAAState.indicatePessimisticFixpoint(); - ++NumAttributesFixedDueToRequiredDependences; - assert(DOIAAState.isAtFixpoint() && "Expected fixpoint state!"); - if (!DOIAAState.isValidState()) - InvalidAAs.insert(DepOnInvalidAA); + << InvalidAA->Deps.size() + << " required & optional dependences\n"); + while (!InvalidAA->Deps.empty()) { + const auto &Dep = InvalidAA->Deps.back(); + InvalidAA->Deps.pop_back(); + AbstractAttribute *DepAA = Dep.getPointer(); + if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) { + Worklist.insert(DepAA); + continue; + } + DepAA->getState().indicatePessimisticFixpoint(); + assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!"); + if (!DepAA->getState().isValidState()) + InvalidAAs.insert(DepAA); + else + ChangedAAs.push_back(DepAA); } - if (!RecomputeDependences) - Worklist.insert(QuerriedAAs.OptionalAAs.begin(), - QuerriedAAs.OptionalAAs.end()); - } - - // If dependences (=QueryMap) are recomputed we have to look at all abstract - // attributes again, regardless of what changed in the last iteration. 
- if (RecomputeDependences) { - LLVM_DEBUG( - dbgs() << "[Attributor] Run all AAs to recompute dependences\n"); - QueryMap.clear(); - ChangedAAs.clear(); - Worklist.insert(AllAbstractAttributes.begin(), - AllAbstractAttributes.end()); } // Add all abstract attributes that are potentially dependent on one that // changed to the work list. - for (AbstractAttribute *ChangedAA : ChangedAAs) { - auto &QuerriedAAs = QueryMap[ChangedAA]; - Worklist.insert(QuerriedAAs.OptionalAAs.begin(), - QuerriedAAs.OptionalAAs.end()); - Worklist.insert(QuerriedAAs.RequiredAAs.begin(), - QuerriedAAs.RequiredAAs.end()); - } + for (AbstractAttribute *ChangedAA : ChangedAAs) + while (!ChangedAA->Deps.empty()) { + Worklist.insert(ChangedAA->Deps.back().getPointer()); + ChangedAA->Deps.pop_back(); + } LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter << ", Worklist+Dependent size: " << Worklist.size() @@ -5920,23 +967,17 @@ ChangeStatus Attributor::run(Module &M) { // Update all abstract attribute in the work list and record the ones that // changed. - for (AbstractAttribute *AA : Worklist) - if (!AA->getState().isAtFixpoint() && !isAssumedDead(*AA, nullptr)) { - QueriedNonFixAA = false; - if (AA->update(*this) == ChangeStatus::CHANGED) { + for (AbstractAttribute *AA : Worklist) { + const auto &AAState = AA->getState(); + if (!AAState.isAtFixpoint()) + if (updateAA(*AA) == ChangeStatus::CHANGED) ChangedAAs.push_back(AA); - if (!AA->getState().isValidState()) - InvalidAAs.insert(AA); - } else if (!QueriedNonFixAA) { - // If the attribute did not query any non-fix information, the state - // will not change and we can indicate that right away. - AA->getState().indicateOptimisticFixpoint(); - } - } - // Check if we recompute the dependences in the next iteration. - RecomputeDependences = (DepRecomputeInterval > 0 && - IterationCounter % DepRecomputeInterval == 0); + // Use the InvalidAAs vector to propagate invalid states fast transitively + // without requiring updates. + if (!AAState.isValidState()) + InvalidAAs.insert(AA); + } // Add attributes to the changed set if they have been created in the last // iteration. @@ -5955,8 +996,6 @@ ChangeStatus Attributor::run(Module &M) { << IterationCounter << "/" << MaxFixpointIterations << " iterations\n"); - size_t NumFinalAAs = AllAbstractAttributes.size(); - // Reset abstract arguments not settled in a sound fixpoint by now. This // happens when we stopped the fixpoint iteration early. 
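// A standalone sketch (illustrative, not the Attributor classes) of the
// dependence handling in runTillFixpoint above: when an attribute's state
// becomes invalid, dependers that required it are forced to their pessimistic
// fixpoint right away (which can cascade), while dependers that only
// optionally used it are merely re-queued for another update.
#include <iostream>
#include <set>
#include <utility>
#include <vector>

struct AA {
  const char *Name;
  bool PessimisticStateIsValid;  // is the worst state still a usable result?
  bool Valid = true;
  bool AtFixpoint = false;
  // (depender, required?) pairs, mirroring AbstractAttribute::Deps.
  std::vector<std::pair<AA *, bool>> Deps;
  void indicatePessimisticFixpoint() {
    AtFixpoint = true;
    Valid = PessimisticStateIsValid;
  }
};

static void propagateInvalid(std::vector<AA *> InvalidAAs,
                             std::set<AA *> &Worklist) {
  for (size_t u = 0; u < InvalidAAs.size(); ++u) {
    AA *InvalidAA = InvalidAAs[u];
    while (!InvalidAA->Deps.empty()) {
      auto [DepAA, Required] = InvalidAA->Deps.back();
      InvalidAA->Deps.pop_back();
      if (!Required) {
        Worklist.insert(DepAA);               // optional: just update again
        continue;
      }
      DepAA->indicatePessimisticFixpoint();   // required: give up immediately
      if (!DepAA->Valid)
        InvalidAAs.push_back(DepAA);          // and cascade further
    }
  }
}

int main() {
  AA NoCapture{"nocapture", /*PessimisticStateIsValid=*/true};
  AA NoAlias{"noalias", /*PessimisticStateIsValid=*/true};
  AA Liveness{"liveness", /*PessimisticStateIsValid=*/false};
  Liveness.Deps = {{&NoCapture, /*required=*/true},
                   {&NoAlias, /*required=*/false}};
  Liveness.Valid = false;                     // liveness turned out invalid
  std::set<AA *> Worklist;
  propagateInvalid({&Liveness}, Worklist);
  std::cout << NoCapture.AtFixpoint << " " << Worklist.count(&NoAlias) << "\n"; // 1 1
  return 0;
}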
Note that only the // ones marked as "changed" *and* the ones transitively depending on them @@ -5975,11 +1014,10 @@ ChangeStatus Attributor::run(Module &M) { NumAttributesTimedOut++; } - auto &QuerriedAAs = QueryMap[ChangedAA]; - ChangedAAs.append(QuerriedAAs.OptionalAAs.begin(), - QuerriedAAs.OptionalAAs.end()); - ChangedAAs.append(QuerriedAAs.RequiredAAs.begin(), - QuerriedAAs.RequiredAAs.end()); + while (!ChangedAA->Deps.empty()) { + ChangedAAs.push_back(ChangedAA->Deps.back().getPointer()); + ChangedAA->Deps.pop_back(); + } } LLVM_DEBUG({ @@ -5988,6 +1026,19 @@ ChangeStatus Attributor::run(Module &M) { << " abstract attributes.\n"; }); + if (VerifyMaxFixpointIterations && + IterationCounter != MaxFixpointIterations) { + errs() << "\n[Attributor] Fixpoint iteration done after: " + << IterationCounter << "/" << MaxFixpointIterations + << " iterations\n"; + llvm_unreachable("The fixpoint was not reached with exactly the number of " + "specified iterations!"); + } +} + +ChangeStatus Attributor::manifestAttributes() { + size_t NumFinalAAs = AllAbstractAttributes.size(); + unsigned NumManifested = 0; unsigned NumAtFixpoint = 0; ChangeStatus ManifestChange = ChangeStatus::UNCHANGED; @@ -6006,12 +1057,14 @@ ChangeStatus Attributor::run(Module &M) { continue; // Skip dead code. - if (isAssumedDead(*AA, nullptr)) + if (isAssumedDead(*AA, nullptr, /* CheckBBLivenessOnly */ true)) continue; // Manifest the state and record if we changed the IR. ChangeStatus LocalChange = AA->manifest(*this); if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled()) AA->trackStatistics(); + LLVM_DEBUG(dbgs() << "[Attributor] Manifest " << LocalChange << " : " << *AA + << "\n"); ManifestChange = ManifestChange | LocalChange; @@ -6029,160 +1082,298 @@ ChangeStatus Attributor::run(Module &M) { NumAttributesValidFixpoint += NumAtFixpoint; (void)NumFinalAAs; - assert( - NumFinalAAs == AllAbstractAttributes.size() && - "Expected the final number of abstract attributes to remain unchanged!"); + if (NumFinalAAs != AllAbstractAttributes.size()) { + for (unsigned u = NumFinalAAs; u < AllAbstractAttributes.size(); ++u) + errs() << "Unexpected abstract attribute: " << *AllAbstractAttributes[u] + << " :: " + << AllAbstractAttributes[u]->getIRPosition().getAssociatedValue() + << "\n"; + llvm_unreachable("Expected the final number of abstract attributes to " + "remain unchanged!"); + } + return ManifestChange; +} +ChangeStatus Attributor::cleanupIR() { // Delete stuff at the end to avoid invalid references and a nice order. 
- { - LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least " - << ToBeDeletedFunctions.size() << " functions and " - << ToBeDeletedBlocks.size() << " blocks and " - << ToBeDeletedInsts.size() << " instructions and " - << ToBeChangedUses.size() << " uses\n"); - - SmallVector<Instruction *, 32> DeadInsts; - SmallVector<Instruction *, 32> TerminatorsToFold; - - for (auto &It : ToBeChangedUses) { - Use *U = It.first; - Value *NewV = It.second; - Value *OldV = U->get(); - LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser() - << " instead of " << *OldV << "\n"); - U->set(NewV); - if (Instruction *I = dyn_cast<Instruction>(OldV)) - if (!isa<PHINode>(I) && !ToBeDeletedInsts.count(I) && - isInstructionTriviallyDead(I)) { - DeadInsts.push_back(I); - } - if (isa<Constant>(NewV) && isa<BranchInst>(U->getUser())) { - Instruction *UserI = cast<Instruction>(U->getUser()); - if (isa<UndefValue>(NewV)) { - ToBeChangedToUnreachableInsts.insert(UserI); - } else { - TerminatorsToFold.push_back(UserI); - } + LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least " + << ToBeDeletedFunctions.size() << " functions and " + << ToBeDeletedBlocks.size() << " blocks and " + << ToBeDeletedInsts.size() << " instructions and " + << ToBeChangedUses.size() << " uses\n"); + + SmallVector<WeakTrackingVH, 32> DeadInsts; + SmallVector<Instruction *, 32> TerminatorsToFold; + + for (auto &It : ToBeChangedUses) { + Use *U = It.first; + Value *NewV = It.second; + Value *OldV = U->get(); + + // Do not replace uses in returns if the value is a must-tail call we will + // not delete. + if (isa<ReturnInst>(U->getUser())) + if (auto *CI = dyn_cast<CallInst>(OldV->stripPointerCasts())) + if (CI->isMustTailCall() && !ToBeDeletedInsts.count(CI)) + continue; + + LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser() + << " instead of " << *OldV << "\n"); + U->set(NewV); + // Do not modify call instructions outside the SCC. 
+ if (auto *CB = dyn_cast<CallBase>(OldV)) + if (!Functions.count(CB->getCaller())) + continue; + if (Instruction *I = dyn_cast<Instruction>(OldV)) { + CGModifiedFunctions.insert(I->getFunction()); + if (!isa<PHINode>(I) && !ToBeDeletedInsts.count(I) && + isInstructionTriviallyDead(I)) + DeadInsts.push_back(I); + } + if (isa<Constant>(NewV) && isa<BranchInst>(U->getUser())) { + Instruction *UserI = cast<Instruction>(U->getUser()); + if (isa<UndefValue>(NewV)) { + ToBeChangedToUnreachableInsts.insert(UserI); + } else { + TerminatorsToFold.push_back(UserI); } } - for (auto &V : InvokeWithDeadSuccessor) - if (InvokeInst *II = dyn_cast_or_null<InvokeInst>(V)) { - bool UnwindBBIsDead = II->hasFnAttr(Attribute::NoUnwind); - bool NormalBBIsDead = II->hasFnAttr(Attribute::NoReturn); - bool Invoke2CallAllowed = - !AAIsDeadFunction::mayCatchAsynchronousExceptions( - *II->getFunction()); - assert((UnwindBBIsDead || NormalBBIsDead) && - "Invoke does not have dead successors!"); - BasicBlock *BB = II->getParent(); - BasicBlock *NormalDestBB = II->getNormalDest(); - if (UnwindBBIsDead) { - Instruction *NormalNextIP = &NormalDestBB->front(); - if (Invoke2CallAllowed) { - changeToCall(II); - NormalNextIP = BB->getTerminator(); - } - if (NormalBBIsDead) - ToBeChangedToUnreachableInsts.insert(NormalNextIP); - } else { - assert(NormalBBIsDead && "Broken invariant!"); - if (!NormalDestBB->getUniquePredecessor()) - NormalDestBB = SplitBlockPredecessors(NormalDestBB, {BB}, ".dead"); - ToBeChangedToUnreachableInsts.insert(&NormalDestBB->front()); + } + for (auto &V : InvokeWithDeadSuccessor) + if (InvokeInst *II = dyn_cast_or_null<InvokeInst>(V)) { + bool UnwindBBIsDead = II->hasFnAttr(Attribute::NoUnwind); + bool NormalBBIsDead = II->hasFnAttr(Attribute::NoReturn); + bool Invoke2CallAllowed = + !AAIsDead::mayCatchAsynchronousExceptions(*II->getFunction()); + assert((UnwindBBIsDead || NormalBBIsDead) && + "Invoke does not have dead successors!"); + BasicBlock *BB = II->getParent(); + BasicBlock *NormalDestBB = II->getNormalDest(); + if (UnwindBBIsDead) { + Instruction *NormalNextIP = &NormalDestBB->front(); + if (Invoke2CallAllowed) { + changeToCall(II); + NormalNextIP = BB->getTerminator(); } + if (NormalBBIsDead) + ToBeChangedToUnreachableInsts.insert(NormalNextIP); + } else { + assert(NormalBBIsDead && "Broken invariant!"); + if (!NormalDestBB->getUniquePredecessor()) + NormalDestBB = SplitBlockPredecessors(NormalDestBB, {BB}, ".dead"); + ToBeChangedToUnreachableInsts.insert(&NormalDestBB->front()); } - for (auto &V : ToBeChangedToUnreachableInsts) - if (Instruction *I = dyn_cast_or_null<Instruction>(V)) - changeToUnreachable(I, /* UseLLVMTrap */ false); - for (Instruction *I : TerminatorsToFold) - ConstantFoldTerminator(I->getParent()); - - for (Instruction *I : ToBeDeletedInsts) { - I->replaceAllUsesWith(UndefValue::get(I->getType())); + } + for (Instruction *I : TerminatorsToFold) { + CGModifiedFunctions.insert(I->getFunction()); + ConstantFoldTerminator(I->getParent()); + } + for (auto &V : ToBeChangedToUnreachableInsts) + if (Instruction *I = dyn_cast_or_null<Instruction>(V)) { + CGModifiedFunctions.insert(I->getFunction()); + changeToUnreachable(I, /* UseLLVMTrap */ false); + } + + for (auto &V : ToBeDeletedInsts) { + if (Instruction *I = dyn_cast_or_null<Instruction>(V)) { + I->dropDroppableUses(); + CGModifiedFunctions.insert(I->getFunction()); + if (!I->getType()->isVoidTy()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); if (!isa<PHINode>(I) && isInstructionTriviallyDead(I)) 
DeadInsts.push_back(I); else I->eraseFromParent(); } + } - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); - - if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { - SmallVector<BasicBlock *, 8> ToBeDeletedBBs; - ToBeDeletedBBs.reserve(NumDeadBlocks); - ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end()); - // Actually we do not delete the blocks but squash them into a single - // unreachable but untangling branches that jump here is something we need - // to do in a more generic way. - DetatchDeadBlocks(ToBeDeletedBBs, nullptr); - STATS_DECL(AAIsDead, BasicBlock, "Number of dead basic blocks deleted."); - BUILD_STAT_NAME(AAIsDead, BasicBlock) += ToBeDeletedBlocks.size(); - } + LLVM_DEBUG(dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size() + << "\n"); - // Identify dead internal functions and delete them. This happens outside - // the other fixpoint analysis as we might treat potentially dead functions - // as live to lower the number of iterations. If they happen to be dead, the - // below fixpoint loop will identify and eliminate them. - SmallVector<Function *, 8> InternalFns; - for (Function &F : M) - if (F.hasLocalLinkage()) - InternalFns.push_back(&F); - - bool FoundDeadFn = true; - while (FoundDeadFn) { - FoundDeadFn = false; - for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) { - Function *F = InternalFns[u]; - if (!F) - continue; + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); + + if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { + SmallVector<BasicBlock *, 8> ToBeDeletedBBs; + ToBeDeletedBBs.reserve(NumDeadBlocks); + for (BasicBlock *BB : ToBeDeletedBlocks) { + CGModifiedFunctions.insert(BB->getParent()); + ToBeDeletedBBs.push_back(BB); + } + // Actually we do not delete the blocks but squash them into a single + // unreachable but untangling branches that jump here is something we need + // to do in a more generic way. + DetatchDeadBlocks(ToBeDeletedBBs, nullptr); + } + + // Identify dead internal functions and delete them. This happens outside + // the other fixpoint analysis as we might treat potentially dead functions + // as live to lower the number of iterations. If they happen to be dead, the + // below fixpoint loop will identify and eliminate them. + SmallVector<Function *, 8> InternalFns; + for (Function *F : Functions) + if (F->hasLocalLinkage()) + InternalFns.push_back(F); + + bool FoundDeadFn = true; + while (FoundDeadFn) { + FoundDeadFn = false; + for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) { + Function *F = InternalFns[u]; + if (!F) + continue; - if (!checkForAllCallSites( - [this](AbstractCallSite ACS) { - return ToBeDeletedFunctions.count( - ACS.getInstruction()->getFunction()); - }, - *F, true, nullptr)) - continue; + bool AllCallSitesKnown; + if (!checkForAllCallSites( + [this](AbstractCallSite ACS) { + return ToBeDeletedFunctions.count( + ACS.getInstruction()->getFunction()); + }, + *F, true, nullptr, AllCallSitesKnown)) + continue; - ToBeDeletedFunctions.insert(F); - InternalFns[u] = nullptr; - FoundDeadFn = true; - } + ToBeDeletedFunctions.insert(F); + InternalFns[u] = nullptr; + FoundDeadFn = true; } } - STATS_DECL(AAIsDead, Function, "Number of dead functions deleted."); - BUILD_STAT_NAME(AAIsDead, Function) += ToBeDeletedFunctions.size(); - // Rewrite the functions as requested during manifest. 
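The dead-internal-function loop above is a small fixpoint computation of its own: an internal function is dead once every known call site lives in a function already scheduled for deletion (a function with no call sites at all is dead immediately), and each newly found dead function can expose further ones. A self-contained sketch of the same idea over a toy caller map, where Dead is pre-seeded with the functions already scheduled for deletion (names and types are illustrative, not the LLVM API):

  #include <map>
  #include <set>
  #include <string>
  #include <vector>

  // Callers[F] lists the functions containing the known call sites of F.
  using CallerMap = std::map<std::string, std::vector<std::string>>;

  // Grow Dead until a fixpoint: F is dead once every caller of F is dead.
  void findDeadInternalFunctions(const CallerMap &Callers,
                                 const std::set<std::string> &InternalFns,
                                 std::set<std::string> &Dead) {
    bool FoundDeadFn = true;
    while (FoundDeadFn) {
      FoundDeadFn = false;
      for (const std::string &F : InternalFns) {
        if (Dead.count(F))
          continue;
        bool AllCallersDead = true;
        auto It = Callers.find(F);
        if (It != Callers.end())
          for (const std::string &Caller : It->second)
            AllCallersDead &= Dead.count(Caller) != 0;
        if (!AllCallersDead)
          continue;
        Dead.insert(F);
        FoundDeadFn = true; // a new dead function may free up its callees
      }
    }
  }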
- ManifestChange = ManifestChange | rewriteFunctionSignatures(); + ChangeStatus ManifestChange = rewriteFunctionSignatures(CGModifiedFunctions); - for (Function *Fn : ToBeDeletedFunctions) { - Fn->deleteBody(); - Fn->replaceAllUsesWith(UndefValue::get(Fn->getType())); - Fn->eraseFromParent(); - } + for (Function *Fn : CGModifiedFunctions) + CGUpdater.reanalyzeFunction(*Fn); - if (VerifyMaxFixpointIterations && - IterationCounter != MaxFixpointIterations) { - errs() << "\n[Attributor] Fixpoint iteration done after: " - << IterationCounter << "/" << MaxFixpointIterations - << " iterations\n"; - llvm_unreachable("The fixpoint was not reached with exactly the number of " - "specified iterations!"); + for (Function *Fn : ToBeDeletedFunctions) + CGUpdater.removeFunction(*Fn); + + NumFnDeleted += ToBeDeletedFunctions.size(); + + LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << NumFnDeleted + << " functions after manifest.\n"); + +#ifdef EXPENSIVE_CHECKS + for (Function *F : Functions) { + if (ToBeDeletedFunctions.count(F)) + continue; + assert(!verifyFunction(*F, &errs()) && "Module verification failed!"); } +#endif return ManifestChange; } -bool Attributor::registerFunctionSignatureRewrite( - Argument &Arg, ArrayRef<Type *> ReplacementTypes, - ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB, - ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) { +ChangeStatus Attributor::run() { + SeedingPeriod = false; + runTillFixpoint(); + ChangeStatus ManifestChange = manifestAttributes(); + ChangeStatus CleanupChange = cleanupIR(); + return ManifestChange | CleanupChange; +} + +ChangeStatus Attributor::updateAA(AbstractAttribute &AA) { + // Use a new dependence vector for this update. + DependenceVector DV; + DependenceStack.push_back(&DV); + + auto &AAState = AA.getState(); + ChangeStatus CS = ChangeStatus::UNCHANGED; + if (!isAssumedDead(AA, nullptr, /* CheckBBLivenessOnly */ true)) + CS = AA.update(*this); + + if (DV.empty()) { + // If the attribute did not query any non-fix information, the state + // will not change and we can indicate that right away. + AAState.indicateOptimisticFixpoint(); + } + + if (!AAState.isAtFixpoint()) + rememberDependences(); + + // Verify the stack was used properly, that is we pop the dependence vector we + // put there earlier. + DependenceVector *PoppedDV = DependenceStack.pop_back_val(); + (void)PoppedDV; + assert(PoppedDV == &DV && "Inconsistent usage of the dependence stack!"); + + return CS; +} + +/// Create a shallow wrapper for \p F such that \p F has internal linkage +/// afterwards. It also sets the original \p F 's name to anonymous +/// +/// A wrapper is a function with the same type (and attributes) as \p F +/// that will only call \p F and return the result, if any. 
+/// +/// Assuming the declaration of looks like: +/// rty F(aty0 arg0, ..., atyN argN); +/// +/// The wrapper will then look as follows: +/// rty wrapper(aty0 arg0, ..., atyN argN) { +/// return F(arg0, ..., argN); +/// } +/// +static void createShallowWrapper(Function &F) { + assert(AllowShallowWrappers && + "Cannot create a wrapper if it is not allowed!"); + assert(!F.isDeclaration() && "Cannot create a wrapper around a declaration!"); + + Module &M = *F.getParent(); + LLVMContext &Ctx = M.getContext(); + FunctionType *FnTy = F.getFunctionType(); + + Function *Wrapper = + Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), F.getName()); + F.setName(""); // set the inside function anonymous + M.getFunctionList().insert(F.getIterator(), Wrapper); + + F.setLinkage(GlobalValue::InternalLinkage); + + F.replaceAllUsesWith(Wrapper); + assert(F.use_empty() && "Uses remained after wrapper was created!"); + + // Move the COMDAT section to the wrapper. + // TODO: Check if we need to keep it for F as well. + Wrapper->setComdat(F.getComdat()); + F.setComdat(nullptr); + + // Copy all metadata and attributes but keep them on F as well. + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + F.getAllMetadata(MDs); + for (auto MDIt : MDs) + Wrapper->addMetadata(MDIt.first, *MDIt.second); + Wrapper->setAttributes(F.getAttributes()); + + // Create the call in the wrapper. + BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper); + + SmallVector<Value *, 8> Args; + auto FArgIt = F.arg_begin(); + for (Argument &Arg : Wrapper->args()) { + Args.push_back(&Arg); + Arg.setName((FArgIt++)->getName()); + } + + CallInst *CI = CallInst::Create(&F, Args, "", EntryBB); + CI->setTailCall(true); + CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoInline); + ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB); + + NumFnShallowWrapperCreated++; +} + +bool Attributor::isValidFunctionSignatureRewrite( + Argument &Arg, ArrayRef<Type *> ReplacementTypes) { auto CallSiteCanBeChanged = [](AbstractCallSite ACS) { + // Forbid the call site to cast the function return type. If we need to + // rewrite these functions we need to re-create a cast for the new call site + // (if the old had uses). + if (!ACS.getCalledFunction() || + ACS.getInstruction()->getType() != + ACS.getCalledFunction()->getReturnType()) + return false; // Forbid must-tail calls for now. - return !ACS.isCallbackCall() && !ACS.getCallSite().isMustTailCall(); + return !ACS.isCallbackCall() && !ACS.getInstruction()->isMustTailCall(); }; Function *Fn = Arg.getParent(); @@ -6196,14 +1387,17 @@ bool Attributor::registerFunctionSignatureRewrite( AttributeList FnAttributeList = Fn->getAttributes(); if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) || FnAttributeList.hasAttrSomewhere(Attribute::StructRet) || - FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) { + FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) || + FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) { LLVM_DEBUG( dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n"); return false; } // Avoid callbacks for now. - if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr)) { + bool AllCallSitesKnown; + if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr, + AllCallSitesKnown)) { LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n"); return false; } @@ -6216,21 +1410,35 @@ bool Attributor::registerFunctionSignatureRewrite( // Forbid must-tail calls for now. 
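In source-level terms, the shallow wrapper created above corresponds roughly to the following hand-written C++ analogy; the pass of course performs this on LLVM IR, copies attributes and metadata to the wrapper, and marks the forwarding call noinline (the function names here are purely illustrative):

  // Before: a function visible to other translation units and therefore not
  // freely IPO-amendable.
  //   int compute(int A, int B) { return A * B + 1; }

  // After: the original body becomes an internal (here: anonymous-namespace)
  // function without an externally meaningful name ...
  namespace {
  int ComputeImpl(int A, int B) { return A * B + 1; }
  } // namespace

  // ... and a wrapper with the original name and signature merely forwards.
  int compute(int A, int B) { return ComputeImpl(A, B); }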
// TODO: - bool AnyDead; auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn); - if (!checkForAllInstructionsImpl(OpcodeInstMap, InstPred, nullptr, AnyDead, - {Instruction::Call})) { + if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr, + nullptr, {Instruction::Call})) { LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite due to instructions\n"); return false; } - SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = ArgumentReplacementMap[Fn]; - if (ARIs.size() == 0) + return true; +} + +bool Attributor::registerFunctionSignatureRewrite( + Argument &Arg, ArrayRef<Type *> ReplacementTypes, + ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB, + ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) { + LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in " + << Arg.getParent()->getName() << " with " + << ReplacementTypes.size() << " replacements\n"); + assert(isValidFunctionSignatureRewrite(Arg, ReplacementTypes) && + "Cannot register an invalid rewrite"); + + Function *Fn = Arg.getParent(); + SmallVectorImpl<std::unique_ptr<ArgumentReplacementInfo>> &ARIs = + ArgumentReplacementMap[Fn]; + if (ARIs.empty()) ARIs.resize(Fn->arg_size()); // If we have a replacement already with less than or equal new arguments, // ignore this request. - ArgumentReplacementInfo *&ARI = ARIs[Arg.getArgNo()]; + std::unique_ptr<ArgumentReplacementInfo> &ARI = ARIs[Arg.getArgNo()]; if (ARI && ARI->getNumReplacementArgs() <= ReplacementTypes.size()) { LLVM_DEBUG(dbgs() << "[Attributor] Existing rewrite is preferred\n"); return false; @@ -6238,18 +1446,28 @@ bool Attributor::registerFunctionSignatureRewrite( // If we have a replacement already but we like the new one better, delete // the old. - if (ARI) - delete ARI; + ARI.reset(); + + LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in " + << Arg.getParent()->getName() << " with " + << ReplacementTypes.size() << " replacements\n"); // Remember the replacement. - ARI = new ArgumentReplacementInfo(*this, Arg, ReplacementTypes, - std::move(CalleeRepairCB), - std::move(ACSRepairCB)); + ARI.reset(new ArgumentReplacementInfo(*this, Arg, ReplacementTypes, + std::move(CalleeRepairCB), + std::move(ACSRepairCB))); return true; } -ChangeStatus Attributor::rewriteFunctionSignatures() { +bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) { + if (SeedAllowList.size() == 0) + return true; + return std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName()); +} + +ChangeStatus Attributor::rewriteFunctionSignatures( + SmallPtrSetImpl<Function *> &ModifiedFns) { ChangeStatus Changed = ChangeStatus::UNCHANGED; for (auto &It : ArgumentReplacementMap) { @@ -6259,7 +1477,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { if (ToBeDeletedFunctions.count(OldFn)) continue; - const SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = It.getSecond(); + const SmallVectorImpl<std::unique_ptr<ArgumentReplacementInfo>> &ARIs = + It.getSecond(); assert(ARIs.size() == OldFn->arg_size() && "Inconsistent state!"); SmallVector<Type *, 16> NewArgumentTypes; @@ -6268,7 +1487,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { // Collect replacement argument types and copy over existing attributes. 
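The replacement bookkeeping used by the signature rewrite can be pictured as one optional entry per old argument: if an entry is present, its replacement types are spliced into the new parameter list (an empty list drops the argument); otherwise the old type is kept. A stripped-down sketch of assembling the new parameter list, with types modeled as strings and all names illustrative:

  #include <map>
  #include <string>
  #include <vector>

  // OldParams holds the original parameter types; Replacements maps an
  // argument index to the list of types that replace it.
  std::vector<std::string> buildNewParams(
      const std::vector<std::string> &OldParams,
      const std::map<unsigned, std::vector<std::string>> &Replacements) {
    std::vector<std::string> NewParams;
    for (unsigned ArgNo = 0; ArgNo < OldParams.size(); ++ArgNo) {
      auto It = Replacements.find(ArgNo);
      if (It == Replacements.end()) {
        NewParams.push_back(OldParams[ArgNo]); // keep the argument unchanged
        continue;
      }
      // Splice in the replacement types, e.g. {"i8*"} -> {"i32", "i32"}.
      NewParams.insert(NewParams.end(), It->second.begin(), It->second.end());
    }
    return NewParams;
  }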
AttributeList OldFnAttributeList = OldFn->getAttributes(); for (Argument &Arg : OldFn->args()) { - if (ArgumentReplacementInfo *ARI = ARIs[Arg.getArgNo()]) { + if (const std::unique_ptr<ArgumentReplacementInfo> &ARI = + ARIs[Arg.getArgNo()]) { NewArgumentTypes.append(ARI->ReplacementTypes.begin(), ARI->ReplacementTypes.end()); NewArgumentAttributes.append(ARI->getNumReplacementArgs(), @@ -6315,6 +1535,14 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { NewFn->getBasicBlockList().splice(NewFn->begin(), OldFn->getBasicBlockList()); + // Fixup block addresses to reference new function. + SmallVector<BlockAddress *, 8u> BlockAddresses; + for (User *U : OldFn->users()) + if (auto *BA = dyn_cast<BlockAddress>(U)) + BlockAddresses.push_back(BA); + for (auto *BA : BlockAddresses) + BA->replaceAllUsesWith(BlockAddress::get(NewFn, BA->getBasicBlock())); + // Set of all "call-like" instructions that invoke the old function mapped // to their new replacements. SmallVector<std::pair<CallBase *, CallBase *>, 8> CallSitePairs; @@ -6330,7 +1558,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum) { unsigned NewFirstArgNum = NewArgOperands.size(); (void)NewFirstArgNum; // only used inside assert. - if (ArgumentReplacementInfo *ARI = ARIs[OldArgNum]) { + if (const std::unique_ptr<ArgumentReplacementInfo> &ARI = + ARIs[OldArgNum]) { if (ARI->ACSRepairCB) ARI->ACSRepairCB(*ARI, ACS, NewArgOperands); assert(ARI->getNumReplacementArgs() + NewFirstArgNum == @@ -6369,11 +1598,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { } // Copy over various properties and the new attributes. - uint64_t W; - if (OldCB->extractProfTotalWeight(W)) - NewCB->setProfWeight(W); + NewCB->copyMetadata(*OldCB, {LLVMContext::MD_prof, LLVMContext::MD_dbg}); NewCB->setCallingConv(OldCB->getCallingConv()); - NewCB->setDebugLoc(OldCB->getDebugLoc()); NewCB->takeName(OldCB); NewCB->setAttributes(AttributeList::get( Ctx, OldCallAttributeList.getFnAttributes(), @@ -6384,8 +1610,9 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { }; // Use the CallSiteReplacementCreator to create replacement call sites. - bool Success = - checkForAllCallSites(CallSiteReplacementCreator, *OldFn, true, nullptr); + bool AllCallSitesKnown; + bool Success = checkForAllCallSites(CallSiteReplacementCreator, *OldFn, + true, nullptr, AllCallSitesKnown); (void)Success; assert(Success && "Assumed call site replacement to succeed!"); @@ -6394,7 +1621,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { auto NewFnArgIt = NewFn->arg_begin(); for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum, ++OldFnArgIt) { - if (ArgumentReplacementInfo *ARI = ARIs[OldArgNum]) { + if (const std::unique_ptr<ArgumentReplacementInfo> &ARI = + ARIs[OldArgNum]) { if (ARI->CalleeRepairCB) ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt); NewFnArgIt += ARI->ReplacementTypes.size(); @@ -6409,11 +1637,21 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { for (auto &CallSitePair : CallSitePairs) { CallBase &OldCB = *CallSitePair.first; CallBase &NewCB = *CallSitePair.second; + assert(OldCB.getType() == NewCB.getType() && + "Cannot handle call sites with different types!"); + ModifiedFns.insert(OldCB.getFunction()); + CGUpdater.replaceCallSite(OldCB, NewCB); OldCB.replaceAllUsesWith(&NewCB); OldCB.eraseFromParent(); } - ToBeDeletedFunctions.insert(OldFn); + // Replace the function in the call graph (if any). 
+ CGUpdater.replaceFunctionWith(*OldFn, *NewFn); + + // If the old function was modified and needed to be reanalyzed, the new one + // does now. + if (ModifiedFns.erase(OldFn)) + ModifiedFns.insert(NewFn); Changed = ChangeStatus::CHANGED; } @@ -6421,13 +1659,16 @@ ChangeStatus Attributor::rewriteFunctionSignatures() { return Changed; } -void Attributor::initializeInformationCache(Function &F) { +void InformationCache::initializeInformationCache(const Function &CF, + FunctionInfo &FI) { + // As we do not modify the function here we can remove the const + // withouth breaking implicit assumptions. At the end of the day, we could + // initialize the cache eagerly which would look the same to the users. + Function &F = const_cast<Function &>(CF); // Walk all instructions to find interesting instructions that might be // queried by abstract attributes during their initialization or update. // This has to happen before we create attributes. - auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F]; - auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F]; for (Instruction &I : instructions(&F)) { bool IsInterestingOpcode = false; @@ -6439,15 +1680,23 @@ void Attributor::initializeInformationCache(Function &F) { // Note: There are no concrete attributes now so this is initially empty. switch (I.getOpcode()) { default: - assert((!ImmutableCallSite(&I)) && (!isa<CallBase>(&I)) && - "New call site/base instruction type needs to be known int the " + assert(!isa<CallBase>(&I) && + "New call base instruction type needs to be known in the " "Attributor."); break; - case Instruction::Load: - // The alignment of a pointer is interesting for loads. - case Instruction::Store: - // The alignment of a pointer is interesting for stores. case Instruction::Call: + // Calls are interesting on their own, additionally: + // For `llvm.assume` calls we also fill the KnowledgeMap as we find them. + // For `must-tail` calls we remember the caller and callee. + if (IntrinsicInst *Assume = dyn_cast<IntrinsicInst>(&I)) { + if (Assume->getIntrinsicID() == Intrinsic::assume) + fillMapFromAssume(*Assume, KnowledgeMap); + } else if (cast<CallInst>(I).isMustTailCall()) { + FI.ContainsMustTailCall = true; + if (const Function *Callee = cast<CallInst>(I).getCalledFunction()) + getFunctionInfo(*Callee).CalledViaMustTail = true; + } + LLVM_FALLTHROUGH; case Instruction::CallBr: case Instruction::Invoke: case Instruction::CleanupRet: @@ -6457,28 +1706,55 @@ void Attributor::initializeInformationCache(Function &F) { case Instruction::Br: case Instruction::Resume: case Instruction::Ret: + case Instruction::Load: + // The alignment of a pointer is interesting for loads. + case Instruction::Store: + // The alignment of a pointer is interesting for stores. IsInterestingOpcode = true; } - if (IsInterestingOpcode) - InstOpcodeMap[I.getOpcode()].push_back(&I); + if (IsInterestingOpcode) { + auto *&Insts = FI.OpcodeInstMap[I.getOpcode()]; + if (!Insts) + Insts = new (Allocator) InstructionVectorTy(); + Insts->push_back(&I); + } if (I.mayReadOrWriteMemory()) - ReadOrWriteInsts.push_back(&I); + FI.RWInsts.push_back(&I); } + + if (F.hasFnAttribute(Attribute::AlwaysInline) && + isInlineViable(F).isSuccess()) + InlineableFunctions.insert(&F); +} + +InformationCache::FunctionInfo::~FunctionInfo() { + // The instruction vectors are allocated using a BumpPtrAllocator, we need to + // manually destroy them. 
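The destructor body that follows is required because a bump allocator only hands out raw memory; objects placement-new'ed into it are never destroyed automatically, so their destructors must be invoked by hand before the allocator goes away. A minimal standalone illustration of that pattern, using a deliberately naive fixed-size arena instead of LLVM's BumpPtrAllocator:

  #include <cstddef>
  #include <new>
  #include <vector>

  struct Arena {
    // Naive bump allocation out of one buffer; no bounds checking here.
    alignas(std::max_align_t) unsigned char Buffer[4096];
    std::size_t Used = 0;
    void *allocate(std::size_t Size, std::size_t Align) {
      Used = (Used + Align - 1) / Align * Align;
      void *P = Buffer + Used;
      Used += Size;
      return P;
    }
    // No per-object bookkeeping: ~Arena() cannot run element destructors.
  };

  using IntVector = std::vector<int>;

  int main() {
    Arena A;
    // Placement-new an object into arena memory ...
    auto *V =
        new (A.allocate(sizeof(IntVector), alignof(IntVector))) IntVector();
    V->push_back(42);
    // ... and destroy it explicitly, just as FunctionInfo's destructor does
    // for the instruction vectors placed into the BumpPtrAllocator.
    V->~IntVector();
    return 0;
  }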
+ for (auto &It : OpcodeInstMap) + It.getSecond()->~InstructionVectorTy(); } void Attributor::recordDependence(const AbstractAttribute &FromAA, const AbstractAttribute &ToAA, DepClassTy DepClass) { + // If we are outside of an update, thus before the actual fixpoint iteration + // started (= when we create AAs), we do not track dependences because we will + // put all AAs into the initial worklist anyway. + if (DependenceStack.empty()) + return; if (FromAA.getState().isAtFixpoint()) return; + DependenceStack.back()->push_back({&FromAA, &ToAA, DepClass}); +} - if (DepClass == DepClassTy::REQUIRED) - QueryMap[&FromAA].RequiredAAs.insert( - const_cast<AbstractAttribute *>(&ToAA)); - else - QueryMap[&FromAA].OptionalAAs.insert( - const_cast<AbstractAttribute *>(&ToAA)); - QueriedNonFixAA = true; +void Attributor::rememberDependences() { + assert(!DependenceStack.empty() && "No dependences to remember!"); + + for (DepInfo &DI : *DependenceStack.back()) { + auto &DepAAs = const_cast<AbstractAttribute &>(*DI.FromAA).Deps; + DepAAs.push_back(AbstractAttribute::DepTy( + const_cast<AbstractAttribute *>(DI.ToAA), unsigned(DI.DepClass))); + } } void Attributor::identifyDefaultAbstractAttributes(Function &F) { @@ -6487,6 +1763,17 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { if (F.isDeclaration()) return; + // In non-module runs we need to look at the call sites of a function to + // determine if it is part of a must-tail call edge. This will influence what + // attributes we can derive. + InformationCache::FunctionInfo &FI = InfoCache.getFunctionInfo(F); + if (!isModulePass() && !FI.CalledViaMustTail) { + for (const Use &U : F.uses()) + if (const auto *CB = dyn_cast<CallBase>(U.getUser())) + if (CB->isCallee(&U) && CB->isMustTailCall()) + FI.CalledViaMustTail = true; + } + IRPosition FPos = IRPosition::function(F); // Check for dead BasicBlocks in every function. @@ -6518,6 +1805,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every function might be "readnone/readonly/writeonly/...". getOrCreateAAFor<AAMemoryBehavior>(FPos); + // Every function can be "readnone/argmemonly/inaccessiblememonly/...". + getOrCreateAAFor<AAMemoryLocation>(FPos); + // Every function might be applicable for Heap-To-Stack conversion. if (EnableHeapToStack) getOrCreateAAFor<AAHeapToStack>(FPos); @@ -6560,6 +1850,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every argument might be simplified. getOrCreateAAFor<AAValueSimplify>(ArgPos); + // Every argument might be dead. + getOrCreateAAFor<AAIsDead>(ArgPos); + if (Arg.getType()->isPointerTy()) { // Every argument with pointer type might be marked nonnull. getOrCreateAAFor<AANonNull>(ArgPos); @@ -6582,75 +1875,87 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every argument with pointer type might be marked nofree. getOrCreateAAFor<AANoFree>(ArgPos); + + // Every argument with pointer type might be privatizable (or promotable) + getOrCreateAAFor<AAPrivatizablePtr>(ArgPos); } } auto CallSitePred = [&](Instruction &I) -> bool { - CallSite CS(&I); - if (Function *Callee = CS.getCalledFunction()) { - // Skip declerations except if annotations on their call sites were - // explicitly requested. 
- if (!AnnotateDeclarationCallSites && Callee->isDeclaration() && - !Callee->hasMetadata(LLVMContext::MD_callback)) - return true; + auto &CB = cast<CallBase>(I); + IRPosition CBRetPos = IRPosition::callsite_returned(CB); - if (!Callee->getReturnType()->isVoidTy() && !CS->use_empty()) { + // Call sites might be dead if they do not have side effects and no live + // users. The return value might be dead if there are no live users. + getOrCreateAAFor<AAIsDead>(CBRetPos); - IRPosition CSRetPos = IRPosition::callsite_returned(CS); + Function *Callee = CB.getCalledFunction(); + // TODO: Even if the callee is not known now we might be able to simplify + // the call/callee. + if (!Callee) + return true; - // Call site return values might be dead. - getOrCreateAAFor<AAIsDead>(CSRetPos); + // Skip declarations except if annotations on their call sites were + // explicitly requested. + if (!AnnotateDeclarationCallSites && Callee->isDeclaration() && + !Callee->hasMetadata(LLVMContext::MD_callback)) + return true; - // Call site return integer values might be limited by a constant range. - if (Callee->getReturnType()->isIntegerTy()) { - getOrCreateAAFor<AAValueConstantRange>(CSRetPos); - } - } + if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) { - for (int i = 0, e = CS.getNumArgOperands(); i < e; i++) { + IRPosition CBRetPos = IRPosition::callsite_returned(CB); - IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); + // Call site return integer values might be limited by a constant range. + if (Callee->getReturnType()->isIntegerTy()) + getOrCreateAAFor<AAValueConstantRange>(CBRetPos); + } - // Every call site argument might be dead. - getOrCreateAAFor<AAIsDead>(CSArgPos); + for (int I = 0, E = CB.getNumArgOperands(); I < E; ++I) { - // Call site argument might be simplified. - getOrCreateAAFor<AAValueSimplify>(CSArgPos); + IRPosition CBArgPos = IRPosition::callsite_argument(CB, I); - if (!CS.getArgument(i)->getType()->isPointerTy()) - continue; + // Every call site argument might be dead. + getOrCreateAAFor<AAIsDead>(CBArgPos); - // Call site argument attribute "non-null". - getOrCreateAAFor<AANonNull>(CSArgPos); + // Call site argument might be simplified. + getOrCreateAAFor<AAValueSimplify>(CBArgPos); - // Call site argument attribute "no-alias". - getOrCreateAAFor<AANoAlias>(CSArgPos); + if (!CB.getArgOperand(I)->getType()->isPointerTy()) + continue; - // Call site argument attribute "dereferenceable". - getOrCreateAAFor<AADereferenceable>(CSArgPos); + // Call site argument attribute "non-null". + getOrCreateAAFor<AANonNull>(CBArgPos); - // Call site argument attribute "align". - getOrCreateAAFor<AAAlign>(CSArgPos); + // Call site argument attribute "nocapture". + getOrCreateAAFor<AANoCapture>(CBArgPos); - // Call site argument attribute - // "readnone/readonly/writeonly/..." - getOrCreateAAFor<AAMemoryBehavior>(CSArgPos); + // Call site argument attribute "no-alias". + getOrCreateAAFor<AANoAlias>(CBArgPos); - // Call site argument attribute "nofree". - getOrCreateAAFor<AANoFree>(CSArgPos); - } + // Call site argument attribute "dereferenceable". + getOrCreateAAFor<AADereferenceable>(CBArgPos); + + // Call site argument attribute "align". + getOrCreateAAFor<AAAlign>(CBArgPos); + + // Call site argument attribute + // "readnone/readonly/writeonly/..." + getOrCreateAAFor<AAMemoryBehavior>(CBArgPos); + + // Call site argument attribute "nofree". 
+ getOrCreateAAFor<AANoFree>(CBArgPos); } return true; }; auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); - bool Success, AnyDead = false; + bool Success; Success = checkForAllInstructionsImpl( - OpcodeInstMap, CallSitePred, nullptr, AnyDead, + nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr, {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, (unsigned)Instruction::Call}); (void)Success; - assert(Success && !AnyDead && "Expected the check call to be successful!"); + assert(Success && "Expected the check call to be successful!"); auto LoadStorePred = [&](Instruction &I) -> bool { if (isa<LoadInst>(I)) @@ -6662,10 +1967,10 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { return true; }; Success = checkForAllInstructionsImpl( - OpcodeInstMap, LoadStorePred, nullptr, AnyDead, + nullptr, OpcodeInstMap, LoadStorePred, nullptr, nullptr, {(unsigned)Instruction::Load, (unsigned)Instruction::Store}); (void)Success; - assert(Success && !AnyDead && "Expected the check call to be successful!"); + assert(Success && "Expected the check call to be successful!"); } /// Helpers to ease debugging through output streams and print calls. @@ -6703,14 +2008,6 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) { << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}"; } -template <typename base_ty, base_ty BestState, base_ty WorstState> -raw_ostream & -llvm::operator<<(raw_ostream &OS, - const IntegerStateBase<base_ty, BestState, WorstState> &S) { - return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")" - << static_cast<const AbstractState &>(S); -} - raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) { OS << "range-state(" << S.getBitWidth() << ")<"; S.getKnown().print(OS); @@ -6740,50 +2037,95 @@ void AbstractAttribute::print(raw_ostream &OS) const { /// Pass (Manager) Boilerplate /// ---------------------------------------------------------------------------- -static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) { - if (DisableAttributor) +static bool runAttributorOnFunctions(InformationCache &InfoCache, + SetVector<Function *> &Functions, + AnalysisGetter &AG, + CallGraphUpdater &CGUpdater) { + if (Functions.empty()) return false; - LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << M.size() + LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << Functions.size() << " functions.\n"); // Create an Attributor and initially empty information cache that is filled // while we identify default attribute opportunities. - InformationCache InfoCache(M, AG); - Attributor A(InfoCache, DepRecInterval); + Attributor A(Functions, InfoCache, CGUpdater); - for (Function &F : M) - A.initializeInformationCache(F); + // Create shallow wrappers for all functions that are not IPO amendable + if (AllowShallowWrappers) + for (Function *F : Functions) + if (!A.isFunctionIPOAmendable(*F)) + createShallowWrapper(*F); - for (Function &F : M) { - if (F.hasExactDefinition()) + for (Function *F : Functions) { + if (F->hasExactDefinition()) NumFnWithExactDefinition++; else NumFnWithoutExactDefinition++; // We look at internal functions only on-demand but if any use is not a - // direct call, we have to do it eagerly. - if (F.hasLocalLinkage()) { - if (llvm::all_of(F.uses(), [](const Use &U) { - return ImmutableCallSite(U.getUser()) && - ImmutableCallSite(U.getUser()).isCallee(&U); + // direct call or outside the current set of analyzed functions, we have to + // do it eagerly. 
+ if (F->hasLocalLinkage()) { + if (llvm::all_of(F->uses(), [&Functions](const Use &U) { + const auto *CB = dyn_cast<CallBase>(U.getUser()); + return CB && CB->isCallee(&U) && + Functions.count(const_cast<Function *>(CB->getCaller())); })) continue; } // Populate the Attributor with abstract attribute opportunities in the // function and the information cache with IR information. - A.identifyDefaultAbstractAttributes(F); + A.identifyDefaultAbstractAttributes(*F); } - bool Changed = A.run(M) == ChangeStatus::CHANGED; - assert(!verifyModule(M, &errs()) && "Module verification failed!"); - return Changed; + ChangeStatus Changed = A.run(); + LLVM_DEBUG(dbgs() << "[Attributor] Done with " << Functions.size() + << " functions, result: " << Changed << ".\n"); + return Changed == ChangeStatus::CHANGED; } PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { - AnalysisGetter AG(AM); - if (runAttributorOnModule(M, AG)) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + AnalysisGetter AG(FAM); + + SetVector<Function *> Functions; + for (Function &F : M) + Functions.insert(&F); + + CallGraphUpdater CGUpdater; + BumpPtrAllocator Allocator; + InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr); + if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) { + // FIXME: Think about passes we will preserve and add them here. + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); +} + +PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + AnalysisGetter AG(FAM); + + SetVector<Function *> Functions; + for (LazyCallGraph::Node &N : C) + Functions.insert(&N.getFunction()); + + if (Functions.empty()) + return PreservedAnalyses::all(); + + Module &M = *Functions.back()->getParent(); + CallGraphUpdater CGUpdater; + CGUpdater.initialize(CG, C, AM, UR); + BumpPtrAllocator Allocator; + InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions); + if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) { // FIXME: Think about passes we will preserve and add them here. 
return PreservedAnalyses::none(); } @@ -6804,7 +2146,14 @@ struct AttributorLegacyPass : public ModulePass { return false; AnalysisGetter AG; - return runAttributorOnModule(M, AG); + SetVector<Function *> Functions; + for (Function &F : M) + Functions.insert(&F); + + CallGraphUpdater CGUpdater; + BumpPtrAllocator Allocator; + InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr); + return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -6813,158 +2162,65 @@ struct AttributorLegacyPass : public ModulePass { } }; -} // end anonymous namespace +struct AttributorCGSCCLegacyPass : public CallGraphSCCPass { + CallGraphUpdater CGUpdater; + static char ID; -Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); } + AttributorCGSCCLegacyPass() : CallGraphSCCPass(ID) { + initializeAttributorCGSCCLegacyPassPass(*PassRegistry::getPassRegistry()); + } -char AttributorLegacyPass::ID = 0; + bool runOnSCC(CallGraphSCC &SCC) override { + if (skipSCC(SCC)) + return false; -const char AAReturnedValues::ID = 0; -const char AANoUnwind::ID = 0; -const char AANoSync::ID = 0; -const char AANoFree::ID = 0; -const char AANonNull::ID = 0; -const char AANoRecurse::ID = 0; -const char AAWillReturn::ID = 0; -const char AAUndefinedBehavior::ID = 0; -const char AANoAlias::ID = 0; -const char AAReachability::ID = 0; -const char AANoReturn::ID = 0; -const char AAIsDead::ID = 0; -const char AADereferenceable::ID = 0; -const char AAAlign::ID = 0; -const char AANoCapture::ID = 0; -const char AAValueSimplify::ID = 0; -const char AAHeapToStack::ID = 0; -const char AAMemoryBehavior::ID = 0; -const char AAValueConstantRange::ID = 0; - -// Macro magic to create the static generator function for attributes that -// follow the naming scheme. 
- -#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \ - case IRPosition::PK: \ - llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!"); - -#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \ - case IRPosition::PK: \ - AA = new CLASS##SUFFIX(IRP); \ - break; + SetVector<Function *> Functions; + for (CallGraphNode *CGN : SCC) + if (Function *Fn = CGN->getFunction()) + if (!Fn->isDeclaration()) + Functions.insert(Fn); -#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ - CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ - CLASS *AA = nullptr; \ - switch (IRP.getPositionKind()) { \ - SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ - SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ - SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ - SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ - SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ - SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ - } \ - return *AA; \ - } + if (Functions.empty()) + return false; -#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ - CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ - CLASS *AA = nullptr; \ - switch (IRP.getPositionKind()) { \ - SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ - SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \ - SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ - } \ - return *AA; \ + AnalysisGetter AG; + CallGraph &CG = const_cast<CallGraph &>(SCC.getCallGraph()); + CGUpdater.initialize(CG, SCC); + Module &M = *Functions.back()->getParent(); + BumpPtrAllocator Allocator; + InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions); + return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater); } -#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ - CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ - CLASS *AA = nullptr; \ - switch (IRP.getPositionKind()) { \ - SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ - } \ - return *AA; \ - } + bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } -#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ - CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ - CLASS *AA = nullptr; \ - switch (IRP.getPositionKind()) { \ - SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ - SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ - SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ - SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ - SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ - SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ - SWITCH_PK_INV(CLASS, 
IRP_CALL_SITE, "call site") \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ - } \ - return *AA; \ + void getAnalysisUsage(AnalysisUsage &AU) const override { + // FIXME: Think about passes we will preserve and add them here. + AU.addRequired<TargetLibraryInfoWrapperPass>(); + CallGraphSCCPass::getAnalysisUsage(AU); } +}; -#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ - CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ - CLASS *AA = nullptr; \ - switch (IRP.getPositionKind()) { \ - SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ - SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ - SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ - } \ - return *AA; \ - } +} // end anonymous namespace + +Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); } +Pass *llvm::createAttributorCGSCCLegacyPass() { + return new AttributorCGSCCLegacyPass(); +} -CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind) -CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync) -CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse) -CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn) -CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn) -CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues) - -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) - -CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify) -CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead) -CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree) - -CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack) -CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability) -CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior) - -CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior) - -#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION -#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION -#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION -#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION -#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION -#undef SWITCH_PK_CREATE -#undef SWITCH_PK_INV +char AttributorLegacyPass::ID = 0; +char AttributorCGSCCLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) +INITIALIZE_PASS_BEGIN(AttributorCGSCCLegacyPass, "attributor-cgscc", + "Deduce and propagate attributes (CGSCC pass)", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(AttributorCGSCCLegacyPass, "attributor-cgscc", + "Deduce and propagate attributes (CGSCC pass)", false, + false) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp 
b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp new file mode 100644 index 0000000000000..7e9fd61eeb41e --- /dev/null +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -0,0 +1,7225 @@ +//===- AttributorAttributes.cpp - Attributes for Attributor deduction -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// See the Attributor.h file comment and the class descriptions in that file for +// more information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/Attributor.h" + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumeBundleQueries.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/NoFolder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/ArgumentPromotion.h" +#include "llvm/Transforms/Utils/Local.h" + +#include <cassert> + +using namespace llvm; + +#define DEBUG_TYPE "attributor" + +static cl::opt<bool> ManifestInternal( + "attributor-manifest-internal", cl::Hidden, + cl::desc("Manifest Attributor internal string attributes."), + cl::init(false)); + +static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), + cl::Hidden); + +STATISTIC(NumAAs, "Number of abstract attributes created"); + +// Some helper macros to deal with statistics tracking. +// +// Usage: +// For simple IR attribute tracking overload trackStatistics in the abstract +// attribute and choose the right STATS_DECLTRACK_********* macro, +// e.g.,: +// void trackStatistics() const override { +// STATS_DECLTRACK_ARG_ATTR(returned) +// } +// If there is a single "increment" side one can use the macro +// STATS_DECLTRACK with a custom message. If there are multiple increment +// sides, STATS_DECL and STATS_TRACK can also be used separately. 
+// +#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \ + ("Number of " #TYPE " marked '" #NAME "'") +#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME +#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG); +#define STATS_DECL(NAME, TYPE, MSG) \ + STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG); +#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE)); +#define STATS_DECLTRACK(NAME, TYPE, MSG) \ + { \ + STATS_DECL(NAME, TYPE, MSG) \ + STATS_TRACK(NAME, TYPE) \ + } +#define STATS_DECLTRACK_ARG_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME)) +#define STATS_DECLTRACK_CSARG_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CSArguments, \ + BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME)) +#define STATS_DECLTRACK_FN_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME)) +#define STATS_DECLTRACK_CS_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME)) +#define STATS_DECLTRACK_FNRET_ATTR(NAME) \ + STATS_DECLTRACK(NAME, FunctionReturn, \ + BUILD_STAT_MSG_IR_ATTR(function returns, NAME)) +#define STATS_DECLTRACK_CSRET_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CSReturn, \ + BUILD_STAT_MSG_IR_ATTR(call site returns, NAME)) +#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Floating, \ + ("Number of floating values known to be '" #NAME "'")) + +// Specialization of the operator<< for abstract attributes subclasses. This +// disambiguates situations where multiple operators are applicable. +namespace llvm { +#define PIPE_OPERATOR(CLASS) \ + raw_ostream &operator<<(raw_ostream &OS, const CLASS &AA) { \ + return OS << static_cast<const AbstractAttribute &>(AA); \ + } + +PIPE_OPERATOR(AAIsDead) +PIPE_OPERATOR(AANoUnwind) +PIPE_OPERATOR(AANoSync) +PIPE_OPERATOR(AANoRecurse) +PIPE_OPERATOR(AAWillReturn) +PIPE_OPERATOR(AANoReturn) +PIPE_OPERATOR(AAReturnedValues) +PIPE_OPERATOR(AANonNull) +PIPE_OPERATOR(AANoAlias) +PIPE_OPERATOR(AADereferenceable) +PIPE_OPERATOR(AAAlign) +PIPE_OPERATOR(AANoCapture) +PIPE_OPERATOR(AAValueSimplify) +PIPE_OPERATOR(AANoFree) +PIPE_OPERATOR(AAHeapToStack) +PIPE_OPERATOR(AAReachability) +PIPE_OPERATOR(AAMemoryBehavior) +PIPE_OPERATOR(AAMemoryLocation) +PIPE_OPERATOR(AAValueConstantRange) +PIPE_OPERATOR(AAPrivatizablePtr) +PIPE_OPERATOR(AAUndefinedBehavior) + +#undef PIPE_OPERATOR +} // namespace llvm + +namespace { + +static Optional<ConstantInt *> +getAssumedConstantInt(Attributor &A, const Value &V, + const AbstractAttribute &AA, + bool &UsedAssumedInformation) { + Optional<Constant *> C = A.getAssumedConstant(V, AA, UsedAssumedInformation); + if (C.hasValue()) + return dyn_cast_or_null<ConstantInt>(C.getValue()); + return llvm::None; +} + +/// Get pointer operand of memory accessing instruction. If \p I is +/// not a memory accessing instruction, return nullptr. If \p AllowVolatile, +/// is set to false and the instruction is volatile, return nullptr. 
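To make the macro layering defined above concrete, here is a hand-traced expansion of one typical use; STATISTIC is the counter-declaring macro from llvm/ADT/Statistic.h, and "nonnull" is just an example attribute name:

  // STATS_DECLTRACK_ARG_ATTR(nonnull)
  //   -> STATS_DECLTRACK(nonnull, Arguments,
  //                      BUILD_STAT_MSG_IR_ATTR(arguments, nonnull))
  //   -> { STATS_DECL(nonnull, Arguments,
  //                   "Number of arguments marked 'nonnull'")
  //        STATS_TRACK(nonnull, Arguments) }
  //   -> { STATISTIC(NumIRArguments_nonnull,
  //                  "Number of arguments marked 'nonnull'");
  //        ++(NumIRArguments_nonnull); }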
+static const Value *getPointerOperand(const Instruction *I, + bool AllowVolatile) { + if (auto *LI = dyn_cast<LoadInst>(I)) { + if (!AllowVolatile && LI->isVolatile()) + return nullptr; + return LI->getPointerOperand(); + } + + if (auto *SI = dyn_cast<StoreInst>(I)) { + if (!AllowVolatile && SI->isVolatile()) + return nullptr; + return SI->getPointerOperand(); + } + + if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I)) { + if (!AllowVolatile && CXI->isVolatile()) + return nullptr; + return CXI->getPointerOperand(); + } + + if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) { + if (!AllowVolatile && RMWI->isVolatile()) + return nullptr; + return RMWI->getPointerOperand(); + } + + return nullptr; +} + +/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and +/// advanced by \p Offset bytes. To aid later analysis the method tries to build +/// getelement pointer instructions that traverse the natural type of \p Ptr if +/// possible. If that fails, the remaining offset is adjusted byte-wise, hence +/// through a cast to i8*. +/// +/// TODO: This could probably live somewhere more prominantly if it doesn't +/// already exist. +static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset, + IRBuilder<NoFolder> &IRB, const DataLayout &DL) { + assert(Offset >= 0 && "Negative offset not supported yet!"); + LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset + << "-bytes as " << *ResTy << "\n"); + + // The initial type we are trying to traverse to get nice GEPs. + Type *Ty = Ptr->getType(); + + SmallVector<Value *, 4> Indices; + std::string GEPName = Ptr->getName().str(); + while (Offset) { + uint64_t Idx, Rem; + + if (auto *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + if (int64_t(SL->getSizeInBytes()) < Offset) + break; + Idx = SL->getElementContainingOffset(Offset); + assert(Idx < STy->getNumElements() && "Offset calculation error!"); + Rem = Offset - SL->getElementOffset(Idx); + Ty = STy->getElementType(Idx); + } else if (auto *PTy = dyn_cast<PointerType>(Ty)) { + Ty = PTy->getElementType(); + if (!Ty->isSized()) + break; + uint64_t ElementSize = DL.getTypeAllocSize(Ty); + assert(ElementSize && "Expected type with size!"); + Idx = Offset / ElementSize; + Rem = Offset % ElementSize; + } else { + // Non-aggregate type, we cast and make byte-wise progress now. + break; + } + + LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset + << " Idx: " << Idx << " Rem: " << Rem << "\n"); + + GEPName += "." + std::to_string(Idx); + Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx)); + Offset = Rem; + } + + // Create a GEP if we collected indices above. + if (Indices.size()) + Ptr = IRB.CreateGEP(Ptr, Indices, GEPName); + + // If an offset is left we use byte-wise adjustment. + if (Offset) { + Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy()); + Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset), + GEPName + ".b" + Twine(Offset)); + } + + // Ensure the result has the requested type. + Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast"); + + LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n"); + return Ptr; +} + +/// Recursively visit all values that might become \p IRP at some point. This +/// will be done by looking through cast instructions, selects, phis, and calls +/// with the "returned" attribute. 
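As a concrete trace of the constructPointer helper above, assume Ptr has type i64*, Offset is 20, and ResTy is i32* (hypothetical inputs chosen for illustration). The loop takes the pointer branch once (element size 8, index 20/8 = 2, remainder 4) and then stops at the non-aggregate i64, so the emitted sequence is roughly:

  ; 16 of the 20 bytes via a natural-type GEP
  %p.2    = getelementptr i64, i64* %p, i32 2
  ; the remaining 4 bytes via byte-wise adjustment through i8*
  %b      = bitcast i64* %p.2 to i8*
  %p.2.b4 = getelementptr i8, i8* %b, i32 4
  ; finally cast to the requested result type
  %p.cast = bitcast i8* %p.2.b4 to i32*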
Once we cannot look through the value any +/// further, the callback \p VisitValueCB is invoked and passed the current +/// value, the \p State, and a flag to indicate if we stripped anything. +/// Stripped means that we unpacked the value associated with \p IRP at least +/// once. Note that the value used for the callback may still be the value +/// associated with \p IRP (due to PHIs). To limit how much effort is invested, +/// we will never visit more values than specified by \p MaxValues. +template <typename AAType, typename StateTy> +static bool genericValueTraversal( + Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State, + function_ref<bool(Value &, const Instruction *, StateTy &, bool)> + VisitValueCB, + const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16, + function_ref<Value *(Value *)> StripCB = nullptr) { + + const AAIsDead *LivenessAA = nullptr; + if (IRP.getAnchorScope()) + LivenessAA = &A.getAAFor<AAIsDead>( + QueryingAA, IRPosition::function(*IRP.getAnchorScope()), + /* TrackDependence */ false); + bool AnyDead = false; + + using Item = std::pair<Value *, const Instruction *>; + SmallSet<Item, 16> Visited; + SmallVector<Item, 16> Worklist; + Worklist.push_back({&IRP.getAssociatedValue(), CtxI}); + + int Iteration = 0; + do { + Item I = Worklist.pop_back_val(); + Value *V = I.first; + CtxI = I.second; + if (StripCB) + V = StripCB(V); + + // Check if we should process the current value. To prevent endless + // recursion keep a record of the values we followed! + if (!Visited.insert(I).second) + continue; + + // Make sure we limit the compile time for complex expressions. + if (Iteration++ >= MaxValues) + return false; + + // Explicitly look through calls with a "returned" attribute if we do + // not have a pointer as stripPointerCasts only works on them. + Value *NewV = nullptr; + if (V->getType()->isPointerTy()) { + NewV = V->stripPointerCasts(); + } else { + auto *CB = dyn_cast<CallBase>(V); + if (CB && CB->getCalledFunction()) { + for (Argument &Arg : CB->getCalledFunction()->args()) + if (Arg.hasReturnedAttr()) { + NewV = CB->getArgOperand(Arg.getArgNo()); + break; + } + } + } + if (NewV && NewV != V) { + Worklist.push_back({NewV, CtxI}); + continue; + } + + // Look through select instructions, visit both potential values. + if (auto *SI = dyn_cast<SelectInst>(V)) { + Worklist.push_back({SI->getTrueValue(), CtxI}); + Worklist.push_back({SI->getFalseValue(), CtxI}); + continue; + } + + // Look through phi nodes, visit all live operands. + if (auto *PHI = dyn_cast<PHINode>(V)) { + assert(LivenessAA && + "Expected liveness in the presence of instructions!"); + for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { + BasicBlock *IncomingBB = PHI->getIncomingBlock(u); + if (A.isAssumedDead(*IncomingBB->getTerminator(), &QueryingAA, + LivenessAA, + /* CheckBBLivenessOnly */ true)) { + AnyDead = true; + continue; + } + Worklist.push_back( + {PHI->getIncomingValue(u), IncomingBB->getTerminator()}); + } + continue; + } + + if (UseValueSimplify && !isa<Constant>(V)) { + bool UsedAssumedInformation = false; + Optional<Constant *> C = + A.getAssumedConstant(*V, QueryingAA, UsedAssumedInformation); + if (!C.hasValue()) + continue; + if (Value *NewV = C.getValue()) { + Worklist.push_back({NewV, CtxI}); + continue; + } + } + + // Once a leaf is reached we inform the user through the callback. 
+ if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) + return false; + } while (!Worklist.empty()); + + // If we actually used liveness information so we have to record a dependence. + if (AnyDead) + A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL); + + // All values have been visited. + return true; +} + +const Value *stripAndAccumulateMinimalOffsets( + Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val, + const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, + bool UseAssumed = false) { + + auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool { + const IRPosition &Pos = IRPosition::value(V); + // Only track dependence if we are going to use the assumed info. + const AAValueConstantRange &ValueConstantRangeAA = + A.getAAFor<AAValueConstantRange>(QueryingAA, Pos, + /* TrackDependence */ UseAssumed); + ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed() + : ValueConstantRangeAA.getKnown(); + // We can only use the lower part of the range because the upper part can + // be higher than what the value can really be. + ROffset = Range.getSignedMin(); + return true; + }; + + return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds, + AttributorAnalysis); +} + +static const Value *getMinimalBaseOfAccsesPointerOperand( + Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I, + int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) { + const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false); + if (!Ptr) + return nullptr; + APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + const Value *Base = stripAndAccumulateMinimalOffsets( + A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds); + + BytesOffset = OffsetAPInt.getSExtValue(); + return Base; +} + +static const Value * +getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset, + const DataLayout &DL, + bool AllowNonInbounds = false) { + const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false); + if (!Ptr) + return nullptr; + + return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL, + AllowNonInbounds); +} + +/// Helper function to clamp a state \p S of type \p StateType with the +/// information in \p R and indicate/return if \p S did change (as-in update is +/// required to be run again). +template <typename StateType> +ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) { + auto Assumed = S.getAssumed(); + S ^= R; + return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; +} + +/// Clamp the information known for all returned values of a function +/// (identified by \p QueryingAA) into \p S. +template <typename AAType, typename StateType = typename AAType::StateType> +static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA, + StateType &S) { + LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for " + << QueryingAA << " into " << S << "\n"); + + assert((QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_RETURNED || + QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_CALL_SITE_RETURNED) && + "Can only clamp returned value states for a function returned or call " + "site returned position!"); + + // Use an optional state as there might not be any return values and we want + // to join (IntegerState::operator&) the state of all there are. + Optional<StateType> T; + + // Callback for each possibly returned value. 
+  auto CheckReturnValue = [&](Value &RV) -> bool {
+    const IRPosition &RVPos = IRPosition::value(RV);
+    const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
+    LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
+                      << " @ " << RVPos << "\n");
+    const StateType &AAS = static_cast<const StateType &>(AA.getState());
+    if (T.hasValue())
+      *T &= AAS;
+    else
+      T = AAS;
+    LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T
+                      << "\n");
+    return T->isValidState();
+  };
+
+  if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
+    S.indicatePessimisticFixpoint();
+  else if (T.hasValue())
+    S ^= *T;
+}
+
+/// Helper class for generic deduction: return value -> returned position.
+template <typename AAType, typename BaseType,
+          typename StateType = typename BaseType::StateType>
+struct AAReturnedFromReturnedValues : public BaseType {
+  AAReturnedFromReturnedValues(const IRPosition &IRP, Attributor &A)
+      : BaseType(IRP, A) {}
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    StateType S(StateType::getBestState(this->getState()));
+    clampReturnedValueStates<AAType, StateType>(A, *this, S);
+    // TODO: If we know we visited all returned values, thus none are assumed
+    // dead, we can take the known information from the state T.
+    return clampStateAndIndicateChange<StateType>(this->getState(), S);
+  }
+};
+
+/// Clamp the information known at all call sites for a given argument
+/// (identified by \p QueryingAA) into \p S.
+template <typename AAType, typename StateType = typename AAType::StateType>
+static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
+                                        StateType &S) {
+  LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
+                    << QueryingAA << " into " << S << "\n");
+
+  assert(QueryingAA.getIRPosition().getPositionKind() ==
+             IRPosition::IRP_ARGUMENT &&
+         "Can only clamp call site argument states for an argument position!");
+
+  // Use an optional state as there might not be any call site arguments and we
+  // want to join (IntegerState::operator&) the state of all there are.
+  Optional<StateType> T;
+
+  // The argument number which is also the call site argument number.
+  unsigned ArgNo = QueryingAA.getIRPosition().getArgNo();
+
+  auto CallSiteCheck = [&](AbstractCallSite ACS) {
+    const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
+    // Check if a corresponding argument was found or if it is not associated
+    // (which can happen for callback calls).
+    if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+      return false;
+
+    const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
+    LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
+                      << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
+    const StateType &AAS = static_cast<const StateType &>(AA.getState());
+    if (T.hasValue())
+      *T &= AAS;
+    else
+      T = AAS;
+    LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T
+                      << "\n");
+    return T->isValidState();
+  };
+
+  bool AllCallSitesKnown;
+  if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true,
+                              AllCallSitesKnown))
+    S.indicatePessimisticFixpoint();
+  else if (T.hasValue())
+    S ^= *T;
+}
+
+/// Helper class for generic deduction: call site argument -> argument position.
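// Editorial aside (a minimal, self-contained sketch; ToyState and
// ToyChangeStatus are made up for illustration and are not part of this
// patch): the clamping idiom used above boils down to "narrow the assumed
// information by what the other position allows and report whether anything
// changed", which is what drives the fixpoint iteration.
struct ToyState {
  bool Assumed = true;
  bool getAssumed() const { return Assumed; }
  // Clamp: keep only what \p R also assumes, mirroring operator^= on the real
  // Attributor states.
  ToyState &operator^=(const ToyState &R) {
    Assumed &= R.Assumed;
    return *this;
  }
};

enum class ToyChangeStatus { UNCHANGED, CHANGED };

template <typename StateType>
ToyChangeStatus toyClampAndIndicateChange(StateType &S, const StateType &R) {
  auto Assumed = S.getAssumed();
  S ^= R;
  return Assumed == S.getAssumed() ? ToyChangeStatus::UNCHANGED
                                   : ToyChangeStatus::CHANGED;
}
// Clamping an optimistic state with a pessimistic one reports CHANGED exactly
// once; clamping again is a no-op. The helper class right below applies this
// scheme to argument positions by joining all call site argument states.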
+template <typename AAType, typename BaseType, + typename StateType = typename AAType::StateType> +struct AAArgumentFromCallSiteArguments : public BaseType { + AAArgumentFromCallSiteArguments(const IRPosition &IRP, Attributor &A) + : BaseType(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + StateType S(StateType::getBestState(this->getState())); + clampCallSiteArgumentStates<AAType, StateType>(A, *this, S); + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange<StateType>(this->getState(), S); + } +}; + +/// Helper class for generic replication: function returned -> cs returned. +template <typename AAType, typename BaseType, + typename StateType = typename BaseType::StateType> +struct AACallSiteReturnedFromReturned : public BaseType { + AACallSiteReturnedFromReturned(const IRPosition &IRP, Attributor &A) + : BaseType(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + assert(this->getIRPosition().getPositionKind() == + IRPosition::IRP_CALL_SITE_RETURNED && + "Can only wrap function returned positions for call site returned " + "positions!"); + auto &S = this->getState(); + + const Function *AssociatedFunction = + this->getIRPosition().getAssociatedFunction(); + if (!AssociatedFunction) + return S.indicatePessimisticFixpoint(); + + IRPosition FnPos = IRPosition::returned(*AssociatedFunction); + const AAType &AA = A.getAAFor<AAType>(*this, FnPos); + return clampStateAndIndicateChange( + S, static_cast<const StateType &>(AA.getState())); + } +}; + +/// Helper function to accumulate uses. +template <class AAType, typename StateType = typename AAType::StateType> +static void followUsesInContext(AAType &AA, Attributor &A, + MustBeExecutedContextExplorer &Explorer, + const Instruction *CtxI, + SetVector<const Use *> &Uses, + StateType &State) { + auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI); + for (unsigned u = 0; u < Uses.size(); ++u) { + const Use *U = Uses[u]; + if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) { + bool Found = Explorer.findInContextOf(UserI, EIt, EEnd); + if (Found && AA.followUseInMBEC(A, U, UserI, State)) + for (const Use &Us : UserI->uses()) + Uses.insert(&Us); + } + } +} + +/// Use the must-be-executed-context around \p I to add information into \p S. +/// The AAType class is required to have `followUseInMBEC` method with the +/// following signature and behaviour: +/// +/// bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I) +/// U - Underlying use. +/// I - The user of the \p U. +/// Returns true if the value should be tracked transitively. +/// +template <class AAType, typename StateType = typename AAType::StateType> +static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S, + Instruction &CtxI) { + + // Container for (transitive) uses of the associated value. 
+ SetVector<const Use *> Uses; + for (const Use &U : AA.getIRPosition().getAssociatedValue().uses()) + Uses.insert(&U); + + MustBeExecutedContextExplorer &Explorer = + A.getInfoCache().getMustBeExecutedContextExplorer(); + + followUsesInContext<AAType>(AA, A, Explorer, &CtxI, Uses, S); + + if (S.isAtFixpoint()) + return; + + SmallVector<const BranchInst *, 4> BrInsts; + auto Pred = [&](const Instruction *I) { + if (const BranchInst *Br = dyn_cast<BranchInst>(I)) + if (Br->isConditional()) + BrInsts.push_back(Br); + return true; + }; + + // Here, accumulate conditional branch instructions in the context. We + // explore the child paths and collect the known states. The disjunction of + // those states can be merged to its own state. Let ParentState_i be a state + // to indicate the known information for an i-th branch instruction in the + // context. ChildStates are created for its successors respectively. + // + // ParentS_1 = ChildS_{1, 1} /\ ChildS_{1, 2} /\ ... /\ ChildS_{1, n_1} + // ParentS_2 = ChildS_{2, 1} /\ ChildS_{2, 2} /\ ... /\ ChildS_{2, n_2} + // ... + // ParentS_m = ChildS_{m, 1} /\ ChildS_{m, 2} /\ ... /\ ChildS_{m, n_m} + // + // Known State |= ParentS_1 \/ ParentS_2 \/... \/ ParentS_m + // + // FIXME: Currently, recursive branches are not handled. For example, we + // can't deduce that ptr must be dereferenced in below function. + // + // void f(int a, int c, int *ptr) { + // if(a) + // if (b) { + // *ptr = 0; + // } else { + // *ptr = 1; + // } + // else { + // if (b) { + // *ptr = 0; + // } else { + // *ptr = 1; + // } + // } + // } + + Explorer.checkForAllContext(&CtxI, Pred); + for (const BranchInst *Br : BrInsts) { + StateType ParentState; + + // The known state of the parent state is a conjunction of children's + // known states so it is initialized with a best state. + ParentState.indicateOptimisticFixpoint(); + + for (const BasicBlock *BB : Br->successors()) { + StateType ChildState; + + size_t BeforeSize = Uses.size(); + followUsesInContext(AA, A, Explorer, &BB->front(), Uses, ChildState); + + // Erase uses which only appear in the child. + for (auto It = Uses.begin() + BeforeSize; It != Uses.end();) + It = Uses.erase(It); + + ParentState &= ChildState; + } + + // Use only known state. + S += ParentState; + } +} + +/// -----------------------NoUnwind Function Attribute-------------------------- + +struct AANoUnwindImpl : AANoUnwind { + AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {} + + const std::string getAsStr() const override { + return getAssumed() ? "nounwind" : "may-unwind"; + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + auto Opcodes = { + (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet, + (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume}; + + auto CheckForNoUnwind = [&](Instruction &I) { + if (!I.mayThrow()) + return true; + + if (const auto *CB = dyn_cast<CallBase>(&I)) { + const auto &NoUnwindAA = + A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(*CB)); + return NoUnwindAA.isAssumedNoUnwind(); + } + return false; + }; + + if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } +}; + +struct AANoUnwindFunction final : public AANoUnwindImpl { + AANoUnwindFunction(const IRPosition &IRP, Attributor &A) + : AANoUnwindImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) } +}; + +/// NoUnwind attribute deduction for a call sites. +struct AANoUnwindCallSite final : AANoUnwindImpl { + AANoUnwindCallSite(const IRPosition &IRP, Attributor &A) + : AANoUnwindImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoUnwindImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AANoUnwind::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); } +}; + +/// --------------------- Function Return Values ------------------------------- + +/// "Attribute" that collects all potential returned values and the return +/// instructions that they arise from. +/// +/// If there is a unique returned value R, the manifest method will: +/// - mark R with the "returned" attribute, if R is an argument. +class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState { + + /// Mapping of values potentially returned by the associated function to the + /// return instructions that might return them. + MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues; + + /// Mapping to remember the number of returned values for a call site such + /// that we can avoid updates if nothing changed. + DenseMap<const CallBase *, unsigned> NumReturnedValuesPerKnownAA; + + /// Set of unresolved calls returned by the associated function. + SmallSetVector<CallBase *, 4> UnresolvedCalls; + + /// State flags + /// + ///{ + bool IsFixed = false; + bool IsValidState = true; + ///} + +public: + AAReturnedValuesImpl(const IRPosition &IRP, Attributor &A) + : AAReturnedValues(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // Reset the state. 
+ IsFixed = false; + IsValidState = true; + ReturnedValues.clear(); + + Function *F = getAssociatedFunction(); + if (!F) { + indicatePessimisticFixpoint(); + return; + } + assert(!F->getReturnType()->isVoidTy() && + "Did not expect a void return type!"); + + // The map from instruction opcodes to those instructions in the function. + auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F); + + // Look through all arguments, if one is marked as returned we are done. + for (Argument &Arg : F->args()) { + if (Arg.hasReturnedAttr()) { + auto &ReturnInstSet = ReturnedValues[&Arg]; + if (auto *Insts = OpcodeInstMap.lookup(Instruction::Ret)) + for (Instruction *RI : *Insts) + ReturnInstSet.insert(cast<ReturnInst>(RI)); + + indicateOptimisticFixpoint(); + return; + } + } + + if (!A.isFunctionIPOAmendable(*F)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override; + + /// See AbstractAttribute::getState(...). + AbstractState &getState() override { return *this; } + + /// See AbstractAttribute::getState(...). + const AbstractState &getState() const override { return *this; } + + /// See AbstractAttribute::updateImpl(Attributor &A). + ChangeStatus updateImpl(Attributor &A) override; + + llvm::iterator_range<iterator> returned_values() override { + return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); + } + + llvm::iterator_range<const_iterator> returned_values() const override { + return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); + } + + const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const override { + return UnresolvedCalls; + } + + /// Return the number of potential return values, -1 if unknown. + size_t getNumReturnValues() const override { + return isValidState() ? ReturnedValues.size() : -1; + } + + /// Return an assumed unique return value if a single candidate is found. If + /// there cannot be one, return a nullptr. If it is not clear yet, return the + /// Optional::NoneType. + Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const; + + /// See AbstractState::checkForAllReturnedValues(...). + bool checkForAllReturnedValuesAndReturnInsts( + function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred) + const override; + + /// Pretty print the attribute similar to the IR representation. + const std::string getAsStr() const override; + + /// See AbstractState::isAtFixpoint(). + bool isAtFixpoint() const override { return IsFixed; } + + /// See AbstractState::isValidState(). + bool isValidState() const override { return IsValidState; } + + /// See AbstractState::indicateOptimisticFixpoint(...). + ChangeStatus indicateOptimisticFixpoint() override { + IsFixed = true; + return ChangeStatus::UNCHANGED; + } + + ChangeStatus indicatePessimisticFixpoint() override { + IsFixed = true; + IsValidState = false; + return ChangeStatus::CHANGED; + } +}; + +ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + // Bookkeeping. + assert(isValidState()); + STATS_DECLTRACK(KnownReturnValues, FunctionReturn, + "Number of function with known return values"); + + // Check if we have an assumed unique return value that we could manifest. + Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A); + + if (!UniqueRV.hasValue() || !UniqueRV.getValue()) + return Changed; + + // Bookkeeping. 
+ STATS_DECLTRACK(UniqueReturnValue, FunctionReturn, + "Number of function with unique return"); + + // Callback to replace the uses of CB with the constant C. + auto ReplaceCallSiteUsersWith = [&A](CallBase &CB, Constant &C) { + if (CB.use_empty()) + return ChangeStatus::UNCHANGED; + if (A.changeValueAfterManifest(CB, C)) + return ChangeStatus::CHANGED; + return ChangeStatus::UNCHANGED; + }; + + // If the assumed unique return value is an argument, annotate it. + if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) { + if (UniqueRVArg->getType()->canLosslesslyBitCastTo( + getAssociatedFunction()->getReturnType())) { + getIRPosition() = IRPosition::argument(*UniqueRVArg); + Changed = IRAttribute::manifest(A); + } + } else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) { + // We can replace the returned value with the unique returned constant. + Value &AnchorValue = getAnchorValue(); + if (Function *F = dyn_cast<Function>(&AnchorValue)) { + for (const Use &U : F->uses()) + if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) + if (CB->isCallee(&U)) { + Constant *RVCCast = + CB->getType() == RVC->getType() + ? RVC + : ConstantExpr::getTruncOrBitCast(RVC, CB->getType()); + Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed; + } + } else { + assert(isa<CallBase>(AnchorValue) && + "Expcected a function or call base anchor!"); + Constant *RVCCast = + AnchorValue.getType() == RVC->getType() + ? RVC + : ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType()); + Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast); + } + if (Changed == ChangeStatus::CHANGED) + STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn, + "Number of function returns replaced by constant return"); + } + + return Changed; +} + +const std::string AAReturnedValuesImpl::getAsStr() const { + return (isAtFixpoint() ? "returns(#" : "may-return(#") + + (isValidState() ? std::to_string(getNumReturnValues()) : "?") + + ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]"; +} + +Optional<Value *> +AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const { + // If checkForAllReturnedValues provides a unique value, ignoring potential + // undef values that can also be present, it is assumed to be the actual + // return value and forwarded to the caller of this method. If there are + // multiple, a nullptr is returned indicating there cannot be a unique + // returned value. + Optional<Value *> UniqueRV; + + auto Pred = [&](Value &RV) -> bool { + // If we found a second returned value and neither the current nor the saved + // one is an undef, there is no unique returned value. Undefs are special + // since we can pretend they have any value. + if (UniqueRV.hasValue() && UniqueRV != &RV && + !(isa<UndefValue>(RV) || isa<UndefValue>(UniqueRV.getValue()))) { + UniqueRV = nullptr; + return false; + } + + // Do not overwrite a value with an undef. + if (!UniqueRV.hasValue() || !isa<UndefValue>(RV)) + UniqueRV = &RV; + + return true; + }; + + if (!A.checkForAllReturnedValues(Pred, *this)) + UniqueRV = nullptr; + + return UniqueRV; +} + +bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts( + function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred) + const { + if (!isValidState()) + return false; + + // Check all returned values but ignore call sites as long as we have not + // encountered an overdefined one during an update. 
+ for (auto &It : ReturnedValues) { + Value *RV = It.first; + + CallBase *CB = dyn_cast<CallBase>(RV); + if (CB && !UnresolvedCalls.count(CB)) + continue; + + if (!Pred(*RV, It.second)) + return false; + } + + return true; +} + +ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { + size_t NumUnresolvedCalls = UnresolvedCalls.size(); + bool Changed = false; + + // State used in the value traversals starting in returned values. + struct RVState { + // The map in which we collect return values -> return instrs. + decltype(ReturnedValues) &RetValsMap; + // The flag to indicate a change. + bool &Changed; + // The return instrs we come from. + SmallSetVector<ReturnInst *, 4> RetInsts; + }; + + // Callback for a leaf value returned by the associated function. + auto VisitValueCB = [](Value &Val, const Instruction *, RVState &RVS, + bool) -> bool { + auto Size = RVS.RetValsMap[&Val].size(); + RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end()); + bool Inserted = RVS.RetValsMap[&Val].size() != Size; + RVS.Changed |= Inserted; + LLVM_DEBUG({ + if (Inserted) + dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val + << " => " << RVS.RetInsts.size() << "\n"; + }); + return true; + }; + + // Helper method to invoke the generic value traversal. + auto VisitReturnedValue = [&](Value &RV, RVState &RVS, + const Instruction *CtxI) { + IRPosition RetValPos = IRPosition::value(RV); + return genericValueTraversal<AAReturnedValues, RVState>( + A, RetValPos, *this, RVS, VisitValueCB, CtxI, + /* UseValueSimplify */ false); + }; + + // Callback for all "return intructions" live in the associated function. + auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) { + ReturnInst &Ret = cast<ReturnInst>(I); + RVState RVS({ReturnedValues, Changed, {}}); + RVS.RetInsts.insert(&Ret); + return VisitReturnedValue(*Ret.getReturnValue(), RVS, &I); + }; + + // Start by discovering returned values from all live returned instructions in + // the associated function. + if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret})) + return indicatePessimisticFixpoint(); + + // Once returned values "directly" present in the code are handled we try to + // resolve returned calls. To avoid modifications to the ReturnedValues map + // while we iterate over it we kept record of potential new entries in a copy + // map, NewRVsMap. + decltype(ReturnedValues) NewRVsMap; + + auto HandleReturnValue = [&](Value *RV, SmallSetVector<ReturnInst *, 4> &RIs) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *RV + << " by #" << RIs.size() << " RIs\n"); + CallBase *CB = dyn_cast<CallBase>(RV); + if (!CB || UnresolvedCalls.count(CB)) + return; + + if (!CB->getCalledFunction()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB + << "\n"); + UnresolvedCalls.insert(CB); + return; + } + + // TODO: use the function scope once we have call site AAReturnedValues. + const auto &RetValAA = A.getAAFor<AAReturnedValues>( + *this, IRPosition::function(*CB->getCalledFunction())); + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: " + << RetValAA << "\n"); + + // Skip dead ends, thus if we do not know anything about the returned + // call we mark it as unresolved and it will stay that way. + if (!RetValAA.getState().isValidState()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB + << "\n"); + UnresolvedCalls.insert(CB); + return; + } + + // Do not try to learn partial information. 
If the callee has unresolved + // return values we will treat the call as unresolved/opaque. + auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls(); + if (!RetValAAUnresolvedCalls.empty()) { + UnresolvedCalls.insert(CB); + return; + } + + // Now check if we can track transitively returned values. If possible, thus + // if all return value can be represented in the current scope, do so. + bool Unresolved = false; + for (auto &RetValAAIt : RetValAA.returned_values()) { + Value *RetVal = RetValAAIt.first; + if (isa<Argument>(RetVal) || isa<CallBase>(RetVal) || + isa<Constant>(RetVal)) + continue; + // Anything that did not fit in the above categories cannot be resolved, + // mark the call as unresolved. + LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value " + "cannot be translated: " + << *RetVal << "\n"); + UnresolvedCalls.insert(CB); + Unresolved = true; + break; + } + + if (Unresolved) + return; + + // Now track transitively returned values. + unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB]; + if (NumRetAA == RetValAA.getNumReturnValues()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not " + "changed since it was seen last\n"); + return; + } + NumRetAA = RetValAA.getNumReturnValues(); + + for (auto &RetValAAIt : RetValAA.returned_values()) { + Value *RetVal = RetValAAIt.first; + if (Argument *Arg = dyn_cast<Argument>(RetVal)) { + // Arguments are mapped to call site operands and we begin the traversal + // again. + bool Unused = false; + RVState RVS({NewRVsMap, Unused, RetValAAIt.second}); + VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS, CB); + continue; + } else if (isa<CallBase>(RetVal)) { + // Call sites are resolved by the callee attribute over time, no need to + // do anything for us. + continue; + } else if (isa<Constant>(RetVal)) { + // Constants are valid everywhere, we can simply take them. + NewRVsMap[RetVal].insert(RIs.begin(), RIs.end()); + continue; + } + } + }; + + for (auto &It : ReturnedValues) + HandleReturnValue(It.first, It.second); + + // Because processing the new information can again lead to new return values + // we have to be careful and iterate until this iteration is complete. The + // idea is that we are in a stable state at the end of an update. All return + // values have been handled and properly categorized. We might not update + // again if we have not requested a non-fix attribute so we cannot "wait" for + // the next update to analyze a new return value. + while (!NewRVsMap.empty()) { + auto It = std::move(NewRVsMap.back()); + NewRVsMap.pop_back(); + + assert(!It.second.empty() && "Entry does not add anything."); + auto &ReturnInsts = ReturnedValues[It.first]; + for (ReturnInst *RI : It.second) + if (ReturnInsts.insert(RI)) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value " + << *It.first << " => " << *RI << "\n"); + HandleReturnValue(It.first, ReturnInsts); + Changed = true; + } + } + + Changed |= (NumUnresolvedCalls != UnresolvedCalls.size()); + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; +} + +struct AAReturnedValuesFunction final : public AAReturnedValuesImpl { + AAReturnedValuesFunction(const IRPosition &IRP, Attributor &A) + : AAReturnedValuesImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) } +}; + +/// Returned values information for a call sites. 
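// Editorial aside (illustrative source-level sketch, not part of this patch;
// `selectBuffer` is a hypothetical function): the manifest step above acts on
// the assumed unique return value. If every live return yields the same
// argument, that argument can be marked `returned`; if every return yields the
// same constant, uses of the call sites can instead be replaced by that
// constant.
static int *selectBuffer(int *Buffer, bool Reset) {
  if (Reset)
    return Buffer; // Same value on this path ...
  return Buffer;   // ... and on this one: Buffer is the unique return value.
}
// The call site variant below, in contrast, is not supported yet and bails out
// immediately.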
+struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
+  AAReturnedValuesCallSite(const IRPosition &IRP, Attributor &A)
+      : AAReturnedValuesImpl(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call sites instead of
+    //       redirecting requests to the callee.
+    llvm_unreachable("Abstract attributes for returned values are not "
+                     "supported for call sites yet!");
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    return indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+};
+
+/// ------------------------ NoSync Function Attribute -------------------------
+
+struct AANoSyncImpl : AANoSync {
+  AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {}
+
+  const std::string getAsStr() const override {
+    return getAssumed() ? "nosync" : "may-sync";
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+  /// Helper function used to determine whether an instruction is a non-relaxed
+  /// atomic, i.e., an atomic instruction whose ordering is neither unordered
+  /// nor monotonic.
+  static bool isNonRelaxedAtomic(Instruction *I);
+
+  /// Helper function used to determine whether an instruction is volatile.
+  static bool isVolatile(Instruction *I);
+
+  /// Helper function used to check whether an intrinsic is nosync (currently
+  /// only the memcpy, memmove, and memset intrinsics are handled).
+  static bool isNoSyncIntrinsic(Instruction *I);
+};
+
+bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
+  if (!I->isAtomic())
+    return false;
+
+  AtomicOrdering Ordering;
+  switch (I->getOpcode()) {
+  case Instruction::AtomicRMW:
+    Ordering = cast<AtomicRMWInst>(I)->getOrdering();
+    break;
+  case Instruction::Store:
+    Ordering = cast<StoreInst>(I)->getOrdering();
+    break;
+  case Instruction::Load:
+    Ordering = cast<LoadInst>(I)->getOrdering();
+    break;
+  case Instruction::Fence: {
+    auto *FI = cast<FenceInst>(I);
+    if (FI->getSyncScopeID() == SyncScope::SingleThread)
+      return false;
+    Ordering = FI->getOrdering();
+    break;
+  }
+  case Instruction::AtomicCmpXchg: {
+    AtomicOrdering Success = cast<AtomicCmpXchgInst>(I)->getSuccessOrdering();
+    AtomicOrdering Failure = cast<AtomicCmpXchgInst>(I)->getFailureOrdering();
+    // Only if both orderings are relaxed can the operation be treated as
+    // relaxed; otherwise it is non-relaxed.
+    if (Success != AtomicOrdering::Unordered &&
+        Success != AtomicOrdering::Monotonic)
+      return true;
+    if (Failure != AtomicOrdering::Unordered &&
+        Failure != AtomicOrdering::Monotonic)
+      return true;
+    return false;
+  }
+  default:
+    llvm_unreachable(
+        "New atomic operations need to be known in the attributor.");
+  }
+
+  // Relaxed.
+  if (Ordering == AtomicOrdering::Unordered ||
+      Ordering == AtomicOrdering::Monotonic)
+    return false;
+  return true;
+}
+
+/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
+/// FIXME: We should improve the handling of intrinsics.
+bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
+  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    /// Element wise atomic memory intrinsics can only be unordered,
+    /// therefore nosync.
+ case Intrinsic::memset_element_unordered_atomic: + case Intrinsic::memmove_element_unordered_atomic: + case Intrinsic::memcpy_element_unordered_atomic: + return true; + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + if (!cast<MemIntrinsic>(II)->isVolatile()) + return true; + return false; + default: + return false; + } + } + return false; +} + +bool AANoSyncImpl::isVolatile(Instruction *I) { + assert(!isa<CallBase>(I) && "Calls should not be checked here"); + + switch (I->getOpcode()) { + case Instruction::AtomicRMW: + return cast<AtomicRMWInst>(I)->isVolatile(); + case Instruction::Store: + return cast<StoreInst>(I)->isVolatile(); + case Instruction::Load: + return cast<LoadInst>(I)->isVolatile(); + case Instruction::AtomicCmpXchg: + return cast<AtomicCmpXchgInst>(I)->isVolatile(); + default: + return false; + } +} + +ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { + + auto CheckRWInstForNoSync = [&](Instruction &I) { + /// We are looking for volatile instructions or Non-Relaxed atomics. + /// FIXME: We should improve the handling of intrinsics. + + if (isa<IntrinsicInst>(&I) && isNoSyncIntrinsic(&I)) + return true; + + if (const auto *CB = dyn_cast<CallBase>(&I)) { + if (CB->hasFnAttr(Attribute::NoSync)) + return true; + + const auto &NoSyncAA = + A.getAAFor<AANoSync>(*this, IRPosition::callsite_function(*CB)); + if (NoSyncAA.isAssumedNoSync()) + return true; + return false; + } + + if (!isVolatile(&I) && !isNonRelaxedAtomic(&I)) + return true; + + return false; + }; + + auto CheckForNoSync = [&](Instruction &I) { + // At this point we handled all read/write effects and they are all + // nosync, so they can be skipped. + if (I.mayReadOrWriteMemory()) + return true; + + // non-convergent and readnone imply nosync. + return !cast<CallBase>(I).isConvergent(); + }; + + if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) || + !A.checkForAllCallLikeInstructions(CheckForNoSync, *this)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; +} + +struct AANoSyncFunction final : public AANoSyncImpl { + AANoSyncFunction(const IRPosition &IRP, Attributor &A) + : AANoSyncImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) } +}; + +/// NoSync attribute deduction for a call sites. +struct AANoSyncCallSite final : AANoSyncImpl { + AANoSyncCallSite(const IRPosition &IRP, Attributor &A) + : AANoSyncImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoSyncImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. 
+ Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast<const AANoSync::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); } +}; + +/// ------------------------ No-Free Attributes ---------------------------- + +struct AANoFreeImpl : public AANoFree { + AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto CheckForNoFree = [&](Instruction &I) { + const auto &CB = cast<CallBase>(I); + if (CB.hasFnAttr(Attribute::NoFree)) + return true; + + const auto &NoFreeAA = + A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(CB)); + return NoFreeAA.isAssumedNoFree(); + }; + + if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this)) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? "nofree" : "may-free"; + } +}; + +struct AANoFreeFunction final : public AANoFreeImpl { + AANoFreeFunction(const IRPosition &IRP, Attributor &A) + : AANoFreeImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) } +}; + +/// NoFree attribute deduction for a call sites. +struct AANoFreeCallSite final : AANoFreeImpl { + AANoFreeCallSite(const IRPosition &IRP, Attributor &A) + : AANoFreeImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoFreeImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast<const AANoFree::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); } +}; + +/// NoFree attribute for floating values. +struct AANoFreeFloating : AANoFreeImpl { + AANoFreeFloating(const IRPosition &IRP, Attributor &A) + : AANoFreeImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override{STATS_DECLTRACK_FLOATING_ATTR(nofree)} + + /// See Abstract Attribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + const IRPosition &IRP = getIRPosition(); + + const auto &NoFreeAA = + A.getAAFor<AANoFree>(*this, IRPosition::function_scope(IRP)); + if (NoFreeAA.isAssumedNoFree()) + return ChangeStatus::UNCHANGED; + + Value &AssociatedValue = getIRPosition().getAssociatedValue(); + auto Pred = [&](const Use &U, bool &Follow) -> bool { + Instruction *UserI = cast<Instruction>(U.getUser()); + if (auto *CB = dyn_cast<CallBase>(UserI)) { + if (CB->isBundleOperand(&U)) + return false; + if (!CB->isArgOperand(&U)) + return true; + unsigned ArgNo = CB->getArgOperandNo(&U); + + const auto &NoFreeArg = A.getAAFor<AANoFree>( + *this, IRPosition::callsite_argument(*CB, ArgNo)); + return NoFreeArg.isAssumedNoFree(); + } + + if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) || + isa<PHINode>(UserI) || isa<SelectInst>(UserI)) { + Follow = true; + return true; + } + if (isa<ReturnInst>(UserI)) + return true; + + // Unknown user. + return false; + }; + if (!A.checkForAllUses(Pred, *this, AssociatedValue)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } +}; + +/// NoFree attribute for a call site argument. +struct AANoFreeArgument final : AANoFreeFloating { + AANoFreeArgument(const IRPosition &IRP, Attributor &A) + : AANoFreeFloating(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nofree) } +}; + +/// NoFree attribute for call site arguments. +struct AANoFreeCallSiteArgument final : AANoFreeFloating { + AANoFreeCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AANoFreeFloating(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + if (!Arg) + return indicatePessimisticFixpoint(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), static_cast<const AANoFree::StateType &>(ArgAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nofree)}; +}; + +/// NoFree attribute for function return value. +struct AANoFreeReturned final : AANoFreeFloating { + AANoFreeReturned(const IRPosition &IRP, Attributor &A) + : AANoFreeFloating(IRP, A) { + llvm_unreachable("NoFree is not applicable to function returns!"); + } + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + llvm_unreachable("NoFree is not applicable to function returns!"); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("NoFree is not applicable to function returns!"); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// NoFree attribute deduction for a call site return value. 
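// Editorial aside (illustrative sketch, not part of this patch; both callees
// are hypothetical and only assumed to have the properties stated in the
// comments): what the use-walk in AANoFreeFloating accepts and rejects.
extern void useButNeverFree(int *P); // Assumed: argument deduced nofree.
extern void releaseBuffer(int *P);   // Assumed: may free its argument.

// P's only uses are a GEP-like adjustment and a nofree argument position, so
// the walk can keep the nofree assumption for P.
static void keepsNoFree(int *P) { useButNeverFree(P + 1); }

// Here P reaches an argument position that is not known to be nofree, which is
// exactly the kind of use that defeats the deduction.
static void losesNoFree(int *P) { releaseBuffer(P); }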
+struct AANoFreeCallSiteReturned final : AANoFreeFloating { + AANoFreeCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AANoFreeFloating(IRP, A) {} + + ChangeStatus manifest(Attributor &A) override { + return ChangeStatus::UNCHANGED; + } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) } +}; + +/// ------------------------ NonNull Argument Attribute ------------------------ +static int64_t getKnownNonNullAndDerefBytesForUse( + Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue, + const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) { + TrackUse = false; + + const Value *UseV = U->get(); + if (!UseV->getType()->isPointerTy()) + return 0; + + Type *PtrTy = UseV->getType(); + const Function *F = I->getFunction(); + bool NullPointerIsDefined = + F ? llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true; + const DataLayout &DL = A.getInfoCache().getDL(); + if (const auto *CB = dyn_cast<CallBase>(I)) { + if (CB->isBundleOperand(U)) { + if (RetainedKnowledge RK = getKnowledgeFromUse( + U, {Attribute::NonNull, Attribute::Dereferenceable})) { + IsNonNull |= + (RK.AttrKind == Attribute::NonNull || !NullPointerIsDefined); + return RK.ArgValue; + } + return 0; + } + + if (CB->isCallee(U)) { + IsNonNull |= !NullPointerIsDefined; + return 0; + } + + unsigned ArgNo = CB->getArgOperandNo(U); + IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo); + // As long as we only use known information there is no need to track + // dependences here. + auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP, + /* TrackDependence */ false); + IsNonNull |= DerefAA.isKnownNonNull(); + return DerefAA.getKnownDereferenceableBytes(); + } + + // We need to follow common pointer manipulation uses to the accesses they + // feed into. We can try to be smart to avoid looking through things we do not + // like for now, e.g., non-inbounds GEPs. + if (isa<CastInst>(I)) { + TrackUse = true; + return 0; + } + + if (isa<GetElementPtrInst>(I)) { + TrackUse = true; + return 0; + } + + int64_t Offset; + const Value *Base = + getMinimalBaseOfAccsesPointerOperand(A, QueryingAA, I, Offset, DL); + if (Base) { + if (Base == &AssociatedValue && + getPointerOperand(I, /* AllowVolatile */ false) == UseV) { + int64_t DerefBytes = + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()) + Offset; + + IsNonNull |= !NullPointerIsDefined; + return std::max(int64_t(0), DerefBytes); + } + } + + /// Corner case when an offset is 0. + Base = getBasePointerOfAccessPointerOperand(I, Offset, DL, + /*AllowNonInbounds*/ true); + if (Base) { + if (Offset == 0 && Base == &AssociatedValue && + getPointerOperand(I, /* AllowVolatile */ false) == UseV) { + int64_t DerefBytes = + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()); + IsNonNull |= !NullPointerIsDefined; + return std::max(int64_t(0), DerefBytes); + } + } + + return 0; +} + +struct AANonNullImpl : AANonNull { + AANonNullImpl(const IRPosition &IRP, Attributor &A) + : AANonNull(IRP, A), + NullIsDefined(NullPointerIsDefined( + getAnchorScope(), + getAssociatedValue().getType()->getPointerAddressSpace())) {} + + /// See AbstractAttribute::initialize(...). 
+ void initialize(Attributor &A) override { + Value &V = getAssociatedValue(); + if (!NullIsDefined && + hasAttr({Attribute::NonNull, Attribute::Dereferenceable}, + /* IgnoreSubsumingPositions */ false, &A)) + indicateOptimisticFixpoint(); + else if (isa<ConstantPointerNull>(V)) + indicatePessimisticFixpoint(); + else + AANonNull::initialize(A); + + bool CanBeNull = true; + if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull)) + if (!CanBeNull) + indicateOptimisticFixpoint(); + + if (!getState().isAtFixpoint()) + if (Instruction *CtxI = getCtxI()) + followUsesInMBEC(*this, A, getState(), *CtxI); + } + + /// See followUsesInMBEC + bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I, + AANonNull::StateType &State) { + bool IsNonNull = false; + bool TrackUse = false; + getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I, + IsNonNull, TrackUse); + State.setKnown(IsNonNull); + return TrackUse; + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? "nonnull" : "may-null"; + } + + /// Flag to determine if the underlying value can be null and still allow + /// valid accesses. + const bool NullIsDefined; +}; + +/// NonNull attribute for a floating value. +struct AANonNullFloating : public AANonNullImpl { + AANonNullFloating(const IRPosition &IRP, Attributor &A) + : AANonNullImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + if (!NullIsDefined) { + const auto &DerefAA = + A.getAAFor<AADereferenceable>(*this, getIRPosition()); + if (DerefAA.getAssumedDereferenceableBytes()) + return ChangeStatus::UNCHANGED; + } + + const DataLayout &DL = A.getDataLayout(); + + DominatorTree *DT = nullptr; + AssumptionCache *AC = nullptr; + InformationCache &InfoCache = A.getInfoCache(); + if (const Function *Fn = getAnchorScope()) { + DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Fn); + AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn); + } + + auto VisitValueCB = [&](Value &V, const Instruction *CtxI, + AANonNull::StateType &T, bool Stripped) -> bool { + const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + if (!isKnownNonZero(&V, DL, 0, AC, CtxI, DT)) + T.indicatePessimisticFixpoint(); + } else { + // Use abstract attribute information. + const AANonNull::StateType &NS = + static_cast<const AANonNull::StateType &>(AA.getState()); + T ^= NS; + } + return T.isValidState(); + }; + + StateType T; + if (!genericValueTraversal<AANonNull, StateType>( + A, getIRPosition(), *this, T, VisitValueCB, getCtxI())) + return indicatePessimisticFixpoint(); + + return clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } +}; + +/// NonNull attribute for function return value. +struct AANonNullReturned final + : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> { + AANonNullReturned(const IRPosition &IRP, Attributor &A) + : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } +}; + +/// NonNull attribute for function argument. 
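// Editorial aside (illustrative sketch, not part of this patch; assumes the
// default address space, where null is not a valid access): the use-based
// reasoning behind getKnownNonNullAndDerefBytesForUse at the source level.
//
// The unconditional load is in the must-be-executed context of the entry, so
// the argument can be assumed nonnull and dereferenceable for (roughly)
// sizeof(int) bytes.
static int alwaysReads(int *P) { return *P; }

// Behind a condition the access is no longer must-be-executed on entry; only
// if both successors of the branch accessed P could followUsesInMBEC recover
// the fact, and here one arm does not touch P at all.
static int maybeReads(int *P, bool C) { return C ? *P : 0; }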
+struct AANonNullArgument final + : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl> { + AANonNullArgument(const IRPosition &IRP, Attributor &A) + : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl>(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) } +}; + +struct AANonNullCallSiteArgument final : AANonNullFloating { + AANonNullCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AANonNullFloating(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) } +}; + +/// NonNull attribute for a call site return position. +struct AANonNullCallSiteReturned final + : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl> { + AANonNullCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl>(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } +}; + +/// ------------------------ No-Recurse Attributes ---------------------------- + +struct AANoRecurseImpl : public AANoRecurse { + AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {} + + /// See AbstractAttribute::getAsStr() + const std::string getAsStr() const override { + return getAssumed() ? "norecurse" : "may-recurse"; + } +}; + +struct AANoRecurseFunction final : AANoRecurseImpl { + AANoRecurseFunction(const IRPosition &IRP, Attributor &A) + : AANoRecurseImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoRecurseImpl::initialize(A); + if (const Function *F = getAnchorScope()) + if (A.getInfoCache().getSccSize(*F) != 1) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + + // If all live call sites are known to be no-recurse, we are as well. + auto CallSitePred = [&](AbstractCallSite ACS) { + const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( + *this, IRPosition::function(*ACS.getInstruction()->getFunction()), + /* TrackDependence */ false, DepClassTy::OPTIONAL); + return NoRecurseAA.isKnownNoRecurse(); + }; + bool AllCallSitesKnown; + if (A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) { + // If we know all call sites and all are known no-recurse, we are done. + // If all known call sites, which might not be all that exist, are known + // to be no-recurse, we are not done but we can continue to assume + // no-recurse. If one of the call sites we have not visited will become + // live, another update is triggered. + if (AllCallSitesKnown) + indicateOptimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + // If the above check does not hold anymore we look at the calls. 
+ auto CheckForNoRecurse = [&](Instruction &I) { + const auto &CB = cast<CallBase>(I); + if (CB.hasFnAttr(Attribute::NoRecurse)) + return true; + + const auto &NoRecurseAA = + A.getAAFor<AANoRecurse>(*this, IRPosition::callsite_function(CB)); + if (!NoRecurseAA.isAssumedNoRecurse()) + return false; + + // Recursion to the same function + if (CB.getCalledFunction() == getAnchorScope()) + return false; + + return true; + }; + + if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this)) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) } +}; + +/// NoRecurse attribute deduction for a call sites. +struct AANoRecurseCallSite final : AANoRecurseImpl { + AANoRecurseCallSite(const IRPosition &IRP, Attributor &A) + : AANoRecurseImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoRecurseImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AANoRecurse::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); } +}; + +/// -------------------- Undefined-Behavior Attributes ------------------------ + +struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { + AAUndefinedBehaviorImpl(const IRPosition &IRP, Attributor &A) + : AAUndefinedBehavior(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + // through a pointer (i.e. also branches etc.) + ChangeStatus updateImpl(Attributor &A) override { + const size_t UBPrevSize = KnownUBInsts.size(); + const size_t NoUBPrevSize = AssumedNoUBInsts.size(); + + auto InspectMemAccessInstForUB = [&](Instruction &I) { + // Skip instructions that are already saved. + if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I)) + return true; + + // If we reach here, we know we have an instruction + // that accesses memory through a pointer operand, + // for which getPointerOperand() should give it to us. + const Value *PtrOp = getPointerOperand(&I, /* AllowVolatile */ true); + assert(PtrOp && + "Expected pointer operand of memory accessing instruction"); + + // Either we stopped and the appropriate action was taken, + // or we got back a simplified value to continue. + Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I); + if (!SimplifiedPtrOp.hasValue()) + return true; + const Value *PtrOpVal = SimplifiedPtrOp.getValue(); + + // A memory access through a pointer is considered UB + // only if the pointer has constant null value. + // TODO: Expand it to not only check constant values. + if (!isa<ConstantPointerNull>(PtrOpVal)) { + AssumedNoUBInsts.insert(&I); + return true; + } + const Type *PtrTy = PtrOpVal->getType(); + + // Because we only consider instructions inside functions, + // assume that a parent function exists. 
+ const Function *F = I.getFunction(); + + // A memory access using constant null pointer is only considered UB + // if null pointer is _not_ defined for the target platform. + if (llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace())) + AssumedNoUBInsts.insert(&I); + else + KnownUBInsts.insert(&I); + return true; + }; + + auto InspectBrInstForUB = [&](Instruction &I) { + // A conditional branch instruction is considered UB if it has `undef` + // condition. + + // Skip instructions that are already saved. + if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I)) + return true; + + // We know we have a branch instruction. + auto BrInst = cast<BranchInst>(&I); + + // Unconditional branches are never considered UB. + if (BrInst->isUnconditional()) + return true; + + // Either we stopped and the appropriate action was taken, + // or we got back a simplified value to continue. + Optional<Value *> SimplifiedCond = + stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst); + if (!SimplifiedCond.hasValue()) + return true; + AssumedNoUBInsts.insert(&I); + return true; + }; + + A.checkForAllInstructions(InspectMemAccessInstForUB, *this, + {Instruction::Load, Instruction::Store, + Instruction::AtomicCmpXchg, + Instruction::AtomicRMW}, + /* CheckBBLivenessOnly */ true); + A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br}, + /* CheckBBLivenessOnly */ true); + if (NoUBPrevSize != AssumedNoUBInsts.size() || + UBPrevSize != KnownUBInsts.size()) + return ChangeStatus::CHANGED; + return ChangeStatus::UNCHANGED; + } + + bool isKnownToCauseUB(Instruction *I) const override { + return KnownUBInsts.count(I); + } + + bool isAssumedToCauseUB(Instruction *I) const override { + // In simple words, if an instruction is not in the assumed to _not_ + // cause UB, then it is assumed UB (that includes those + // in the KnownUBInsts set). The rest is boilerplate + // is to ensure that it is one of the instructions we test + // for UB. + + switch (I->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + case Instruction::AtomicCmpXchg: + case Instruction::AtomicRMW: + return !AssumedNoUBInsts.count(I); + case Instruction::Br: { + auto BrInst = cast<BranchInst>(I); + if (BrInst->isUnconditional()) + return false; + return !AssumedNoUBInsts.count(I); + } break; + default: + return false; + } + return false; + } + + ChangeStatus manifest(Attributor &A) override { + if (KnownUBInsts.empty()) + return ChangeStatus::UNCHANGED; + for (Instruction *I : KnownUBInsts) + A.changeToUnreachableAfterManifest(I); + return ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::getAsStr() + const std::string getAsStr() const override { + return getAssumed() ? "undefined-behavior" : "no-ub"; + } + + /// Note: The correctness of this analysis depends on the fact that the + /// following 2 sets will stop changing after some point. + /// "Change" here means that their size changes. + /// The size of each set is monotonically increasing + /// (we only add items to them) and it is upper bounded by the number of + /// instructions in the processed function (we can never save more + /// elements in either set than this number). Hence, at some point, + /// they will stop increasing. + /// Consequently, at some point, both sets will have stopped + /// changing, effectively making the analysis reach a fixpoint. 
+
+  /// Note: These 2 sets are disjoint and an instruction can be considered
+  /// one of 3 things:
+  /// 1) Known to cause UB (AAUndefinedBehavior could prove it) and put it in
+  ///    the KnownUBInsts set.
+  /// 2) Assumed to cause UB (in every updateImpl, AAUndefinedBehavior
+  ///    has a reason to assume it).
+  /// 3) Assumed to not cause UB. Every other instruction - AAUndefinedBehavior
+  ///    could not find a reason to assume or prove that it can cause UB,
+  ///    hence it assumes it doesn't. We have a set for these instructions
+  ///    so that we don't reprocess them in every update.
+  ///    Note however that instructions in this set may cause UB.
+
+protected:
+  /// A set of all live instructions _known_ to cause UB.
+  SmallPtrSet<Instruction *, 8> KnownUBInsts;
+
+private:
+  /// A set of all the (live) instructions that are assumed to _not_ cause UB.
+  SmallPtrSet<Instruction *, 8> AssumedNoUBInsts;
+
+  // Should be called on updates in which, if we're processing an instruction
+  // \p I that depends on a value \p V, one of the following has to happen:
+  // - If the value is assumed, then stop.
+  // - If the value is known but undef, then consider it UB.
+  // - Otherwise, do specific processing with the simplified value.
+  // We return None in the first 2 cases to signify that an appropriate
+  // action was taken and the caller should stop.
+  // Otherwise, we return the simplified value that the caller should
+  // use for specific processing.
+  Optional<Value *> stopOnUndefOrAssumed(Attributor &A, const Value *V,
+                                         Instruction *I) {
+    const auto &ValueSimplifyAA =
+        A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*V));
+    Optional<Value *> SimplifiedV =
+        ValueSimplifyAA.getAssumedSimplifiedValue(A);
+    if (!ValueSimplifyAA.isKnown()) {
+      // Don't depend on assumed values.
+      return llvm::None;
+    }
+    if (!SimplifiedV.hasValue()) {
+      // If it is known (which we tested above) but it doesn't have a value,
+      // then we can assume `undef` and hence the instruction is UB.
+      KnownUBInsts.insert(I);
+      return llvm::None;
+    }
+    Value *Val = SimplifiedV.getValue();
+    if (isa<UndefValue>(Val)) {
+      KnownUBInsts.insert(I);
+      return llvm::None;
+    }
+    return Val;
+  }
+};
+
+struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
+  AAUndefinedBehaviorFunction(const IRPosition &IRP, Attributor &A)
+      : AAUndefinedBehaviorImpl(IRP, A) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECL(UndefinedBehaviorInstruction, Instruction,
+               "Number of instructions known to have UB");
+    BUILD_STAT_NAME(UndefinedBehaviorInstruction, Instruction) +=
+        KnownUBInsts.size();
+  }
+};
+
+/// ------------------------ Will-Return Attributes ----------------------------
+
+// Helper function that checks whether a function has any cycle which we don't
+// know to be bounded.
+// Loops with a maximum trip count are considered bounded; any other cycle is
+// not.
+static bool mayContainUnboundedCycle(Function &F, Attributor &A) {
+  ScalarEvolution *SE =
+      A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(F);
+  LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(F);
+  // If either SCEV or LoopInfo is not available for the function then we assume
+  // any cycle to be an unbounded cycle.
+  // We use scc_iterator, which uses Tarjan's algorithm, to find all the maximal
+  // SCCs. To detect if there's a cycle, we only need to find the maximal ones.
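
When neither ScalarEvolution nor LoopInfo is available, the helper above only needs to answer whether there is any cycle at all. A standalone sketch of that fallback on a toy CFG given as an adjacency list; the real code walks llvm::scc_iterator instead of the hand-rolled coloring DFS shown here:

#include <vector>

static bool hasCycleFrom(int N, const std::vector<std::vector<int>> &Succ,
                         std::vector<int> &Color) {
  Color[N] = 1;                       // gray: on the current DFS path
  for (int S : Succ[N]) {
    if (Color[S] == 1)                // back edge, therefore a cycle
      return true;
    if (Color[S] == 0 && hasCycleFrom(S, Succ, Color))
      return true;
  }
  Color[N] = 2;                       // black: fully explored
  return false;
}

bool mayContainCycle(const std::vector<std::vector<int>> &Succ) {
  std::vector<int> Color(Succ.size(), 0);
  for (size_t N = 0; N < Succ.size(); ++N)
    if (Color[N] == 0 && hasCycleFrom(static_cast<int>(N), Succ, Color))
      return true;
  return false;
}
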
+ if (!SE || !LI) { + for (scc_iterator<Function *> SCCI = scc_begin(&F); !SCCI.isAtEnd(); ++SCCI) + if (SCCI.hasCycle()) + return true; + return false; + } + + // If there's irreducible control, the function may contain non-loop cycles. + if (mayContainIrreducibleControl(F, LI)) + return true; + + // Any loop that does not have a max trip count is considered unbounded cycle. + for (auto *L : LI->getLoopsInPreorder()) { + if (!SE->getSmallConstantMaxTripCount(L)) + return true; + } + return false; +} + +struct AAWillReturnImpl : public AAWillReturn { + AAWillReturnImpl(const IRPosition &IRP, Attributor &A) + : AAWillReturn(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAWillReturn::initialize(A); + + Function *F = getAnchorScope(); + if (!F || !A.isFunctionIPOAmendable(*F) || mayContainUnboundedCycle(*F, A)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto CheckForWillReturn = [&](Instruction &I) { + IRPosition IPos = IRPosition::callsite_function(cast<CallBase>(I)); + const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, IPos); + if (WillReturnAA.isKnownWillReturn()) + return true; + if (!WillReturnAA.isAssumedWillReturn()) + return false; + const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(*this, IPos); + return NoRecurseAA.isAssumedNoRecurse(); + }; + + if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::getAsStr() + const std::string getAsStr() const override { + return getAssumed() ? "willreturn" : "may-noreturn"; + } +}; + +struct AAWillReturnFunction final : AAWillReturnImpl { + AAWillReturnFunction(const IRPosition &IRP, Attributor &A) + : AAWillReturnImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) } +}; + +/// WillReturn attribute deduction for a call sites. +struct AAWillReturnCallSite final : AAWillReturnImpl { + AAWillReturnCallSite(const IRPosition &IRP, Attributor &A) + : AAWillReturnImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAWillReturnImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AAWillReturn::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); } +}; + +/// -------------------AAReachability Attribute-------------------------- + +struct AAReachabilityImpl : AAReachability { + AAReachabilityImpl(const IRPosition &IRP, Attributor &A) + : AAReachability(IRP, A) {} + + const std::string getAsStr() const override { + // TODO: Return the number of reachable queries. 
+ return "reachable"; + } + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { indicatePessimisticFixpoint(); } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } +}; + +struct AAReachabilityFunction final : public AAReachabilityImpl { + AAReachabilityFunction(const IRPosition &IRP, Attributor &A) + : AAReachabilityImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); } +}; + +/// ------------------------ NoAlias Argument Attribute ------------------------ + +struct AANoAliasImpl : AANoAlias { + AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) { + assert(getAssociatedType()->isPointerTy() && + "Noalias is a pointer attribute"); + } + + const std::string getAsStr() const override { + return getAssumed() ? "noalias" : "may-alias"; + } +}; + +/// NoAlias attribute for a floating value. +struct AANoAliasFloating final : AANoAliasImpl { + AANoAliasFloating(const IRPosition &IRP, Attributor &A) + : AANoAliasImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Value *Val = &getAssociatedValue(); + do { + CastInst *CI = dyn_cast<CastInst>(Val); + if (!CI) + break; + Value *Base = CI->getOperand(0); + if (!Base->hasOneUse()) + break; + Val = Base; + } while (true); + + if (!Val->getType()->isPointerTy()) { + indicatePessimisticFixpoint(); + return; + } + + if (isa<AllocaInst>(Val)) + indicateOptimisticFixpoint(); + else if (isa<ConstantPointerNull>(Val) && + !NullPointerIsDefined(getAnchorScope(), + Val->getType()->getPointerAddressSpace())) + indicateOptimisticFixpoint(); + else if (Val != &getAssociatedValue()) { + const auto &ValNoAliasAA = + A.getAAFor<AANoAlias>(*this, IRPosition::value(*Val)); + if (ValNoAliasAA.isKnownNoAlias()) + indicateOptimisticFixpoint(); + } + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Implement this. + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(noalias) + } +}; + +/// NoAlias attribute for an argument. +struct AANoAliasArgument final + : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> { + using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>; + AANoAliasArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Base::initialize(A); + // See callsite argument attribute and callee argument attribute. + if (hasAttr({Attribute::ByVal})) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::update(...). + ChangeStatus updateImpl(Attributor &A) override { + // We have to make sure no-alias on the argument does not break + // synchronization when this is a callback argument, see also [1] below. + // If synchronization cannot be affected, we delegate to the base updateImpl + // function, otherwise we give up for now. + + // If the function is no-sync, no-alias cannot break synchronization. 
+ const auto &NoSyncAA = A.getAAFor<AANoSync>( + *this, IRPosition::function_scope(getIRPosition())); + if (NoSyncAA.isAssumedNoSync()) + return Base::updateImpl(A); + + // If the argument is read-only, no-alias cannot break synchronization. + const auto &MemBehaviorAA = + A.getAAFor<AAMemoryBehavior>(*this, getIRPosition()); + if (MemBehaviorAA.isAssumedReadOnly()) + return Base::updateImpl(A); + + // If the argument is never passed through callbacks, no-alias cannot break + // synchronization. + bool AllCallSitesKnown; + if (A.checkForAllCallSites( + [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this, + true, AllCallSitesKnown)) + return Base::updateImpl(A); + + // TODO: add no-alias but make sure it doesn't break synchronization by + // introducing fake uses. See: + // [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel, + // International Workshop on OpenMP 2018, + // http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf + + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) } +}; + +struct AANoAliasCallSiteArgument final : AANoAliasImpl { + AANoAliasCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AANoAliasImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // See callsite argument attribute and callee argument attribute. + const auto &CB = cast<CallBase>(getAnchorValue()); + if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias)) + indicateOptimisticFixpoint(); + Value &Val = getAssociatedValue(); + if (isa<ConstantPointerNull>(Val) && + !NullPointerIsDefined(getAnchorScope(), + Val.getType()->getPointerAddressSpace())) + indicateOptimisticFixpoint(); + } + + /// Determine if the underlying value may alias with the call site argument + /// \p OtherArgNo of \p ICS (= the underlying call site). + bool mayAliasWithArgument(Attributor &A, AAResults *&AAR, + const AAMemoryBehavior &MemBehaviorAA, + const CallBase &CB, unsigned OtherArgNo) { + // We do not need to worry about aliasing with the underlying IRP. + if (this->getArgNo() == (int)OtherArgNo) + return false; + + // If it is not a pointer or pointer vector we do not alias. + const Value *ArgOp = CB.getArgOperand(OtherArgNo); + if (!ArgOp->getType()->isPtrOrPtrVectorTy()) + return false; + + auto &CBArgMemBehaviorAA = A.getAAFor<AAMemoryBehavior>( + *this, IRPosition::callsite_argument(CB, OtherArgNo), + /* TrackDependence */ false); + + // If the argument is readnone, there is no read-write aliasing. + if (CBArgMemBehaviorAA.isAssumedReadNone()) { + A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); + return false; + } + + // If the argument is readonly and the underlying value is readonly, there + // is no read-write aliasing. + bool IsReadOnly = MemBehaviorAA.isAssumedReadOnly(); + if (CBArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) { + A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL); + A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); + return false; + } + + // We have to utilize actual alias analysis queries so we need the object. + if (!AAR) + AAR = A.getInfoCache().getAAResultsForFunction(*getAnchorScope()); + + // Try to rule it out at the call site. 
+ bool IsAliasing = !AAR || !AAR->isNoAlias(&getAssociatedValue(), ArgOp); + LLVM_DEBUG(dbgs() << "[NoAliasCSArg] Check alias between " + "callsite arguments: " + << getAssociatedValue() << " " << *ArgOp << " => " + << (IsAliasing ? "" : "no-") << "alias \n"); + + return IsAliasing; + } + + bool + isKnownNoAliasDueToNoAliasPreservation(Attributor &A, AAResults *&AAR, + const AAMemoryBehavior &MemBehaviorAA, + const AANoAlias &NoAliasAA) { + // We can deduce "noalias" if the following conditions hold. + // (i) Associated value is assumed to be noalias in the definition. + // (ii) Associated value is assumed to be no-capture in all the uses + // possibly executed before this callsite. + // (iii) There is no other pointer argument which could alias with the + // value. + + bool AssociatedValueIsNoAliasAtDef = NoAliasAA.isAssumedNoAlias(); + if (!AssociatedValueIsNoAliasAtDef) { + LLVM_DEBUG(dbgs() << "[AANoAlias] " << getAssociatedValue() + << " is not no-alias at the definition\n"); + return false; + } + + A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL); + + const IRPosition &VIRP = IRPosition::value(getAssociatedValue()); + auto &NoCaptureAA = + A.getAAFor<AANoCapture>(*this, VIRP, /* TrackDependence */ false); + // Check whether the value is captured in the scope using AANoCapture. + // Look at CFG and check only uses possibly executed before this + // callsite. + auto UsePred = [&](const Use &U, bool &Follow) -> bool { + Instruction *UserI = cast<Instruction>(U.getUser()); + + // If user if curr instr and only use. + if (UserI == getCtxI() && UserI->hasOneUse()) + return true; + + const Function *ScopeFn = VIRP.getAnchorScope(); + if (ScopeFn) { + const auto &ReachabilityAA = + A.getAAFor<AAReachability>(*this, IRPosition::function(*ScopeFn)); + + if (!ReachabilityAA.isAssumedReachable(UserI, getCtxI())) + return true; + + if (auto *CB = dyn_cast<CallBase>(UserI)) { + if (CB->isArgOperand(&U)) { + + unsigned ArgNo = CB->getArgOperandNo(&U); + + const auto &NoCaptureAA = A.getAAFor<AANoCapture>( + *this, IRPosition::callsite_argument(*CB, ArgNo)); + + if (NoCaptureAA.isAssumedNoCapture()) + return true; + } + } + } + + // For cases which can potentially have more users + if (isa<GetElementPtrInst>(U) || isa<BitCastInst>(U) || isa<PHINode>(U) || + isa<SelectInst>(U)) { + Follow = true; + return true; + } + + LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *U << "\n"); + return false; + }; + + if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + if (!A.checkForAllUses(UsePred, *this, getAssociatedValue())) { + LLVM_DEBUG( + dbgs() << "[AANoAliasCSArg] " << getAssociatedValue() + << " cannot be noalias as it is potentially captured\n"); + return false; + } + } + A.recordDependence(NoCaptureAA, *this, DepClassTy::OPTIONAL); + + // Check there is no other pointer argument which could alias with the + // value passed at this call site. + // TODO: AbstractCallSite + const auto &CB = cast<CallBase>(getAnchorValue()); + for (unsigned OtherArgNo = 0; OtherArgNo < CB.getNumArgOperands(); + OtherArgNo++) + if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo)) + return false; + + return true; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // If the argument is readnone we are done as there are no accesses via the + // argument. 
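
The use predicate above follows the usual worklist pattern: benign uses are accepted, address-producing users (GEP, bitcast, PHI, select) have their own uses enqueued, and any unknown user conservatively fails the check. A standalone sketch of that pattern; UseKind and the little use graph are illustrative stand-ins for the reachability and nocapture queries the real code performs:

#include <deque>
#include <vector>

enum class UseKind { Benign, AddressProducer, Unknown };

struct UseNode {
  UseKind Kind;
  std::vector<int> TransitiveUsers; // uses of the value this user produces
};

// Returns true if every (transitive) use is benign, i.e. nothing we cannot
// reason about ever sees the pointer.
bool allUsesBenign(const std::vector<UseNode> &Uses,
                   const std::vector<int> &Roots) {
  std::vector<bool> Visited(Uses.size(), false);
  std::deque<int> Worklist(Roots.begin(), Roots.end());
  while (!Worklist.empty()) {
    int U = Worklist.front();
    Worklist.pop_front();
    if (Visited[U])
      continue;
    Visited[U] = true;
    switch (Uses[U].Kind) {
    case UseKind::Benign:
      break;                          // accepted, nothing to follow
    case UseKind::AddressProducer:    // GEP/bitcast/PHI/select-like user
      for (int Next : Uses[U].TransitiveUsers)
        Worklist.push_back(Next);     // its uses must be checked as well
      break;
    case UseKind::Unknown:
      return false;                   // conservatively treat as a capture
    }
  }
  return true;
}
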
+ auto &MemBehaviorAA = + A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), + /* TrackDependence */ false); + if (MemBehaviorAA.isAssumedReadNone()) { + A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL); + return ChangeStatus::UNCHANGED; + } + + const IRPosition &VIRP = IRPosition::value(getAssociatedValue()); + const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, VIRP, + /* TrackDependence */ false); + + AAResults *AAR = nullptr; + if (isKnownNoAliasDueToNoAliasPreservation(A, AAR, MemBehaviorAA, + NoAliasAA)) { + LLVM_DEBUG( + dbgs() << "[AANoAlias] No-Alias deduced via no-alias preservation\n"); + return ChangeStatus::UNCHANGED; + } + + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) } +}; + +/// NoAlias attribute for function return value. +struct AANoAliasReturned final : AANoAliasImpl { + AANoAliasReturned(const IRPosition &IRP, Attributor &A) + : AANoAliasImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + virtual ChangeStatus updateImpl(Attributor &A) override { + + auto CheckReturnValue = [&](Value &RV) -> bool { + if (Constant *C = dyn_cast<Constant>(&RV)) + if (C->isNullValue() || isa<UndefValue>(C)) + return true; + + /// For now, we can only deduce noalias if we have call sites. + /// FIXME: add more support. + if (!isa<CallBase>(&RV)) + return false; + + const IRPosition &RVPos = IRPosition::value(RV); + const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, RVPos); + if (!NoAliasAA.isAssumedNoAlias()) + return false; + + const auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, RVPos); + return NoCaptureAA.isAssumedNoCaptureMaybeReturned(); + }; + + if (!A.checkForAllReturnedValues(CheckReturnValue, *this)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) } +}; + +/// NoAlias attribute deduction for a call site return value. +struct AANoAliasCallSiteReturned final : AANoAliasImpl { + AANoAliasCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AANoAliasImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::returned(*F); + auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); } +}; + +/// -------------------AAIsDead Function Attribute----------------------- + +struct AAIsDeadValueImpl : public AAIsDead { + AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {} + + /// See AAIsDead::isAssumedDead(). + bool isAssumedDead() const override { return getAssumed(); } + + /// See AAIsDead::isKnownDead(). 
+ bool isKnownDead() const override { return getKnown(); } + + /// See AAIsDead::isAssumedDead(BasicBlock *). + bool isAssumedDead(const BasicBlock *BB) const override { return false; } + + /// See AAIsDead::isKnownDead(BasicBlock *). + bool isKnownDead(const BasicBlock *BB) const override { return false; } + + /// See AAIsDead::isAssumedDead(Instruction *I). + bool isAssumedDead(const Instruction *I) const override { + return I == getCtxI() && isAssumedDead(); + } + + /// See AAIsDead::isKnownDead(Instruction *I). + bool isKnownDead(const Instruction *I) const override { + return isAssumedDead(I) && getKnown(); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return isAssumedDead() ? "assumed-dead" : "assumed-live"; + } + + /// Check if all uses are assumed dead. + bool areAllUsesAssumedDead(Attributor &A, Value &V) { + auto UsePred = [&](const Use &U, bool &Follow) { return false; }; + // Explicitly set the dependence class to required because we want a long + // chain of N dependent instructions to be considered live as soon as one is + // without going through N update cycles. This is not required for + // correctness. + return A.checkForAllUses(UsePred, *this, V, DepClassTy::REQUIRED); + } + + /// Determine if \p I is assumed to be side-effect free. + bool isAssumedSideEffectFree(Attributor &A, Instruction *I) { + if (!I || wouldInstructionBeTriviallyDead(I)) + return true; + + auto *CB = dyn_cast<CallBase>(I); + if (!CB || isa<IntrinsicInst>(CB)) + return false; + + const IRPosition &CallIRP = IRPosition::callsite_function(*CB); + const auto &NoUnwindAA = A.getAndUpdateAAFor<AANoUnwind>( + *this, CallIRP, /* TrackDependence */ false); + if (!NoUnwindAA.isAssumedNoUnwind()) + return false; + if (!NoUnwindAA.isKnownNoUnwind()) + A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL); + + const auto &MemBehaviorAA = A.getAndUpdateAAFor<AAMemoryBehavior>( + *this, CallIRP, /* TrackDependence */ false); + if (MemBehaviorAA.isAssumedReadOnly()) { + if (!MemBehaviorAA.isKnownReadOnly()) + A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL); + return true; + } + return false; + } +}; + +struct AAIsDeadFloating : public AAIsDeadValueImpl { + AAIsDeadFloating(const IRPosition &IRP, Attributor &A) + : AAIsDeadValueImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (isa<UndefValue>(getAssociatedValue())) { + indicatePessimisticFixpoint(); + return; + } + + Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()); + if (!isAssumedSideEffectFree(A, I)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()); + if (!isAssumedSideEffectFree(A, I)) + return indicatePessimisticFixpoint(); + + if (!areAllUsesAssumedDead(A, getAssociatedValue())) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + Value &V = getAssociatedValue(); + if (auto *I = dyn_cast<Instruction>(&V)) { + // If we get here we basically know the users are all dead. We check if + // isAssumedSideEffectFree returns true here again because it might not be + // the case and only the users are dead but the instruction (=call) is + // still needed. 
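
The removability test above boils down to: trivially dead instructions are fine, and otherwise only calls qualify, provided they neither unwind nor write memory. A standalone sketch under those simplifications; InstSummary and its fields are stand-ins for wouldInstructionBeTriviallyDead and the AANoUnwind / AAMemoryBehavior queries:

struct InstSummary {
  bool TriviallyDead;   // no side effects and the result is unused
  bool IsCall;          // intrinsics are excluded in the real implementation
  bool AssumedNoUnwind; // models the AANoUnwind query
  bool AssumedReadOnly; // models the AAMemoryBehavior query
};

bool isAssumedSideEffectFreeSketch(const InstSummary &I) {
  if (I.TriviallyDead)
    return true;
  if (!I.IsCall)        // only calls get the more involved treatment
    return false;
  return I.AssumedNoUnwind && I.AssumedReadOnly;
}
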
+ if (isAssumedSideEffectFree(A, I) && !isa<InvokeInst>(I)) { + A.deleteAfterManifest(*I); + return ChangeStatus::CHANGED; + } + } + if (V.use_empty()) + return ChangeStatus::UNCHANGED; + + bool UsedAssumedInformation = false; + Optional<Constant *> C = + A.getAssumedConstant(V, *this, UsedAssumedInformation); + if (C.hasValue() && C.getValue()) + return ChangeStatus::UNCHANGED; + + // Replace the value with undef as it is dead but keep droppable uses around + // as they provide information we don't want to give up on just yet. + UndefValue &UV = *UndefValue::get(V.getType()); + bool AnyChange = + A.changeValueAfterManifest(V, UV, /* ChangeDropppable */ false); + return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(IsDead) + } +}; + +struct AAIsDeadArgument : public AAIsDeadFloating { + AAIsDeadArgument(const IRPosition &IRP, Attributor &A) + : AAIsDeadFloating(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (!A.isFunctionIPOAmendable(*getAnchorScope())) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = AAIsDeadFloating::manifest(A); + Argument &Arg = *getAssociatedArgument(); + if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {})) + if (A.registerFunctionSignatureRewrite( + Arg, /* ReplacementTypes */ {}, + Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{}, + Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) { + Arg.dropDroppableUses(); + return ChangeStatus::CHANGED; + } + return Changed; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(IsDead) } +}; + +struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl { + AAIsDeadCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAIsDeadValueImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (isa<UndefValue>(getAssociatedValue())) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + if (!Arg) + return indicatePessimisticFixpoint(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), static_cast<const AAIsDead::StateType &>(ArgAA.getState())); + } + + /// See AbstractAttribute::manifest(...). 
+ ChangeStatus manifest(Attributor &A) override { + CallBase &CB = cast<CallBase>(getAnchorValue()); + Use &U = CB.getArgOperandUse(getArgNo()); + assert(!isa<UndefValue>(U.get()) && + "Expected undef values to be filtered out!"); + UndefValue &UV = *UndefValue::get(U->getType()); + if (A.changeUseAfterManifest(U, UV)) + return ChangeStatus::CHANGED; + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(IsDead) } +}; + +struct AAIsDeadCallSiteReturned : public AAIsDeadFloating { + AAIsDeadCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAIsDeadFloating(IRP, A), IsAssumedSideEffectFree(true) {} + + /// See AAIsDead::isAssumedDead(). + bool isAssumedDead() const override { + return AAIsDeadFloating::isAssumedDead() && IsAssumedSideEffectFree; + } + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (isa<UndefValue>(getAssociatedValue())) { + indicatePessimisticFixpoint(); + return; + } + + // We track this separately as a secondary state. + IsAssumedSideEffectFree = isAssumedSideEffectFree(A, getCtxI()); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + if (IsAssumedSideEffectFree && !isAssumedSideEffectFree(A, getCtxI())) { + IsAssumedSideEffectFree = false; + Changed = ChangeStatus::CHANGED; + } + + if (!areAllUsesAssumedDead(A, getAssociatedValue())) + return indicatePessimisticFixpoint(); + return Changed; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (IsAssumedSideEffectFree) + STATS_DECLTRACK_CSRET_ATTR(IsDead) + else + STATS_DECLTRACK_CSRET_ATTR(UnusedResult) + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return isAssumedDead() + ? "assumed-dead" + : (getAssumed() ? "assumed-dead-users" : "assumed-live"); + } + +private: + bool IsAssumedSideEffectFree; +}; + +struct AAIsDeadReturned : public AAIsDeadValueImpl { + AAIsDeadReturned(const IRPosition &IRP, Attributor &A) + : AAIsDeadValueImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + + A.checkForAllInstructions([](Instruction &) { return true; }, *this, + {Instruction::Ret}); + + auto PredForCallSite = [&](AbstractCallSite ACS) { + if (ACS.isCallbackCall() || !ACS.getInstruction()) + return false; + return areAllUsesAssumedDead(A, *ACS.getInstruction()); + }; + + bool AllCallSitesKnown; + if (!A.checkForAllCallSites(PredForCallSite, *this, true, + AllCallSitesKnown)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // TODO: Rewrite the signature to return void? + bool AnyChange = false; + UndefValue &UV = *UndefValue::get(getAssociatedFunction()->getReturnType()); + auto RetInstPred = [&](Instruction &I) { + ReturnInst &RI = cast<ReturnInst>(I); + if (!isa<UndefValue>(RI.getReturnValue())) + AnyChange |= A.changeUseAfterManifest(RI.getOperandUse(0), UV); + return true; + }; + A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret}); + return AnyChange ? 
ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(IsDead) } +}; + +struct AAIsDeadFunction : public AAIsDead { + AAIsDeadFunction(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + const Function *F = getAnchorScope(); + if (F && !F->isDeclaration()) { + ToBeExploredFrom.insert(&F->getEntryBlock().front()); + assumeLive(A, F->getEntryBlock()); + } + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" + + std::to_string(getAnchorScope()->size()) + "][#TBEP " + + std::to_string(ToBeExploredFrom.size()) + "][#KDE " + + std::to_string(KnownDeadEnds.size()) + "]"; + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + assert(getState().isValidState() && + "Attempted to manifest an invalid state!"); + + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + Function &F = *getAnchorScope(); + + if (AssumedLiveBlocks.empty()) { + A.deleteAfterManifest(F); + return ChangeStatus::CHANGED; + } + + // Flag to determine if we can change an invoke to a call assuming the + // callee is nounwind. This is not possible if the personality of the + // function allows to catch asynchronous exceptions. + bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F); + + KnownDeadEnds.set_union(ToBeExploredFrom); + for (const Instruction *DeadEndI : KnownDeadEnds) { + auto *CB = dyn_cast<CallBase>(DeadEndI); + if (!CB) + continue; + const auto &NoReturnAA = A.getAndUpdateAAFor<AANoReturn>( + *this, IRPosition::callsite_function(*CB), /* TrackDependence */ true, + DepClassTy::OPTIONAL); + bool MayReturn = !NoReturnAA.isAssumedNoReturn(); + if (MayReturn && (!Invoke2CallAllowed || !isa<InvokeInst>(CB))) + continue; + + if (auto *II = dyn_cast<InvokeInst>(DeadEndI)) + A.registerInvokeWithDeadSuccessor(const_cast<InvokeInst &>(*II)); + else + A.changeToUnreachableAfterManifest( + const_cast<Instruction *>(DeadEndI->getNextNode())); + HasChanged = ChangeStatus::CHANGED; + } + + STATS_DECL(AAIsDead, BasicBlock, "Number of dead basic blocks deleted."); + for (BasicBlock &BB : F) + if (!AssumedLiveBlocks.count(&BB)) { + A.deleteAfterManifest(BB); + ++BUILD_STAT_NAME(AAIsDead, BasicBlock); + } + + return HasChanged; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} + + /// Returns true if the function is assumed dead. + bool isAssumedDead() const override { return false; } + + /// See AAIsDead::isKnownDead(). + bool isKnownDead() const override { return false; } + + /// See AAIsDead::isAssumedDead(BasicBlock *). + bool isAssumedDead(const BasicBlock *BB) const override { + assert(BB->getParent() == getAnchorScope() && + "BB must be in the same anchor scope function."); + + if (!getAssumed()) + return false; + return !AssumedLiveBlocks.count(BB); + } + + /// See AAIsDead::isKnownDead(BasicBlock *). + bool isKnownDead(const BasicBlock *BB) const override { + return getKnown() && isAssumedDead(BB); + } + + /// See AAIsDead::isAssumed(Instruction *I). 
+ bool isAssumedDead(const Instruction *I) const override { + assert(I->getParent()->getParent() == getAnchorScope() && + "Instruction must be in the same anchor scope function."); + + if (!getAssumed()) + return false; + + // If it is not in AssumedLiveBlocks then it for sure dead. + // Otherwise, it can still be after noreturn call in a live block. + if (!AssumedLiveBlocks.count(I->getParent())) + return true; + + // If it is not after a liveness barrier it is live. + const Instruction *PrevI = I->getPrevNode(); + while (PrevI) { + if (KnownDeadEnds.count(PrevI) || ToBeExploredFrom.count(PrevI)) + return true; + PrevI = PrevI->getPrevNode(); + } + return false; + } + + /// See AAIsDead::isKnownDead(Instruction *I). + bool isKnownDead(const Instruction *I) const override { + return getKnown() && isAssumedDead(I); + } + + /// Assume \p BB is (partially) live now and indicate to the Attributor \p A + /// that internal function called from \p BB should now be looked at. + bool assumeLive(Attributor &A, const BasicBlock &BB) { + if (!AssumedLiveBlocks.insert(&BB).second) + return false; + + // We assume that all of BB is (probably) live now and if there are calls to + // internal functions we will assume that those are now live as well. This + // is a performance optimization for blocks with calls to a lot of internal + // functions. It can however cause dead functions to be treated as live. + for (const Instruction &I : BB) + if (const auto *CB = dyn_cast<CallBase>(&I)) + if (const Function *F = CB->getCalledFunction()) + if (F->hasLocalLinkage()) + A.markLiveInternalFunction(*F); + return true; + } + + /// Collection of instructions that need to be explored again, e.g., we + /// did assume they do not transfer control to (one of their) successors. + SmallSetVector<const Instruction *, 8> ToBeExploredFrom; + + /// Collection of instructions that are known to not transfer control. + SmallSetVector<const Instruction *, 8> KnownDeadEnds; + + /// Collection of all assumed live BasicBlocks. + DenseSet<const BasicBlock *> AssumedLiveBlocks; +}; + +static bool +identifyAliveSuccessors(Attributor &A, const CallBase &CB, + AbstractAttribute &AA, + SmallVectorImpl<const Instruction *> &AliveSuccessors) { + const IRPosition &IPos = IRPosition::callsite_function(CB); + + const auto &NoReturnAA = A.getAndUpdateAAFor<AANoReturn>( + AA, IPos, /* TrackDependence */ true, DepClassTy::OPTIONAL); + if (NoReturnAA.isAssumedNoReturn()) + return !NoReturnAA.isKnownNoReturn(); + if (CB.isTerminator()) + AliveSuccessors.push_back(&CB.getSuccessor(0)->front()); + else + AliveSuccessors.push_back(CB.getNextNode()); + return false; +} + +static bool +identifyAliveSuccessors(Attributor &A, const InvokeInst &II, + AbstractAttribute &AA, + SmallVectorImpl<const Instruction *> &AliveSuccessors) { + bool UsedAssumedInformation = + identifyAliveSuccessors(A, cast<CallBase>(II), AA, AliveSuccessors); + + // First, determine if we can change an invoke to a call assuming the + // callee is nounwind. This is not possible if the personality of the + // function allows to catch asynchronous exceptions. 
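
The first identifyAliveSuccessors overload above encodes a single rule: a call to an (assumed) noreturn callee contributes no live successor, and the result is only stable once noreturn is known rather than assumed. A standalone sketch of that rule; CallSummary and its fields are invented stand-ins for the AANoReturn query:

#include <string>
#include <vector>

struct CallSummary {
  bool AssumedNoReturn; // models AANoReturn::isAssumedNoReturn()
  bool KnownNoReturn;   // models AANoReturn::isKnownNoReturn()
  std::string NextInst; // label of the instruction following the call
};

// Returns true if only assumed (not yet known) information was used, i.e. the
// result may still change in a later fixpoint iteration.
bool aliveSuccessorsOfCall(const CallSummary &C,
                           std::vector<std::string> &Alive) {
  if (C.AssumedNoReturn)
    return !C.KnownNoReturn; // no successor; unstable while merely assumed
  Alive.push_back(C.NextInst);
  return false;
}
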
+ if (AAIsDeadFunction::mayCatchAsynchronousExceptions(*II.getFunction())) { + AliveSuccessors.push_back(&II.getUnwindDest()->front()); + } else { + const IRPosition &IPos = IRPosition::callsite_function(II); + const auto &AANoUnw = A.getAndUpdateAAFor<AANoUnwind>( + AA, IPos, /* TrackDependence */ true, DepClassTy::OPTIONAL); + if (AANoUnw.isAssumedNoUnwind()) { + UsedAssumedInformation |= !AANoUnw.isKnownNoUnwind(); + } else { + AliveSuccessors.push_back(&II.getUnwindDest()->front()); + } + } + return UsedAssumedInformation; +} + +static bool +identifyAliveSuccessors(Attributor &A, const BranchInst &BI, + AbstractAttribute &AA, + SmallVectorImpl<const Instruction *> &AliveSuccessors) { + bool UsedAssumedInformation = false; + if (BI.getNumSuccessors() == 1) { + AliveSuccessors.push_back(&BI.getSuccessor(0)->front()); + } else { + Optional<ConstantInt *> CI = getAssumedConstantInt( + A, *BI.getCondition(), AA, UsedAssumedInformation); + if (!CI.hasValue()) { + // No value yet, assume both edges are dead. + } else if (CI.getValue()) { + const BasicBlock *SuccBB = + BI.getSuccessor(1 - CI.getValue()->getZExtValue()); + AliveSuccessors.push_back(&SuccBB->front()); + } else { + AliveSuccessors.push_back(&BI.getSuccessor(0)->front()); + AliveSuccessors.push_back(&BI.getSuccessor(1)->front()); + UsedAssumedInformation = false; + } + } + return UsedAssumedInformation; +} + +static bool +identifyAliveSuccessors(Attributor &A, const SwitchInst &SI, + AbstractAttribute &AA, + SmallVectorImpl<const Instruction *> &AliveSuccessors) { + bool UsedAssumedInformation = false; + Optional<ConstantInt *> CI = + getAssumedConstantInt(A, *SI.getCondition(), AA, UsedAssumedInformation); + if (!CI.hasValue()) { + // No value yet, assume all edges are dead. + } else if (CI.getValue()) { + for (auto &CaseIt : SI.cases()) { + if (CaseIt.getCaseValue() == CI.getValue()) { + AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front()); + return UsedAssumedInformation; + } + } + AliveSuccessors.push_back(&SI.getDefaultDest()->front()); + return UsedAssumedInformation; + } else { + for (const BasicBlock *SuccBB : successors(SI.getParent())) + AliveSuccessors.push_back(&SuccBB->front()); + } + return UsedAssumedInformation; +} + +ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) { + ChangeStatus Change = ChangeStatus::UNCHANGED; + + LLVM_DEBUG(dbgs() << "[AAIsDead] Live [" << AssumedLiveBlocks.size() << "/" + << getAnchorScope()->size() << "] BBs and " + << ToBeExploredFrom.size() << " exploration points and " + << KnownDeadEnds.size() << " known dead ends\n"); + + // Copy and clear the list of instructions we need to explore from. It is + // refilled with instructions the next update has to look at. + SmallVector<const Instruction *, 8> Worklist(ToBeExploredFrom.begin(), + ToBeExploredFrom.end()); + decltype(ToBeExploredFrom) NewToBeExploredFrom; + + SmallVector<const Instruction *, 8> AliveSuccessors; + while (!Worklist.empty()) { + const Instruction *I = Worklist.pop_back_val(); + LLVM_DEBUG(dbgs() << "[AAIsDead] Exploration inst: " << *I << "\n"); + + AliveSuccessors.clear(); + + bool UsedAssumedInformation = false; + switch (I->getOpcode()) { + // TODO: look for (assumed) UB to backwards propagate "deadness". 
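
For conditional branches, the pruning above hinges on a three-way answer about the condition: no simplified value yet (treat both edges as dead for now), an (assumed) constant (only one edge stays alive), or definitely not a constant (both edges are alive). A standalone sketch of that case split; SimplifiedCond is an illustrative enum, and successors 0 and 1 mirror the true and false edges of an LLVM conditional branch:

#include <vector>

enum class SimplifiedCond { NoValueYet, ConstantTrue, ConstantFalse, NotConstant };

std::vector<int> aliveSuccessorsOfBranch(SimplifiedCond C) {
  switch (C) {
  case SimplifiedCond::NoValueYet:
    return {};        // no simplified value yet: both edges assumed dead for now
  case SimplifiedCond::ConstantTrue:
    return {0};       // only the taken edge stays alive
  case SimplifiedCond::ConstantFalse:
    return {1};
  case SimplifiedCond::NotConstant:
    break;
  }
  return {0, 1};      // unknown condition: both edges are alive
}
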
+ default: + if (I->isTerminator()) { + for (const BasicBlock *SuccBB : successors(I->getParent())) + AliveSuccessors.push_back(&SuccBB->front()); + } else { + AliveSuccessors.push_back(I->getNextNode()); + } + break; + case Instruction::Call: + UsedAssumedInformation = identifyAliveSuccessors(A, cast<CallInst>(*I), + *this, AliveSuccessors); + break; + case Instruction::Invoke: + UsedAssumedInformation = identifyAliveSuccessors(A, cast<InvokeInst>(*I), + *this, AliveSuccessors); + break; + case Instruction::Br: + UsedAssumedInformation = identifyAliveSuccessors(A, cast<BranchInst>(*I), + *this, AliveSuccessors); + break; + case Instruction::Switch: + UsedAssumedInformation = identifyAliveSuccessors(A, cast<SwitchInst>(*I), + *this, AliveSuccessors); + break; + } + + if (UsedAssumedInformation) { + NewToBeExploredFrom.insert(I); + } else { + Change = ChangeStatus::CHANGED; + if (AliveSuccessors.empty() || + (I->isTerminator() && AliveSuccessors.size() < I->getNumSuccessors())) + KnownDeadEnds.insert(I); + } + + LLVM_DEBUG(dbgs() << "[AAIsDead] #AliveSuccessors: " + << AliveSuccessors.size() << " UsedAssumedInformation: " + << UsedAssumedInformation << "\n"); + + for (const Instruction *AliveSuccessor : AliveSuccessors) { + if (!I->isTerminator()) { + assert(AliveSuccessors.size() == 1 && + "Non-terminator expected to have a single successor!"); + Worklist.push_back(AliveSuccessor); + } else { + if (assumeLive(A, *AliveSuccessor->getParent())) + Worklist.push_back(AliveSuccessor); + } + } + } + + ToBeExploredFrom = std::move(NewToBeExploredFrom); + + // If we know everything is live there is no need to query for liveness. + // Instead, indicating a pessimistic fixpoint will cause the state to be + // "invalid" and all queries to be answered conservatively without lookups. + // To be in this state we have to (1) finished the exploration and (3) not + // discovered any non-trivial dead end and (2) not ruled unreachable code + // dead. + if (ToBeExploredFrom.empty() && + getAnchorScope()->size() == AssumedLiveBlocks.size() && + llvm::all_of(KnownDeadEnds, [](const Instruction *DeadEndI) { + return DeadEndI->isTerminator() && DeadEndI->getNumSuccessors() == 0; + })) + return indicatePessimisticFixpoint(); + return Change; +} + +/// Liveness information for a call sites. +struct AAIsDeadCallSite final : AAIsDeadFunction { + AAIsDeadCallSite(const IRPosition &IRP, Attributor &A) + : AAIsDeadFunction(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites instead of + // redirecting requests to the callee. + llvm_unreachable("Abstract attributes for liveness are not " + "supported for call sites yet!"); + } + + /// See AbstractAttribute::updateImpl(...). 
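
At block granularity, the exploration loop above is a plain forward reachability computation that refuses to walk past dead ends such as (assumed) noreturn calls. A standalone sketch on a toy CFG; the real update works per instruction and additionally records whether assumed information was used:

#include <set>
#include <vector>

std::set<int> computeLiveBlocks(const std::vector<std::vector<int>> &Succ,
                                const std::set<int> &DeadEnds, int Entry) {
  std::set<int> Live;
  std::vector<int> Worklist{Entry};
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Live.insert(BB).second)
      continue;                  // already explored
    if (DeadEnds.count(BB))
      continue;                  // control never leaves this block
    for (int S : Succ[BB])
      Worklist.push_back(S);     // successors become (assumed) live
  }
  return Live;
}
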
+ ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// -------------------- Dereferenceable Argument Attribute -------------------- + +template <> +ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S, + const DerefState &R) { + ChangeStatus CS0 = + clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState); + ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState); + return CS0 | CS1; +} + +struct AADereferenceableImpl : AADereferenceable { + AADereferenceableImpl(const IRPosition &IRP, Attributor &A) + : AADereferenceable(IRP, A) {} + using StateType = DerefState; + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + SmallVector<Attribute, 4> Attrs; + getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull}, + Attrs, /* IgnoreSubsumingPositions */ false, &A); + for (const Attribute &Attr : Attrs) + takeKnownDerefBytesMaximum(Attr.getValueAsInt()); + + const IRPosition &IRP = this->getIRPosition(); + NonNullAA = &A.getAAFor<AANonNull>(*this, IRP, + /* TrackDependence */ false); + + bool CanBeNull; + takeKnownDerefBytesMaximum( + IRP.getAssociatedValue().getPointerDereferenceableBytes( + A.getDataLayout(), CanBeNull)); + + bool IsFnInterface = IRP.isFnInterfaceKind(); + Function *FnScope = IRP.getAnchorScope(); + if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) { + indicatePessimisticFixpoint(); + return; + } + + if (Instruction *CtxI = getCtxI()) + followUsesInMBEC(*this, A, getState(), *CtxI); + } + + /// See AbstractAttribute::getState() + /// { + StateType &getState() override { return *this; } + const StateType &getState() const override { return *this; } + /// } + + /// Helper function for collecting accessed bytes in must-be-executed-context + void addAccessedBytesForUse(Attributor &A, const Use *U, const Instruction *I, + DerefState &State) { + const Value *UseV = U->get(); + if (!UseV->getType()->isPointerTy()) + return; + + Type *PtrTy = UseV->getType(); + const DataLayout &DL = A.getDataLayout(); + int64_t Offset; + if (const Value *Base = getBasePointerOfAccessPointerOperand( + I, Offset, DL, /*AllowNonInbounds*/ true)) { + if (Base == &getAssociatedValue() && + getPointerOperand(I, /* AllowVolatile */ false) == UseV) { + uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType()); + State.addAccessedBytes(Offset, Size); + } + } + return; + } + + /// See followUsesInMBEC + bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I, + AADereferenceable::StateType &State) { + bool IsNonNull = false; + bool TrackUse = false; + int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( + A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse); + LLVM_DEBUG(dbgs() << "[AADereferenceable] Deref bytes: " << DerefBytes + << " for instruction " << *I << "\n"); + + addAccessedBytesForUse(A, U, I, State); + State.takeKnownDerefBytesMaximum(DerefBytes); + return TrackUse; + } + + /// See AbstractAttribute::manifest(...). 
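
DerefState follows the general Attributor convention that the known value may only improve and the assumed value may only be clamped toward it. A minimal stand-in for that bookkeeping, restricted to the dereferenceable-bytes component (SimpleDerefState is an invented name; the real DerefState also tracks accessed byte ranges and a global state):

#include <algorithm>
#include <cstdint>

struct SimpleDerefState {
  uint64_t Known = 0;            // facts, may only grow
  uint64_t Assumed = UINT64_MAX; // optimism, may only shrink toward Known

  void takeKnownMaximum(uint64_t Bytes) {
    Known = std::max(Known, Bytes);
    Assumed = std::max(Assumed, Known); // keep the invariant Assumed >= Known
  }
  void takeAssumedMinimum(uint64_t Bytes) {
    Assumed = std::max(Known, std::min(Assumed, Bytes));
  }
};
// E.g. takeKnownMaximum(4) followed by takeAssumedMinimum(16) leaves
// Known == 4 and Assumed == 16.
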
+ ChangeStatus manifest(Attributor &A) override { + ChangeStatus Change = AADereferenceable::manifest(A); + if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) { + removeAttrs({Attribute::DereferenceableOrNull}); + return ChangeStatus::CHANGED; + } + return Change; + } + + void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl<Attribute> &Attrs) const override { + // TODO: Add *_globally support + if (isAssumedNonNull()) + Attrs.emplace_back(Attribute::getWithDereferenceableBytes( + Ctx, getAssumedDereferenceableBytes())); + else + Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes( + Ctx, getAssumedDereferenceableBytes())); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + if (!getAssumedDereferenceableBytes()) + return "unknown-dereferenceable"; + return std::string("dereferenceable") + + (isAssumedNonNull() ? "" : "_or_null") + + (isAssumedGlobal() ? "_globally" : "") + "<" + + std::to_string(getKnownDereferenceableBytes()) + "-" + + std::to_string(getAssumedDereferenceableBytes()) + ">"; + } +}; + +/// Dereferenceable attribute for a floating value. +struct AADereferenceableFloating : AADereferenceableImpl { + AADereferenceableFloating(const IRPosition &IRP, Attributor &A) + : AADereferenceableImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + const DataLayout &DL = A.getDataLayout(); + + auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T, + bool Stripped) -> bool { + unsigned IdxWidth = + DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); + APInt Offset(IdxWidth, 0); + const Value *Base = + stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false); + + const auto &AA = + A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base)); + int64_t DerefBytes = 0; + if (!Stripped && this == &AA) { + // Use IR information if we did not strip anything. + // TODO: track globally. + bool CanBeNull; + DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull); + T.GlobalState.indicatePessimisticFixpoint(); + } else { + const DerefState &DS = static_cast<const DerefState &>(AA.getState()); + DerefBytes = DS.DerefBytesState.getAssumed(); + T.GlobalState &= DS.GlobalState; + } + + + // For now we do not try to "increase" dereferenceability due to negative + // indices as we first have to come up with code to deal with loops and + // for overflows of the dereferenceable bytes. + int64_t OffsetSExt = Offset.getSExtValue(); + if (OffsetSExt < 0) + OffsetSExt = 0; + + T.takeAssumedDerefBytesMinimum( + std::max(int64_t(0), DerefBytes - OffsetSExt)); + + if (this == &AA) { + if (!Stripped) { + // If nothing was stripped IR information is all we got. + T.takeKnownDerefBytesMaximum( + std::max(int64_t(0), DerefBytes - OffsetSExt)); + T.indicatePessimisticFixpoint(); + } else if (OffsetSExt > 0) { + // If something was stripped but there is circular reasoning we look + // for the offset. If it is positive we basically decrease the + // dereferenceable bytes in a circluar loop now, which will simply + // drive them down to the known value in a very slow way which we + // can accelerate. 
+ T.indicatePessimisticFixpoint(); + } + } + + return T.isValidState(); + }; + + DerefState T; + if (!genericValueTraversal<AADereferenceable, DerefState>( + A, getIRPosition(), *this, T, VisitValueCB, getCtxI())) + return indicatePessimisticFixpoint(); + + return clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for a return value. +struct AADereferenceableReturned final + : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl> { + AADereferenceableReturned(const IRPosition &IRP, Attributor &A) + : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl>( + IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for an argument +struct AADereferenceableArgument final + : AAArgumentFromCallSiteArguments<AADereferenceable, + AADereferenceableImpl> { + using Base = + AAArgumentFromCallSiteArguments<AADereferenceable, AADereferenceableImpl>; + AADereferenceableArgument(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for a call site argument. +struct AADereferenceableCallSiteArgument final : AADereferenceableFloating { + AADereferenceableCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AADereferenceableFloating(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute deduction for a call site return value. +struct AADereferenceableCallSiteReturned final + : AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl> { + using Base = + AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl>; + AADereferenceableCallSiteReturned(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(dereferenceable); + } +}; + +// ------------------------ Align Argument Attribute ------------------------ + +static unsigned getKnownAlignForUse(Attributor &A, + AbstractAttribute &QueryingAA, + Value &AssociatedValue, const Use *U, + const Instruction *I, bool &TrackUse) { + // We need to follow common pointer manipulation uses to the accesses they + // feed into. + if (isa<CastInst>(I)) { + // Follow all but ptr2int casts. + TrackUse = !isa<PtrToIntInst>(I); + return 0; + } + if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { + if (GEP->hasAllConstantIndices()) { + TrackUse = true; + return 0; + } + } + + MaybeAlign MA; + if (const auto *CB = dyn_cast<CallBase>(I)) { + if (CB->isBundleOperand(U) || CB->isCallee(U)) + return 0; + + unsigned ArgNo = CB->getArgOperandNo(U); + IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo); + // As long as we only use known information there is no need to track + // dependences here. 
+ auto &AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP, + /* TrackDependence */ false); + MA = MaybeAlign(AlignAA.getKnownAlign()); + } + + const DataLayout &DL = A.getDataLayout(); + const Value *UseV = U->get(); + if (auto *SI = dyn_cast<StoreInst>(I)) { + if (SI->getPointerOperand() == UseV) + MA = SI->getAlign(); + } else if (auto *LI = dyn_cast<LoadInst>(I)) { + if (LI->getPointerOperand() == UseV) + MA = LI->getAlign(); + } + + if (!MA || *MA <= 1) + return 0; + + unsigned Alignment = MA->value(); + int64_t Offset; + + if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) { + if (Base == &AssociatedValue) { + // BasePointerAddr + Offset = Alignment * Q for some integer Q. + // So we can say that the maximum power of two which is a divisor of + // gcd(Offset, Alignment) is an alignment. + + uint32_t gcd = + greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), Alignment); + Alignment = llvm::PowerOf2Floor(gcd); + } + } + + return Alignment; +} + +struct AAAlignImpl : AAAlign { + AAAlignImpl(const IRPosition &IRP, Attributor &A) : AAAlign(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + SmallVector<Attribute, 4> Attrs; + getAttrs({Attribute::Alignment}, Attrs); + for (const Attribute &Attr : Attrs) + takeKnownMaximum(Attr.getValueAsInt()); + + Value &V = getAssociatedValue(); + // TODO: This is a HACK to avoid getPointerAlignment to introduce a ptr2int + // use of the function pointer. This was caused by D73131. We want to + // avoid this for function pointers especially because we iterate + // their uses and int2ptr is not handled. It is not a correctness + // problem though! + if (!V.getType()->getPointerElementType()->isFunctionTy()) + takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value()); + + if (getIRPosition().isFnInterfaceKind() && + (!getAnchorScope() || + !A.isFunctionIPOAmendable(*getAssociatedFunction()))) { + indicatePessimisticFixpoint(); + return; + } + + if (Instruction *CtxI = getCtxI()) + followUsesInMBEC(*this, A, getState(), *CtxI); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus LoadStoreChanged = ChangeStatus::UNCHANGED; + + // Check for users that allow alignment annotations. + Value &AssociatedValue = getAssociatedValue(); + for (const Use &U : AssociatedValue.uses()) { + if (auto *SI = dyn_cast<StoreInst>(U.getUser())) { + if (SI->getPointerOperand() == &AssociatedValue) + if (SI->getAlignment() < getAssumedAlign()) { + STATS_DECLTRACK(AAAlign, Store, + "Number of times alignment added to a store"); + SI->setAlignment(Align(getAssumedAlign())); + LoadStoreChanged = ChangeStatus::CHANGED; + } + } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) { + if (LI->getPointerOperand() == &AssociatedValue) + if (LI->getAlignment() < getAssumedAlign()) { + LI->setAlignment(Align(getAssumedAlign())); + STATS_DECLTRACK(AAAlign, Load, + "Number of times alignment added to a load"); + LoadStoreChanged = ChangeStatus::CHANGED; + } + } + } + + ChangeStatus Changed = AAAlign::manifest(A); + + Align InheritAlign = + getAssociatedValue().getPointerAlignment(A.getDataLayout()); + if (InheritAlign >= getAssumedAlign()) + return LoadStoreChanged; + return Changed | LoadStoreChanged; + } + + // TODO: Provide a helper to determine the implied ABI alignment and check in + // the existing manifest method and a new one for AAAlignImpl that value + // to avoid making the alignment explicit if it did not improve. 
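
The gcd comment above is worth a worked example: if BasePointerAddr + Offset is known to be Align-aligned, then the base itself is aligned to the largest power of two dividing gcd(|Offset|, Align). A standalone sketch of exactly that computation (knownBaseAlignment is an invented name; the real code uses llvm::greatestCommonDivisor and llvm::PowerOf2Floor):

#include <cstdint>
#include <numeric>

// AccessAlign must be a nonzero power of two, as LLVM alignments are.
uint32_t knownBaseAlignment(int64_t Offset, uint32_t AccessAlign) {
  uint64_t AbsOff = Offset < 0 ? 0 - static_cast<uint64_t>(Offset)
                               : static_cast<uint64_t>(Offset);
  uint64_t G = std::gcd(AbsOff, static_cast<uint64_t>(AccessAlign));
  uint64_t P = 1; // round down to a power of two
  while (P * 2 <= G)
    P *= 2;
  return static_cast<uint32_t>(P);
}
// Example: an 8-aligned access at offset 12 implies the base is 4-aligned,
// since gcd(12, 8) == 4: knownBaseAlignment(12, 8) == 4.
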
+ + /// See AbstractAttribute::getDeducedAttributes + virtual void + getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl<Attribute> &Attrs) const override { + if (getAssumedAlign() > 1) + Attrs.emplace_back( + Attribute::getWithAlignment(Ctx, Align(getAssumedAlign()))); + } + + /// See followUsesInMBEC + bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I, + AAAlign::StateType &State) { + bool TrackUse = false; + + unsigned int KnownAlign = + getKnownAlignForUse(A, *this, getAssociatedValue(), U, I, TrackUse); + State.takeKnownMaximum(KnownAlign); + + return TrackUse; + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) + + "-" + std::to_string(getAssumedAlign()) + ">") + : "unknown-align"; + } +}; + +/// Align attribute for a floating value. +struct AAAlignFloating : AAAlignImpl { + AAAlignFloating(const IRPosition &IRP, Attributor &A) : AAAlignImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + const DataLayout &DL = A.getDataLayout(); + + auto VisitValueCB = [&](Value &V, const Instruction *, + AAAlign::StateType &T, bool Stripped) -> bool { + const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + // Use only IR information if we did not strip anything. + Align PA = V.getPointerAlignment(DL); + T.takeKnownMaximum(PA.value()); + T.indicatePessimisticFixpoint(); + } else { + // Use abstract attribute information. + const AAAlign::StateType &DS = + static_cast<const AAAlign::StateType &>(AA.getState()); + T ^= DS; + } + return T.isValidState(); + }; + + StateType T; + if (!genericValueTraversal<AAAlign, StateType>(A, getIRPosition(), *this, T, + VisitValueCB, getCtxI())) + return indicatePessimisticFixpoint(); + + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) } +}; + +/// Align attribute for function return value. +struct AAAlignReturned final + : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> { + AAAlignReturned(const IRPosition &IRP, Attributor &A) + : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) } +}; + +/// Align attribute for function argument. +struct AAAlignArgument final + : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl> { + using Base = AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl>; + AAAlignArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // If the associated argument is involved in a must-tail call we give up + // because we would need to keep the argument alignments of caller and + // callee in-sync. Just does not seem worth the trouble right now. 
+ if (A.getInfoCache().isInvolvedInMustTailCall(*getAssociatedArgument())) + return ChangeStatus::UNCHANGED; + return Base::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) } +}; + +struct AAAlignCallSiteArgument final : AAAlignFloating { + AAAlignCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAAlignFloating(IRP, A) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // If the associated argument is involved in a must-tail call we give up + // because we would need to keep the argument alignments of caller and + // callee in-sync. Just does not seem worth the trouble right now. + if (Argument *Arg = getAssociatedArgument()) + if (A.getInfoCache().isInvolvedInMustTailCall(*Arg)) + return ChangeStatus::UNCHANGED; + ChangeStatus Changed = AAAlignImpl::manifest(A); + Align InheritAlign = + getAssociatedValue().getPointerAlignment(A.getDataLayout()); + if (InheritAlign >= getAssumedAlign()) + Changed = ChangeStatus::UNCHANGED; + return Changed; + } + + /// See AbstractAttribute::updateImpl(Attributor &A). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Changed = AAAlignFloating::updateImpl(A); + if (Argument *Arg = getAssociatedArgument()) { + // We only take known information from the argument + // so we do not need to track a dependence. + const auto &ArgAlignAA = A.getAAFor<AAAlign>( + *this, IRPosition::argument(*Arg), /* TrackDependence */ false); + takeKnownMaximum(ArgAlignAA.getKnownAlign()); + } + return Changed; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) } +}; + +/// Align attribute deduction for a call site return value. +struct AAAlignCallSiteReturned final + : AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl> { + using Base = AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl>; + AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Base::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); } +}; + +/// ------------------ Function No-Return Attribute ---------------------------- +struct AANoReturnImpl : public AANoReturn { + AANoReturnImpl(const IRPosition &IRP, Attributor &A) : AANoReturn(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoReturn::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? "noreturn" : "may-return"; + } + + /// See AbstractAttribute::updateImpl(Attributor &A). 
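Editorial sketch, not part of the patch: the "align<known-assumed>" string printed by getAsStr above reflects the two bounds every Attributor state keeps. The struct below is a simplified stand-in for that bookkeeping (names and the cap value are illustrative): Known only grows from proven facts, Assumed is the optimistic bound that updates may only tighten, and a fixpoint is reached when the two meet.

#include <algorithm>
#include <cstdint>

struct AlignState {
  uint64_t Known = 1;                   // proven alignment
  uint64_t Assumed = uint64_t(1) << 29; // optimistic bound (some large cap)

  // A proven fact raises Known (and Assumed along with it, if needed).
  void takeKnownMaximum(uint64_t V) {
    Known = std::max(Known, V);
    Assumed = std::max(Assumed, Known);
  }
  // New constraints can only lower Assumed, never below Known.
  void takeAssumedMinimum(uint64_t V) {
    Assumed = std::max(Known, std::min(Assumed, V));
  }
  bool isAtFixpoint() const { return Known == Assumed; }
};

// e.g. takeKnownMaximum(4) followed by takeAssumedMinimum(16) is printed as
// align<4-16>: 4 is proven, 16 is still only assumed.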
+ virtual ChangeStatus updateImpl(Attributor &A) override { + auto CheckForNoReturn = [](Instruction &) { return false; }; + if (!A.checkForAllInstructions(CheckForNoReturn, *this, + {(unsigned)Instruction::Ret})) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } +}; + +struct AANoReturnFunction final : AANoReturnImpl { + AANoReturnFunction(const IRPosition &IRP, Attributor &A) + : AANoReturnImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) } +}; + +/// NoReturn attribute deduction for a call sites. +struct AANoReturnCallSite final : AANoReturnImpl { + AANoReturnCallSite(const IRPosition &IRP, Attributor &A) + : AANoReturnImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AANoReturn::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); } +}; + +/// ----------------------- Variable Capturing --------------------------------- + +/// A class to hold the state of for no-capture attributes. +struct AANoCaptureImpl : public AANoCapture { + AANoCaptureImpl(const IRPosition &IRP, Attributor &A) : AANoCapture(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ true)) { + indicateOptimisticFixpoint(); + return; + } + Function *AnchorScope = getAnchorScope(); + if (isFnInterfaceKind() && + (!AnchorScope || !A.isFunctionIPOAmendable(*AnchorScope))) { + indicatePessimisticFixpoint(); + return; + } + + // You cannot "capture" null in the default address space. + if (isa<ConstantPointerNull>(getAssociatedValue()) && + getAssociatedValue().getType()->getPointerAddressSpace() == 0) { + indicateOptimisticFixpoint(); + return; + } + + const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope; + + // Check what state the associated function can actually capture. + if (F) + determineFunctionCaptureCapabilities(getIRPosition(), *F, *this); + else + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...). + virtual void + getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl<Attribute> &Attrs) const override { + if (!isAssumedNoCaptureMaybeReturned()) + return; + + if (getArgNo() >= 0) { + if (isAssumedNoCapture()) + Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture)); + else if (ManifestInternal) + Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned")); + } + } + + /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known + /// depending on the ability of the function associated with \p IRP to capture + /// state in memory and through "returning/throwing", respectively. 
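Editorial illustration, not part of the patch: the noreturn update above asks whether any (live) return instruction exists. A conservative standalone version that ignores the Attributor's liveness reasoning, which is what lets the real implementation also handle functions whose returns are all dead:

#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

// True only if the function has a body and that body contains no return
// instruction at all.
static bool containsNoReturnInst(const llvm::Function &F) {
  for (const llvm::BasicBlock &BB : F)
    for (const llvm::Instruction &I : BB)
      if (llvm::isa<llvm::ReturnInst>(I))
        return false;
  return !F.isDeclaration();
}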
+ static void determineFunctionCaptureCapabilities(const IRPosition &IRP, + const Function &F, + BitIntegerState &State) { + // TODO: Once we have memory behavior attributes we should use them here. + + // If we know we cannot communicate or write to memory, we do not care about + // ptr2int anymore. + if (F.onlyReadsMemory() && F.doesNotThrow() && + F.getReturnType()->isVoidTy()) { + State.addKnownBits(NO_CAPTURE); + return; + } + + // A function cannot capture state in memory if it only reads memory, it can + // however return/throw state and the state might be influenced by the + // pointer value, e.g., loading from a returned pointer might reveal a bit. + if (F.onlyReadsMemory()) + State.addKnownBits(NOT_CAPTURED_IN_MEM); + + // A function cannot communicate state back if it does not through + // exceptions and doesn not return values. + if (F.doesNotThrow() && F.getReturnType()->isVoidTy()) + State.addKnownBits(NOT_CAPTURED_IN_RET); + + // Check existing "returned" attributes. + int ArgNo = IRP.getArgNo(); + if (F.doesNotThrow() && ArgNo >= 0) { + for (unsigned u = 0, e = F.arg_size(); u < e; ++u) + if (F.hasParamAttribute(u, Attribute::Returned)) { + if (u == unsigned(ArgNo)) + State.removeAssumedBits(NOT_CAPTURED_IN_RET); + else if (F.onlyReadsMemory()) + State.addKnownBits(NO_CAPTURE); + else + State.addKnownBits(NOT_CAPTURED_IN_RET); + break; + } + } + } + + /// See AbstractState::getAsStr(). + const std::string getAsStr() const override { + if (isKnownNoCapture()) + return "known not-captured"; + if (isAssumedNoCapture()) + return "assumed not-captured"; + if (isKnownNoCaptureMaybeReturned()) + return "known not-captured-maybe-returned"; + if (isAssumedNoCaptureMaybeReturned()) + return "assumed not-captured-maybe-returned"; + return "assumed-captured"; + } +}; + +/// Attributor-aware capture tracker. +struct AACaptureUseTracker final : public CaptureTracker { + + /// Create a capture tracker that can lookup in-flight abstract attributes + /// through the Attributor \p A. + /// + /// If a use leads to a potential capture, \p CapturedInMemory is set and the + /// search is stopped. If a use leads to a return instruction, + /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed. + /// If a use leads to a ptr2int which may capture the value, + /// \p CapturedInInteger is set. If a use is found that is currently assumed + /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies + /// set. All values in \p PotentialCopies are later tracked as well. For every + /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0, + /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger + /// conservatively set to true. + AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA, + const AAIsDead &IsDeadAA, AANoCapture::StateType &State, + SmallVectorImpl<const Value *> &PotentialCopies, + unsigned &RemainingUsesToExplore) + : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State), + PotentialCopies(PotentialCopies), + RemainingUsesToExplore(RemainingUsesToExplore) {} + + /// Determine if \p V maybe captured. *Also updates the state!* + bool valueMayBeCaptured(const Value *V) { + if (V->getType()->isPointerTy()) { + PointerMayBeCaptured(V, this); + } else { + State.indicatePessimisticFixpoint(); + } + return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); + } + + /// See CaptureTracker::tooManyUses(). 
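Editorial sketch, not part of the patch, of the attribute-only reasoning in determineFunctionCaptureCapabilities above: a readonly function cannot capture a pointer in memory, a nounwind function returning void cannot hand it back to the caller, and a function satisfying all of these cannot capture it at all. The helper name is hypothetical.

#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"

// True if, judging only by attributes and return type, F has no way to make
// any of its pointer arguments observable after the call.
static bool cannotCaptureByAttributesAlone(const llvm::Function &F) {
  bool NotCapturedInMemory = F.onlyReadsMemory();        // readonly/readnone
  bool NotCapturedInReturn =
      F.doesNotThrow() && F.getReturnType()->isVoidTy(); // nounwind + void
  return NotCapturedInMemory && NotCapturedInReturn;
}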
+ void tooManyUses() override { + State.removeAssumedBits(AANoCapture::NO_CAPTURE); + } + + bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override { + if (CaptureTracker::isDereferenceableOrNull(O, DL)) + return true; + const auto &DerefAA = A.getAAFor<AADereferenceable>( + NoCaptureAA, IRPosition::value(*O), /* TrackDependence */ true, + DepClassTy::OPTIONAL); + return DerefAA.getAssumedDereferenceableBytes(); + } + + /// See CaptureTracker::captured(...). + bool captured(const Use *U) override { + Instruction *UInst = cast<Instruction>(U->getUser()); + LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst + << "\n"); + + // Because we may reuse the tracker multiple times we keep track of the + // number of explored uses ourselves as well. + if (RemainingUsesToExplore-- == 0) { + LLVM_DEBUG(dbgs() << " - too many uses to explore!\n"); + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + } + + // Deal with ptr2int by following uses. + if (isa<PtrToIntInst>(UInst)) { + LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n"); + return valueMayBeCaptured(UInst); + } + + // Explicitly catch return instructions. + if (isa<ReturnInst>(UInst)) + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ true); + + // For now we only use special logic for call sites. However, the tracker + // itself knows about a lot of other non-capturing cases already. + auto *CB = dyn_cast<CallBase>(UInst); + if (!CB || !CB->isArgOperand(U)) + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + + unsigned ArgNo = CB->getArgOperandNo(U); + const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo); + // If we have a abstract no-capture attribute for the argument we can use + // it to justify a non-capture attribute here. This allows recursion! + auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos); + if (ArgNoCaptureAA.isAssumedNoCapture()) + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ false); + if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + addPotentialCopy(*CB); + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ false); + } + + // Lastly, we could not find a reason no-capture can be assumed so we don't. + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + } + + /// Register \p CS as potential copy of the value we are checking. + void addPotentialCopy(CallBase &CB) { PotentialCopies.push_back(&CB); } + + /// See CaptureTracker::shouldExplore(...). + bool shouldExplore(const Use *U) override { + // Check liveness and ignore droppable users. + return !U->getUser()->isDroppable() && + !A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA); + } + + /// Update the state according to \p CapturedInMem, \p CapturedInInt, and + /// \p CapturedInRet, then return the appropriate value for use in the + /// CaptureTracker::captured() interface. 
+ bool isCapturedIn(bool CapturedInMem, bool CapturedInInt, + bool CapturedInRet) { + LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int " + << CapturedInInt << "|Ret " << CapturedInRet << "]\n"); + if (CapturedInMem) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM); + if (CapturedInInt) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT); + if (CapturedInRet) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET); + return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); + } + +private: + /// The attributor providing in-flight abstract attributes. + Attributor &A; + + /// The abstract attribute currently updated. + AANoCapture &NoCaptureAA; + + /// The abstract liveness state. + const AAIsDead &IsDeadAA; + + /// The state currently updated. + AANoCapture::StateType &State; + + /// Set of potential copies of the tracked value. + SmallVectorImpl<const Value *> &PotentialCopies; + + /// Global counter to limit the number of explored uses. + unsigned &RemainingUsesToExplore; +}; + +ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { + const IRPosition &IRP = getIRPosition(); + const Value *V = + getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue(); + if (!V) + return indicatePessimisticFixpoint(); + + const Function *F = + getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); + assert(F && "Expected a function!"); + const IRPosition &FnPos = IRPosition::function(*F); + const auto &IsDeadAA = + A.getAAFor<AAIsDead>(*this, FnPos, /* TrackDependence */ false); + + AANoCapture::StateType T; + + // Readonly means we cannot capture through memory. + const auto &FnMemAA = + A.getAAFor<AAMemoryBehavior>(*this, FnPos, /* TrackDependence */ false); + if (FnMemAA.isAssumedReadOnly()) { + T.addKnownBits(NOT_CAPTURED_IN_MEM); + if (FnMemAA.isKnownReadOnly()) + addKnownBits(NOT_CAPTURED_IN_MEM); + else + A.recordDependence(FnMemAA, *this, DepClassTy::OPTIONAL); + } + + // Make sure all returned values are different than the underlying value. + // TODO: we could do this in a more sophisticated way inside + // AAReturnedValues, e.g., track all values that escape through returns + // directly somehow. + auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) { + bool SeenConstant = false; + for (auto &It : RVAA.returned_values()) { + if (isa<Constant>(It.first)) { + if (SeenConstant) + return false; + SeenConstant = true; + } else if (!isa<Argument>(It.first) || + It.first == getAssociatedArgument()) + return false; + } + return true; + }; + + const auto &NoUnwindAA = A.getAAFor<AANoUnwind>( + *this, FnPos, /* TrackDependence */ true, DepClassTy::OPTIONAL); + if (NoUnwindAA.isAssumedNoUnwind()) { + bool IsVoidTy = F->getReturnType()->isVoidTy(); + const AAReturnedValues *RVAA = + IsVoidTy ? nullptr + : &A.getAAFor<AAReturnedValues>(*this, FnPos, + /* TrackDependence */ true, + DepClassTy::OPTIONAL); + if (IsVoidTy || CheckReturnedArgs(*RVAA)) { + T.addKnownBits(NOT_CAPTURED_IN_RET); + if (T.isKnown(NOT_CAPTURED_IN_MEM)) + return ChangeStatus::UNCHANGED; + if (NoUnwindAA.isKnownNoUnwind() && + (IsVoidTy || RVAA->getState().isAtFixpoint())) { + addKnownBits(NOT_CAPTURED_IN_RET); + if (isKnown(NOT_CAPTURED_IN_MEM)) + return indicateOptimisticFixpoint(); + } + } + } + + // Use the CaptureTracker interface and logic with the specialized tracker, + // defined in AACaptureUseTracker, that can look at in-flight abstract + // attributes and directly updates the assumed state. 
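Editorial model, not part of the patch: the capture deduction above works on three independent bits, and isCapturedIn clears the ones a given use violates. The enum values below are an assumed re-encoding for illustration; only the bit names are taken from the code.

#include <cstdint>

enum CaptureBits : uint32_t {
  NOT_CAPTURED_IN_MEM = 1u << 0,
  NOT_CAPTURED_IN_INT = 1u << 1,
  NOT_CAPTURED_IN_RET = 1u << 2,
  NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,
  NO_CAPTURE = NO_CAPTURE_MAYBE_RETURNED | NOT_CAPTURED_IN_RET,
};

struct CaptureState {
  uint32_t Assumed = NO_CAPTURE; // start optimistic

  // Mirrors isCapturedIn: clear the bits a use violates and report whether the
  // search can stop because even "maybe returned" is no longer possible.
  bool recordUse(bool InMem, bool InInt, bool InRet) {
    if (InMem) Assumed &= ~uint32_t(NOT_CAPTURED_IN_MEM);
    if (InInt) Assumed &= ~uint32_t(NOT_CAPTURED_IN_INT);
    if (InRet) Assumed &= ~uint32_t(NOT_CAPTURED_IN_RET);
    return (Assumed & NO_CAPTURE_MAYBE_RETURNED) != NO_CAPTURE_MAYBE_RETURNED;
  }
};

// e.g. a use as the return value clears only NOT_CAPTURED_IN_RET, which still
// allows the weaker no-capture-maybe-returned result.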
+ SmallVector<const Value *, 4> PotentialCopies; + unsigned RemainingUsesToExplore = + getDefaultMaxUsesToExploreForCaptureTracking(); + AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies, + RemainingUsesToExplore); + + // Check all potential copies of the associated value until we can assume + // none will be captured or we have to assume at least one might be. + unsigned Idx = 0; + PotentialCopies.push_back(V); + while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size()) + Tracker.valueMayBeCaptured(PotentialCopies[Idx++]); + + AANoCapture::StateType &S = getState(); + auto Assumed = S.getAssumed(); + S.intersectAssumedBits(T.getAssumed()); + if (!isAssumedNoCaptureMaybeReturned()) + return indicatePessimisticFixpoint(); + return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; +} + +/// NoCapture attribute for function arguments. +struct AANoCaptureArgument final : AANoCaptureImpl { + AANoCaptureArgument(const IRPosition &IRP, Attributor &A) + : AANoCaptureImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) } +}; + +/// NoCapture attribute for call site arguments. +struct AANoCaptureCallSiteArgument final : AANoCaptureImpl { + AANoCaptureCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AANoCaptureImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (Argument *Arg = getAssociatedArgument()) + if (Arg->hasByValAttr()) + indicateOptimisticFixpoint(); + AANoCaptureImpl::initialize(A); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + if (!Arg) + return indicatePessimisticFixpoint(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AANoCapture::StateType &>(ArgAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)}; +}; + +/// NoCapture attribute for floating values. +struct AANoCaptureFloating final : AANoCaptureImpl { + AANoCaptureFloating(const IRPosition &IRP, Attributor &A) + : AANoCaptureImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(nocapture) + } +}; + +/// NoCapture attribute for function return value. +struct AANoCaptureReturned final : AANoCaptureImpl { + AANoCaptureReturned(const IRPosition &IRP, Attributor &A) + : AANoCaptureImpl(IRP, A) { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// NoCapture attribute deduction for a call site return value. +struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { + AANoCaptureCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AANoCaptureImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(nocapture) + } +}; + +/// ------------------ Value Simplify Attribute ---------------------------- +struct AAValueSimplifyImpl : AAValueSimplify { + AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A) + : AAValueSimplify(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (getAssociatedValue().getType()->isVoidTy()) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? (getKnown() ? "simplified" : "maybe-simple") + : "not-simple"; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} + + /// See AAValueSimplify::getAssumedSimplifiedValue() + Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override { + if (!getAssumed()) + return const_cast<Value *>(&getAssociatedValue()); + return SimplifiedAssociatedValue; + } + + /// Helper function for querying AAValueSimplify and updating candicate. + /// \param QueryingValue Value trying to unify with SimplifiedValue + /// \param AccumulatedSimplifiedValue Current simplification result. + static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA, + Value &QueryingValue, + Optional<Value *> &AccumulatedSimplifiedValue) { + // FIXME: Add a typecast support. + + auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>( + QueryingAA, IRPosition::value(QueryingValue)); + + Optional<Value *> QueryingValueSimplified = + ValueSimplifyAA.getAssumedSimplifiedValue(A); + + if (!QueryingValueSimplified.hasValue()) + return true; + + if (!QueryingValueSimplified.getValue()) + return false; + + Value &QueryingValueSimplifiedUnwrapped = + *QueryingValueSimplified.getValue(); + + if (AccumulatedSimplifiedValue.hasValue() && + !isa<UndefValue>(AccumulatedSimplifiedValue.getValue()) && + !isa<UndefValue>(QueryingValueSimplifiedUnwrapped)) + return AccumulatedSimplifiedValue == QueryingValueSimplified; + if (AccumulatedSimplifiedValue.hasValue() && + isa<UndefValue>(QueryingValueSimplifiedUnwrapped)) + return true; + + LLVM_DEBUG(dbgs() << "[ValueSimplify] " << QueryingValue + << " is assumed to be " + << QueryingValueSimplifiedUnwrapped << "\n"); + + AccumulatedSimplifiedValue = QueryingValueSimplified; + return true; + } + + bool askSimplifiedValueForAAValueConstantRange(Attributor &A) { + if (!getAssociatedValue().getType()->isIntegerTy()) + return false; + + const auto &ValueConstantRangeAA = + A.getAAFor<AAValueConstantRange>(*this, getIRPosition()); + + Optional<ConstantInt *> COpt = + ValueConstantRangeAA.getAssumedConstantInt(A); + if (COpt.hasValue()) { + if (auto *C = COpt.getValue()) + SimplifiedAssociatedValue = C; + else + return false; + } else { + SimplifiedAssociatedValue = llvm::None; + } + return true; + } + + /// See AbstractAttribute::manifest(...). 
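Editorial illustration, not part of the patch: checkAndUpdate above is essentially the classic three-level constant-propagation merge, applied to whatever value each position is assumed to simplify to. A standalone sketch with hypothetical names, merging the candidate seen at each call site (undef-like "don't care" inputs are absorbed, two different concrete values give up).

#include <optional>

// Lattice: no candidate yet -> a single concrete value -> conflict.
struct SimplifyCandidate {
  bool Conflict = false;
  std::optional<long> Value; // std::nullopt == nothing merged yet

  void merge(long V, bool IsDontCare) {
    if (Conflict || IsDontCare)
      return;          // undef-like inputs are compatible with anything
    if (!Value) {
      Value = V;       // first concrete candidate
      return;
    }
    if (*Value != V)
      Conflict = true; // call sites disagree, nothing can be propagated
  }
};

// Usage: if every call site merges 42, the argument can be replaced by 42; as
// soon as one call site merges a different value, Conflict is set.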
+ ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + if (SimplifiedAssociatedValue.hasValue() && + !SimplifiedAssociatedValue.getValue()) + return Changed; + + Value &V = getAssociatedValue(); + auto *C = SimplifiedAssociatedValue.hasValue() + ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue()) + : UndefValue::get(V.getType()); + if (C) { + // We can replace the AssociatedValue with the constant. + if (!V.user_empty() && &V != C && V.getType() == C->getType()) { + LLVM_DEBUG(dbgs() << "[ValueSimplify] " << V << " -> " << *C + << " :: " << *this << "\n"); + if (A.changeValueAfterManifest(V, *C)) + Changed = ChangeStatus::CHANGED; + } + } + + return Changed | AAValueSimplify::manifest(A); + } + + /// See AbstractState::indicatePessimisticFixpoint(...). + ChangeStatus indicatePessimisticFixpoint() override { + // NOTE: Associated value will be returned in a pessimistic fixpoint and is + // regarded as known. That's why`indicateOptimisticFixpoint` is called. + SimplifiedAssociatedValue = &getAssociatedValue(); + indicateOptimisticFixpoint(); + return ChangeStatus::CHANGED; + } + +protected: + // An assumed simplified value. Initially, it is set to Optional::None, which + // means that the value is not clear under current assumption. If in the + // pessimistic state, getAssumedSimplifiedValue doesn't return this value but + // returns orignal associated value. + Optional<Value *> SimplifiedAssociatedValue; +}; + +struct AAValueSimplifyArgument final : AAValueSimplifyImpl { + AAValueSimplifyArgument(const IRPosition &IRP, Attributor &A) + : AAValueSimplifyImpl(IRP, A) {} + + void initialize(Attributor &A) override { + AAValueSimplifyImpl::initialize(A); + if (!getAnchorScope() || getAnchorScope()->isDeclaration()) + indicatePessimisticFixpoint(); + if (hasAttr({Attribute::InAlloca, Attribute::Preallocated, + Attribute::StructRet, Attribute::Nest}, + /* IgnoreSubsumingPositions */ true)) + indicatePessimisticFixpoint(); + + // FIXME: This is a hack to prevent us from propagating function poiner in + // the new pass manager CGSCC pass as it creates call edges the + // CallGraphUpdater cannot handle yet. + Value &V = getAssociatedValue(); + if (V.getType()->isPointerTy() && + V.getType()->getPointerElementType()->isFunctionTy() && + !A.isModulePass()) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // Byval is only replacable if it is readonly otherwise we would write into + // the replaced value and not the copy that byval creates implicitly. + Argument *Arg = getAssociatedArgument(); + if (Arg->hasByValAttr()) { + // TODO: We probably need to verify synchronization is not an issue, e.g., + // there is no race by not copying a constant byval. + const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition()); + if (!MemAA.isAssumedReadOnly()) + return indicatePessimisticFixpoint(); + } + + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto PredForCallSite = [&](AbstractCallSite ACS) { + const IRPosition &ACSArgPos = + IRPosition::callsite_argument(ACS, getArgNo()); + // Check if a coresponding argument was found or if it is on not + // associated (which can happen for callback calls). + if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; + + // We can only propagate thread independent values through callbacks. 
+ // This is different to direct/indirect call sites because for them we + // know the thread executing the caller and callee is the same. For + // callbacks this is not guaranteed, thus a thread dependent value could + // be different for the caller and callee, making it invalid to propagate. + Value &ArgOp = ACSArgPos.getAssociatedValue(); + if (ACS.isCallbackCall()) + if (auto *C = dyn_cast<Constant>(&ArgOp)) + if (C->isThreadDependent()) + return false; + return checkAndUpdate(A, *this, ArgOp, SimplifiedAssociatedValue); + }; + + bool AllCallSitesKnown; + if (!A.checkForAllCallSites(PredForCallSite, *this, true, + AllCallSitesKnown)) + if (!askSimplifiedValueForAAValueConstantRange(A)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyReturned : AAValueSimplifyImpl { + AAValueSimplifyReturned(const IRPosition &IRP, Attributor &A) + : AAValueSimplifyImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto PredForReturned = [&](Value &V) { + return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue); + }; + + if (!A.checkForAllReturnedValues(PredForReturned, *this)) + if (!askSimplifiedValueForAAValueConstantRange(A)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + if (SimplifiedAssociatedValue.hasValue() && + !SimplifiedAssociatedValue.getValue()) + return Changed; + + Value &V = getAssociatedValue(); + auto *C = SimplifiedAssociatedValue.hasValue() + ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue()) + : UndefValue::get(V.getType()); + if (C) { + auto PredForReturned = + [&](Value &V, const SmallSetVector<ReturnInst *, 4> &RetInsts) { + // We can replace the AssociatedValue with the constant. + if (&V == C || V.getType() != C->getType() || isa<UndefValue>(V)) + return true; + + for (ReturnInst *RI : RetInsts) { + if (RI->getFunction() != getAnchorScope()) + continue; + auto *RC = C; + if (RC->getType() != RI->getReturnValue()->getType()) + RC = ConstantExpr::getBitCast(RC, + RI->getReturnValue()->getType()); + LLVM_DEBUG(dbgs() << "[ValueSimplify] " << V << " -> " << *RC + << " in " << *RI << " :: " << *this << "\n"); + if (A.changeUseAfterManifest(RI->getOperandUse(0), *RC)) + Changed = ChangeStatus::CHANGED; + } + return true; + }; + A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this); + } + + return Changed | AAValueSimplify::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyFloating : AAValueSimplifyImpl { + AAValueSimplifyFloating(const IRPosition &IRP, Attributor &A) + : AAValueSimplifyImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // FIXME: This might have exposed a SCC iterator update bug in the old PM. 
+ // Needs investigation. + // AAValueSimplifyImpl::initialize(A); + Value &V = getAnchorValue(); + + // TODO: add other stuffs + if (isa<Constant>(V)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &, + bool Stripped) -> bool { + auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + // TODO: Look the instruction and check recursively. + + LLVM_DEBUG(dbgs() << "[ValueSimplify] Can't be stripped more : " << V + << "\n"); + return false; + } + return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue); + }; + + bool Dummy = false; + if (!genericValueTraversal<AAValueSimplify, bool>( + A, getIRPosition(), *this, Dummy, VisitValueCB, getCtxI(), + /* UseValueSimplify */ false)) + if (!askSimplifiedValueForAAValueConstantRange(A)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyFunction : AAValueSimplifyImpl { + AAValueSimplifyFunction(const IRPosition &IRP, Attributor &A) + : AAValueSimplifyImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + SimplifiedAssociatedValue = &getAnchorValue(); + indicateOptimisticFixpoint(); + } + /// See AbstractAttribute::initialize(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable( + "AAValueSimplify(Function|CallSite)::updateImpl will not be called"); + } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FN_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyCallSite : AAValueSimplifyFunction { + AAValueSimplifyCallSite(const IRPosition &IRP, Attributor &A) + : AAValueSimplifyFunction(IRP, A) {} + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned { + AAValueSimplifyCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAValueSimplifyReturned(IRP, A) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + return AAValueSimplifyImpl::manifest(A); + } + + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(value_simplify) + } +}; +struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { + AAValueSimplifyCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAValueSimplifyFloating(IRP, A) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + if (SimplifiedAssociatedValue.hasValue() && + !SimplifiedAssociatedValue.getValue()) + return Changed; + + Value &V = getAssociatedValue(); + auto *C = SimplifiedAssociatedValue.hasValue() + ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue()) + : UndefValue::get(V.getType()); + if (C) { + Use &U = cast<CallBase>(&getAnchorValue())->getArgOperandUse(getArgNo()); + // We can replace the AssociatedValue with the constant. 
+ if (&V != C && V.getType() == C->getType()) { + if (A.changeUseAfterManifest(U, *C)) + Changed = ChangeStatus::CHANGED; + } + } + + return Changed | AAValueSimplify::manifest(A); + } + + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(value_simplify) + } +}; + +/// ----------------------- Heap-To-Stack Conversion --------------------------- +struct AAHeapToStackImpl : public AAHeapToStack { + AAHeapToStackImpl(const IRPosition &IRP, Attributor &A) + : AAHeapToStack(IRP, A) {} + + const std::string getAsStr() const override { + return "[H2S] Mallocs: " + std::to_string(MallocCalls.size()); + } + + ChangeStatus manifest(Attributor &A) override { + assert(getState().isValidState() && + "Attempted to manifest an invalid state!"); + + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + Function *F = getAnchorScope(); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + + for (Instruction *MallocCall : MallocCalls) { + // This malloc cannot be replaced. + if (BadMallocCalls.count(MallocCall)) + continue; + + for (Instruction *FreeCall : FreesForMalloc[MallocCall]) { + LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n"); + A.deleteAfterManifest(*FreeCall); + HasChanged = ChangeStatus::CHANGED; + } + + LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall + << "\n"); + + Align Alignment; + Constant *Size; + if (isCallocLikeFn(MallocCall, TLI)) { + auto *Num = cast<ConstantInt>(MallocCall->getOperand(0)); + auto *SizeT = cast<ConstantInt>(MallocCall->getOperand(1)); + APInt TotalSize = SizeT->getValue() * Num->getValue(); + Size = + ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize); + } else if (isAlignedAllocLikeFn(MallocCall, TLI)) { + Size = cast<ConstantInt>(MallocCall->getOperand(1)); + Alignment = MaybeAlign(cast<ConstantInt>(MallocCall->getOperand(0)) + ->getValue() + .getZExtValue()) + .valueOrOne(); + } else { + Size = cast<ConstantInt>(MallocCall->getOperand(0)); + } + + unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace(); + Instruction *AI = + new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment, + "", MallocCall->getNextNode()); + + if (AI->getType() != MallocCall->getType()) + AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc", + AI->getNextNode()); + + A.changeValueAfterManifest(*MallocCall, *AI); + + if (auto *II = dyn_cast<InvokeInst>(MallocCall)) { + auto *NBB = II->getNormalDest(); + BranchInst::Create(NBB, MallocCall->getParent()); + A.deleteAfterManifest(*MallocCall); + } else { + A.deleteAfterManifest(*MallocCall); + } + + // Zero out the allocated memory if it was a calloc. + if (isCallocLikeFn(MallocCall, TLI)) { + auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc", + AI->getNextNode()); + Value *Ops[] = { + BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size, + ConstantInt::get(Type::getInt1Ty(F->getContext()), false)}; + + Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()}; + Module *M = F->getParent(); + Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); + CallInst::Create(Fn, Ops, "", BI->getNextNode()); + } + HasChanged = ChangeStatus::CHANGED; + } + + return HasChanged; + } + + /// Collection of all malloc calls in a function. + SmallSetVector<Instruction *, 4> MallocCalls; + + /// Collection of malloc calls that cannot be converted. + DenseSet<const Instruction *> BadMallocCalls; + + /// A map for each malloc call to the set of associated free calls. 
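Editorial sketch, not taken from the patch: the core rewrite of the heap-to-stack manifest above, reduced to the simplest case of a malloc-like call with a constant size whose type already matches the new alloca. Free removal, calloc zero-initialization, aligned_alloc, invoke lowering, and the bitcast for mismatched pointer types are all omitted, and the caller is assumed to have already verified that the call really is a known allocation function.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Replace `%p = call i8* @malloc(i64 C)` with `%p = alloca i8, i64 C`.
static bool replaceMallocWithAlloca(CallInst *MallocCall) {
  auto *Size = dyn_cast<ConstantInt>(MallocCall->getArgOperand(0));
  if (!Size)
    return false; // only constant sizes can safely be moved to the stack
  Function *F = MallocCall->getFunction();
  unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
  auto *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size,
                            Align(1), "h2s", MallocCall);
  if (AI->getType() != MallocCall->getType()) {
    AI->eraseFromParent(); // a real implementation inserts a bitcast instead
    return false;
  }
  MallocCall->replaceAllUsesWith(AI);
  MallocCall->eraseFromParent();
  return true;
}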
+ DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc; + + ChangeStatus updateImpl(Attributor &A) override; +}; + +ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) { + const Function *F = getAnchorScope(); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + + MustBeExecutedContextExplorer &Explorer = + A.getInfoCache().getMustBeExecutedContextExplorer(); + + auto FreeCheck = [&](Instruction &I) { + const auto &Frees = FreesForMalloc.lookup(&I); + if (Frees.size() != 1) + return false; + Instruction *UniqueFree = *Frees.begin(); + return Explorer.findInContextOf(UniqueFree, I.getNextNode()); + }; + + auto UsesCheck = [&](Instruction &I) { + bool ValidUsesOnly = true; + bool MustUse = true; + auto Pred = [&](const Use &U, bool &Follow) -> bool { + Instruction *UserI = cast<Instruction>(U.getUser()); + if (isa<LoadInst>(UserI)) + return true; + if (auto *SI = dyn_cast<StoreInst>(UserI)) { + if (SI->getValueOperand() == U.get()) { + LLVM_DEBUG(dbgs() + << "[H2S] escaping store to memory: " << *UserI << "\n"); + ValidUsesOnly = false; + } else { + // A store into the malloc'ed memory is fine. + } + return true; + } + if (auto *CB = dyn_cast<CallBase>(UserI)) { + if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd()) + return true; + // Record malloc. + if (isFreeCall(UserI, TLI)) { + if (MustUse) { + FreesForMalloc[&I].insert(UserI); + } else { + LLVM_DEBUG(dbgs() << "[H2S] free potentially on different mallocs: " + << *UserI << "\n"); + ValidUsesOnly = false; + } + return true; + } + + unsigned ArgNo = CB->getArgOperandNo(&U); + + const auto &NoCaptureAA = A.getAAFor<AANoCapture>( + *this, IRPosition::callsite_argument(*CB, ArgNo)); + + // If a callsite argument use is nofree, we are fine. + const auto &ArgNoFreeAA = A.getAAFor<AANoFree>( + *this, IRPosition::callsite_argument(*CB, ArgNo)); + + if (!NoCaptureAA.isAssumedNoCapture() || + !ArgNoFreeAA.isAssumedNoFree()) { + LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n"); + ValidUsesOnly = false; + } + return true; + } + + if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) || + isa<PHINode>(UserI) || isa<SelectInst>(UserI)) { + MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI)); + Follow = true; + return true; + } + // Unknown user for which we can not track uses further (in a way that + // makes sense). + LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n"); + ValidUsesOnly = false; + return true; + }; + A.checkForAllUses(Pred, *this, I); + return ValidUsesOnly; + }; + + auto MallocCallocCheck = [&](Instruction &I) { + if (BadMallocCalls.count(&I)) + return true; + + bool IsMalloc = isMallocLikeFn(&I, TLI); + bool IsAlignedAllocLike = isAlignedAllocLikeFn(&I, TLI); + bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI); + if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc) { + BadMallocCalls.insert(&I); + return true; + } + + if (IsMalloc) { + if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0))) + if (Size->getValue().ule(MaxHeapToStackSize)) + if (UsesCheck(I) || FreeCheck(I)) { + MallocCalls.insert(&I); + return true; + } + } else if (IsAlignedAllocLike && isa<ConstantInt>(I.getOperand(0))) { + // Only if the alignment and sizes are constant. 
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1))) + if (Size->getValue().ule(MaxHeapToStackSize)) + if (UsesCheck(I) || FreeCheck(I)) { + MallocCalls.insert(&I); + return true; + } + } else if (IsCalloc) { + bool Overflow = false; + if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0))) + if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1))) + if ((Size->getValue().umul_ov(Num->getValue(), Overflow)) + .ule(MaxHeapToStackSize)) + if (!Overflow && (UsesCheck(I) || FreeCheck(I))) { + MallocCalls.insert(&I); + return true; + } + } + + BadMallocCalls.insert(&I); + return true; + }; + + size_t NumBadMallocs = BadMallocCalls.size(); + + A.checkForAllCallLikeInstructions(MallocCallocCheck, *this); + + if (NumBadMallocs != BadMallocCalls.size()) + return ChangeStatus::CHANGED; + + return ChangeStatus::UNCHANGED; +} + +struct AAHeapToStackFunction final : public AAHeapToStackImpl { + AAHeapToStackFunction(const IRPosition &IRP, Attributor &A) + : AAHeapToStackImpl(IRP, A) {} + + /// See AbstractAttribute::trackStatistics(). + void trackStatistics() const override { + STATS_DECL( + MallocCalls, Function, + "Number of malloc/calloc/aligned_alloc calls converted to allocas"); + for (auto *C : MallocCalls) + if (!BadMallocCalls.count(C)) + ++BUILD_STAT_NAME(MallocCalls, Function); + } +}; + +/// ----------------------- Privatizable Pointers ------------------------------ +struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { + AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A) + : AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {} + + ChangeStatus indicatePessimisticFixpoint() override { + AAPrivatizablePtr::indicatePessimisticFixpoint(); + PrivatizableType = nullptr; + return ChangeStatus::CHANGED; + } + + /// Identify the type we can chose for a private copy of the underlying + /// argument. None means it is not clear yet, nullptr means there is none. + virtual Optional<Type *> identifyPrivatizableType(Attributor &A) = 0; + + /// Return a privatizable type that encloses both T0 and T1. + /// TODO: This is merely a stub for now as we should manage a mapping as well. + Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) { + if (!T0.hasValue()) + return T1; + if (!T1.hasValue()) + return T0; + if (T0 == T1) + return T0; + return nullptr; + } + + Optional<Type *> getPrivatizableType() const override { + return PrivatizableType; + } + + const std::string getAsStr() const override { + return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]"; + } + +protected: + Optional<Type *> PrivatizableType; +}; + +// TODO: Do this for call site arguments (probably also other values) as well. + +struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { + AAPrivatizablePtrArgument(const IRPosition &IRP, Attributor &A) + : AAPrivatizablePtrImpl(IRP, A) {} + + /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...) + Optional<Type *> identifyPrivatizableType(Attributor &A) override { + // If this is a byval argument and we know all the call sites (so we can + // rewrite them), there is no need to check them explicitly. 
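Editorial illustration, not part of the patch: the calloc branch above only fires when NumElements * ElementSize neither overflows nor exceeds the heap-to-stack threshold, which the code checks with APInt::umul_ov. A plain C++ restatement of the same guard with hypothetical names (the real threshold is the MaxHeapToStackSize option).

#include <cstdint>

// True if calloc(NumElements, ElementSize) is small enough for the stack;
// TotalSize receives the byte count on success.
static bool callocFitsOnStack(uint64_t NumElements, uint64_t ElementSize,
                              uint64_t MaxBytes, uint64_t &TotalSize) {
  if (ElementSize != 0 && NumElements > UINT64_MAX / ElementSize)
    return false; // the multiplication would overflow
  TotalSize = NumElements * ElementSize;
  return TotalSize <= MaxBytes;
}

// e.g. callocFitsOnStack(16, 8, 1024, S) is true with S == 128, while an
// element count that wraps around 2^64 is rejected.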
+ bool AllCallSitesKnown; + if (getIRPosition().hasAttr(Attribute::ByVal) && + A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this, + true, AllCallSitesKnown)) + return getAssociatedValue().getType()->getPointerElementType(); + + Optional<Type *> Ty; + unsigned ArgNo = getIRPosition().getArgNo(); + + // Make sure the associated call site argument has the same type at all call + // sites and it is an allocation we know is safe to privatize, for now that + // means we only allow alloca instructions. + // TODO: We can additionally analyze the accesses in the callee to create + // the type from that information instead. That is a little more + // involved and will be done in a follow up patch. + auto CallSiteCheck = [&](AbstractCallSite ACS) { + IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); + // Check if a coresponding argument was found or if it is one not + // associated (which can happen for callback calls). + if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; + + // Check that all call sites agree on a type. + auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos); + Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType(); + + LLVM_DEBUG({ + dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: "; + if (CSTy.hasValue() && CSTy.getValue()) + CSTy.getValue()->print(dbgs()); + else if (CSTy.hasValue()) + dbgs() << "<nullptr>"; + else + dbgs() << "<none>"; + }); + + Ty = combineTypes(Ty, CSTy); + + LLVM_DEBUG({ + dbgs() << " : New Type: "; + if (Ty.hasValue() && Ty.getValue()) + Ty.getValue()->print(dbgs()); + else if (Ty.hasValue()) + dbgs() << "<nullptr>"; + else + dbgs() << "<none>"; + dbgs() << "\n"; + }); + + return !Ty.hasValue() || Ty.getValue(); + }; + + if (!A.checkForAllCallSites(CallSiteCheck, *this, true, AllCallSitesKnown)) + return nullptr; + return Ty; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + PrivatizableType = identifyPrivatizableType(A); + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + if (!PrivatizableType.getValue()) + return indicatePessimisticFixpoint(); + + // The dependence is optional so we don't give up once we give up on the + // alignment. + A.getAAFor<AAAlign>(*this, IRPosition::value(getAssociatedValue()), + /* TrackDependence */ true, DepClassTy::OPTIONAL); + + // Avoid arguments with padding for now. + if (!getIRPosition().hasAttr(Attribute::ByVal) && + !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(), + A.getInfoCache().getDL())) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n"); + return indicatePessimisticFixpoint(); + } + + // Verify callee and caller agree on how the promoted argument would be + // passed. + // TODO: The use of the ArgumentPromotion interface here is ugly, we need a + // specialized form of TargetTransformInfo::areFunctionArgsABICompatible + // which doesn't require the arguments ArgumentPromotion wanted to pass. 
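Editorial sketch, not part of the patch: combineTypes and the CallSiteCheck above implement a small agreement lattice over the candidate privatizable type. The stand-in below uses const void * for the type handle and std::optional instead of llvm::Optional; all names are hypothetical.

#include <optional>
#include <vector>

// std::nullopt = no candidate yet, a contained nullptr = call sites disagree.
using TypeCandidate = std::optional<const void *>;

static TypeCandidate combine(TypeCandidate T0, TypeCandidate T1) {
  if (!T0) return T1;
  if (!T1) return T0;
  return *T0 == *T1 ? T0 : TypeCandidate(nullptr);
}

// Privatization remains possible only if every call site contributes the same
// non-null type.
static TypeCandidate agreeOnType(const std::vector<const void *> &CallSiteTypes) {
  TypeCandidate Agreed;
  for (const void *Ty : CallSiteTypes)
    Agreed = combine(Agreed, TypeCandidate(Ty));
  return Agreed;
}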
+ Function &Fn = *getIRPosition().getAnchorScope(); + SmallPtrSet<Argument *, 1> ArgsToPromote, Dummy; + ArgsToPromote.insert(getAssociatedArgument()); + const auto *TTI = + A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(Fn); + if (!TTI || + !ArgumentPromotionPass::areFunctionArgsABICompatible( + Fn, *TTI, ArgsToPromote, Dummy) || + ArgsToPromote.empty()) { + LLVM_DEBUG( + dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for " + << Fn.getName() << "\n"); + return indicatePessimisticFixpoint(); + } + + // Collect the types that will replace the privatizable type in the function + // signature. + SmallVector<Type *, 16> ReplacementTypes; + identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + + // Register a rewrite of the argument. + Argument *Arg = getAssociatedArgument(); + if (!A.isValidFunctionSignatureRewrite(*Arg, ReplacementTypes)) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Rewrite not valid\n"); + return indicatePessimisticFixpoint(); + } + + unsigned ArgNo = Arg->getArgNo(); + + // Helper to check if for the given call site the associated argument is + // passed to a callback where the privatization would be different. + auto IsCompatiblePrivArgOfCallback = [&](CallBase &CB) { + SmallVector<const Use *, 4> CallbackUses; + AbstractCallSite::getCallbackUses(CB, CallbackUses); + for (const Use *U : CallbackUses) { + AbstractCallSite CBACS(U); + assert(CBACS && CBACS.isCallbackCall()); + for (Argument &CBArg : CBACS.getCalledFunction()->args()) { + int CBArgNo = CBACS.getCallArgOperandNo(CBArg); + + LLVM_DEBUG({ + dbgs() + << "[AAPrivatizablePtr] Argument " << *Arg + << "check if can be privatized in the context of its parent (" + << Arg->getParent()->getName() + << ")\n[AAPrivatizablePtr] because it is an argument in a " + "callback (" + << CBArgNo << "@" << CBACS.getCalledFunction()->getName() + << ")\n[AAPrivatizablePtr] " << CBArg << " : " + << CBACS.getCallArgOperand(CBArg) << " vs " + << CB.getArgOperand(ArgNo) << "\n" + << "[AAPrivatizablePtr] " << CBArg << " : " + << CBACS.getCallArgOperandNo(CBArg) << " vs " << ArgNo << "\n"; + }); + + if (CBArgNo != int(ArgNo)) + continue; + const auto &CBArgPrivAA = + A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(CBArg)); + if (CBArgPrivAA.isValidState()) { + auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType(); + if (!CBArgPrivTy.hasValue()) + continue; + if (CBArgPrivTy.getValue() == PrivatizableType) + continue; + } + + LLVM_DEBUG({ + dbgs() << "[AAPrivatizablePtr] Argument " << *Arg + << " cannot be privatized in the context of its parent (" + << Arg->getParent()->getName() + << ")\n[AAPrivatizablePtr] because it is an argument in a " + "callback (" + << CBArgNo << "@" << CBACS.getCalledFunction()->getName() + << ").\n[AAPrivatizablePtr] for which the argument " + "privatization is not compatible.\n"; + }); + return false; + } + } + return true; + }; + + // Helper to check if for the given call site the associated argument is + // passed to a direct call where the privatization would be different. 
+ auto IsCompatiblePrivArgOfDirectCS = [&](AbstractCallSite ACS) { + CallBase *DC = cast<CallBase>(ACS.getInstruction()); + int DCArgNo = ACS.getCallArgOperandNo(ArgNo); + assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->getNumArgOperands() && + "Expected a direct call operand for callback call operand"); + + LLVM_DEBUG({ + dbgs() << "[AAPrivatizablePtr] Argument " << *Arg + << " check if be privatized in the context of its parent (" + << Arg->getParent()->getName() + << ")\n[AAPrivatizablePtr] because it is an argument in a " + "direct call of (" + << DCArgNo << "@" << DC->getCalledFunction()->getName() + << ").\n"; + }); + + Function *DCCallee = DC->getCalledFunction(); + if (unsigned(DCArgNo) < DCCallee->arg_size()) { + const auto &DCArgPrivAA = A.getAAFor<AAPrivatizablePtr>( + *this, IRPosition::argument(*DCCallee->getArg(DCArgNo))); + if (DCArgPrivAA.isValidState()) { + auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType(); + if (!DCArgPrivTy.hasValue()) + return true; + if (DCArgPrivTy.getValue() == PrivatizableType) + return true; + } + } + + LLVM_DEBUG({ + dbgs() << "[AAPrivatizablePtr] Argument " << *Arg + << " cannot be privatized in the context of its parent (" + << Arg->getParent()->getName() + << ")\n[AAPrivatizablePtr] because it is an argument in a " + "direct call of (" + << ACS.getInstruction()->getCalledFunction()->getName() + << ").\n[AAPrivatizablePtr] for which the argument " + "privatization is not compatible.\n"; + }); + return false; + }; + + // Helper to check if the associated argument is used at the given abstract + // call site in a way that is incompatible with the privatization assumed + // here. + auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) { + if (ACS.isDirectCall()) + return IsCompatiblePrivArgOfCallback(*ACS.getInstruction()); + if (ACS.isCallbackCall()) + return IsCompatiblePrivArgOfDirectCS(ACS); + return false; + }; + + bool AllCallSitesKnown; + if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true, + AllCallSitesKnown)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// Given a type to private \p PrivType, collect the constituates (which are + /// used) in \p ReplacementTypes. + static void + identifyReplacementTypes(Type *PrivType, + SmallVectorImpl<Type *> &ReplacementTypes) { + // TODO: For now we expand the privatization type to the fullest which can + // lead to dead arguments that need to be removed later. + assert(PrivType && "Expected privatizable type!"); + + // Traverse the type, extract constituate types on the outermost level. + if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) { + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) + ReplacementTypes.push_back(PrivStructType->getElementType(u)); + } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) { + ReplacementTypes.append(PrivArrayType->getNumElements(), + PrivArrayType->getElementType()); + } else { + ReplacementTypes.push_back(PrivType); + } + } + + /// Initialize \p Base according to the type \p PrivType at position \p IP. + /// The values needed are taken from the arguments of \p F starting at + /// position \p ArgNo. + static void createInitialization(Type *PrivType, Value &Base, Function &F, + unsigned ArgNo, Instruction &IP) { + assert(PrivType && "Expected privatizable type!"); + + IRBuilder<NoFolder> IRB(&IP); + const DataLayout &DL = F.getParent()->getDataLayout(); + + // Traverse the type, build GEPs and stores. 
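A small worked example, not part of the patch, of what identifyReplacementTypes above produces: only the outermost level of the privatizable type is flattened, so a struct contributes its member types and an array contributes N copies of its element type. The struct built here is purely illustrative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Build %struct.S = type { i32, i64, [2 x float] } and flatten it the way the
// privatization rewrite does for a byval argument of that type.
static void expandExampleStruct(LLVMContext &Ctx,
                                SmallVectorImpl<Type *> &ReplacementTypes) {
  SmallVector<Type *, 3> Members = {
      Type::getInt32Ty(Ctx), Type::getInt64Ty(Ctx),
      ArrayType::get(Type::getFloatTy(Ctx), 2)};
  StructType *S = StructType::create(Members, "struct.S");
  // Outermost level only: the result is i32, i64, [2 x float], i.e. the byval
  // argument becomes three new arguments; the inner array is not split further.
  ReplacementTypes.append(S->element_begin(), S->element_end());
}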
+ if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) { + const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) { + Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo(); + Value *Ptr = constructPointer( + PointeeTy, &Base, PrivStructLayout->getElementOffset(u), IRB, DL); + new StoreInst(F.getArg(ArgNo + u), Ptr, &IP); + } + } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) { + Type *PointeePtrTy = PrivArrayType->getElementType()->getPointerTo(); + uint64_t PointeeTySize = DL.getTypeStoreSize(PointeePtrTy); + for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) { + Value *Ptr = + constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL); + new StoreInst(F.getArg(ArgNo + u), Ptr, &IP); + } + } else { + new StoreInst(F.getArg(ArgNo), &Base, &IP); + } + } + + /// Extract values from \p Base according to the type \p PrivType at the + /// call position \p ACS. The values are appended to \p ReplacementValues. + void createReplacementValues(Align Alignment, Type *PrivType, + AbstractCallSite ACS, Value *Base, + SmallVectorImpl<Value *> &ReplacementValues) { + assert(Base && "Expected base value!"); + assert(PrivType && "Expected privatizable type!"); + Instruction *IP = ACS.getInstruction(); + + IRBuilder<NoFolder> IRB(IP); + const DataLayout &DL = IP->getModule()->getDataLayout(); + + if (Base->getType()->getPointerElementType() != PrivType) + Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(), + "", ACS.getInstruction()); + + // Traverse the type, build GEPs and loads. + if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) { + const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) { + Type *PointeeTy = PrivStructType->getElementType(u); + Value *Ptr = + constructPointer(PointeeTy->getPointerTo(), Base, + PrivStructLayout->getElementOffset(u), IRB, DL); + LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP); + L->setAlignment(Alignment); + ReplacementValues.push_back(L); + } + } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) { + Type *PointeeTy = PrivArrayType->getElementType(); + uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy); + Type *PointeePtrTy = PointeeTy->getPointerTo(); + for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) { + Value *Ptr = + constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL); + LoadInst *L = new LoadInst(PointeePtrTy, Ptr, "", IP); + L->setAlignment(Alignment); + ReplacementValues.push_back(L); + } + } else { + LoadInst *L = new LoadInst(PrivType, Base, "", IP); + L->setAlignment(Alignment); + ReplacementValues.push_back(L); + } + } + + /// See AbstractAttribute::manifest(...) + ChangeStatus manifest(Attributor &A) override { + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + assert(PrivatizableType.getValue() && "Expected privatizable type!"); + + // Collect all tail calls in the function as we cannot allow new allocas to + // escape into tail recursion. + // TODO: Be smarter about new allocas escaping into tail calls. 
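Editorial sketch, not part of the patch: the callee-side repair described above (and implemented by createInitialization) stores each newly added scalar argument into the corresponding member of the private alloca. The version below handles only the struct case and uses CreateStructGEP for brevity, whereas the real code computes raw offsets through constructPointer; names are hypothetical.

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Store arguments F[FirstArgNo .. FirstArgNo+N-1] into the N members of the
// private alloca AI, right before the instruction IP.
static void initializePrivateStruct(Function &F, AllocaInst &AI,
                                    unsigned FirstArgNo, Instruction &IP) {
  IRBuilder<> IRB(&IP);
  auto *STy = cast<StructType>(AI.getAllocatedType());
  for (unsigned u = 0, e = STy->getNumElements(); u < e; ++u) {
    Value *Ptr = IRB.CreateStructGEP(STy, &AI, u);
    IRB.CreateStore(F.getArg(FirstArgNo + u), Ptr);
  }
}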
+ SmallVector<CallInst *, 16> TailCalls; + if (!A.checkForAllInstructions( + [&](Instruction &I) { + CallInst &CI = cast<CallInst>(I); + if (CI.isTailCall()) + TailCalls.push_back(&CI); + return true; + }, + *this, {Instruction::Call})) + return ChangeStatus::UNCHANGED; + + Argument *Arg = getAssociatedArgument(); + // Query AAAlign attribute for alignment of associated argument to + // determine the best alignment of loads. + const auto &AlignAA = A.getAAFor<AAAlign>(*this, IRPosition::value(*Arg)); + + // Callback to repair the associated function. A new alloca is placed at the + // beginning and initialized with the values passed through arguments. The + // new alloca replaces the use of the old pointer argument. + Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB = + [=](const Attributor::ArgumentReplacementInfo &ARI, + Function &ReplacementFn, Function::arg_iterator ArgIt) { + BasicBlock &EntryBB = ReplacementFn.getEntryBlock(); + Instruction *IP = &*EntryBB.getFirstInsertionPt(); + auto *AI = new AllocaInst(PrivatizableType.getValue(), 0, + Arg->getName() + ".priv", IP); + createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn, + ArgIt->getArgNo(), *IP); + Arg->replaceAllUsesWith(AI); + + for (CallInst *CI : TailCalls) + CI->setTailCall(false); + }; + + // Callback to repair a call site of the associated function. The elements + // of the privatizable type are loaded prior to the call and passed to the + // new function version. + Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB = + [=, &AlignAA](const Attributor::ArgumentReplacementInfo &ARI, + AbstractCallSite ACS, + SmallVectorImpl<Value *> &NewArgOperands) { + // When no alignment is specified for the load instruction, + // natural alignment is assumed. + createReplacementValues( + assumeAligned(AlignAA.getAssumedAlign()), + PrivatizableType.getValue(), ACS, + ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()), + NewArgOperands); + }; + + // Collect the types that will replace the privatizable type in the function + // signature. + SmallVector<Type *, 16> ReplacementTypes; + identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + + // Register a rewrite of the argument. + if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes, + std::move(FnRepairCB), + std::move(ACSRepairCB))) + return ChangeStatus::CHANGED; + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl { + AAPrivatizablePtrFloating(const IRPosition &IRP, Attributor &A) + : AAPrivatizablePtrImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + virtual void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. + indicatePessimisticFixpoint(); + } + + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::" + "updateImpl will not be called"); + } + + /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...) 
+ Optional<Type *> identifyPrivatizableType(Attributor &A) override { + Value *Obj = + GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL()); + if (!Obj) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n"); + return nullptr; + } + + if (auto *AI = dyn_cast<AllocaInst>(Obj)) + if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) + if (CI->isOne()) + return Obj->getType()->getPointerElementType(); + if (auto *Arg = dyn_cast<Argument>(Obj)) { + auto &PrivArgAA = + A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(*Arg)); + if (PrivArgAA.isAssumedPrivatizablePtr()) + return Obj->getType()->getPointerElementType(); + } + + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid " + "alloca nor privatizable argument: " + << *Obj << "!\n"); + return nullptr; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrCallSiteArgument final + : public AAPrivatizablePtrFloating { + AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAPrivatizablePtrFloating(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (getIRPosition().hasAttr(Attribute::ByVal)) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + PrivatizableType = identifyPrivatizableType(A); + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + if (!PrivatizableType.getValue()) + return indicatePessimisticFixpoint(); + + const IRPosition &IRP = getIRPosition(); + auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP); + if (!NoCaptureAA.isAssumedNoCapture()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n"); + return indicatePessimisticFixpoint(); + } + + auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP); + if (!NoAliasAA.isAssumedNoAlias()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n"); + return indicatePessimisticFixpoint(); + } + + const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, IRP); + if (!MemBehaviorAA.isAssumedReadOnly()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n"); + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrCallSiteReturned final + : public AAPrivatizablePtrFloating { + AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAPrivatizablePtrFloating(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating { + AAPrivatizablePtrReturned(const IRPosition &IRP, Attributor &A) + : AAPrivatizablePtrFloating(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. 
+ indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr); + } +}; + +/// -------------------- Memory Behavior Attributes ---------------------------- +/// Includes read-none, read-only, and write-only. +/// ---------------------------------------------------------------------------- +struct AAMemoryBehaviorImpl : public AAMemoryBehavior { + AAMemoryBehaviorImpl(const IRPosition &IRP, Attributor &A) + : AAMemoryBehavior(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + intersectAssumedBits(BEST_STATE); + getKnownStateFromValue(getIRPosition(), getState()); + IRAttribute::initialize(A); + } + + /// Return the memory behavior information encoded in the IR for \p IRP. + static void getKnownStateFromValue(const IRPosition &IRP, + BitIntegerState &State, + bool IgnoreSubsumingPositions = false) { + SmallVector<Attribute, 2> Attrs; + IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions); + for (const Attribute &Attr : Attrs) { + switch (Attr.getKindAsEnum()) { + case Attribute::ReadNone: + State.addKnownBits(NO_ACCESSES); + break; + case Attribute::ReadOnly: + State.addKnownBits(NO_WRITES); + break; + case Attribute::WriteOnly: + State.addKnownBits(NO_READS); + break; + default: + llvm_unreachable("Unexpected attribute!"); + } + } + + if (auto *I = dyn_cast<Instruction>(&IRP.getAnchorValue())) { + if (!I->mayReadFromMemory()) + State.addKnownBits(NO_READS); + if (!I->mayWriteToMemory()) + State.addKnownBits(NO_WRITES); + } + } + + /// See AbstractAttribute::getDeducedAttributes(...). + void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl<Attribute> &Attrs) const override { + assert(Attrs.size() == 0); + if (isAssumedReadNone()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone)); + else if (isAssumedReadOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly)); + else if (isAssumedWriteOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly)); + assert(Attrs.size() <= 1); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + if (hasAttr(Attribute::ReadNone, /* IgnoreSubsumingPositions */ true)) + return ChangeStatus::UNCHANGED; + + const IRPosition &IRP = getIRPosition(); + + // Check if we would improve the existing attributes first. + SmallVector<Attribute, 4> DeducedAttrs; + getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs); + if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) { + return IRP.hasAttr(Attr.getKindAsEnum(), + /* IgnoreSubsumingPositions */ true); + })) + return ChangeStatus::UNCHANGED; + + // Clear existing attributes. + IRP.removeAttrs(AttrKinds); + + // Use the generic manifest method. + return IRAttribute::manifest(A); + } + + /// See AbstractState::getAsStr(). + const std::string getAsStr() const override { + if (isAssumedReadNone()) + return "readnone"; + if (isAssumedReadOnly()) + return "readonly"; + if (isAssumedWriteOnly()) + return "writeonly"; + return "may-read/write"; + } + + /// The set of IR attributes AAMemoryBehavior deals with. + static const Attribute::AttrKind AttrKinds[3]; +}; + +const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = { + Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly}; + +/// Memory behavior attribute for a floating value. 
+struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl { + AAMemoryBehaviorFloating(const IRPosition &IRP, Attributor &A) + : AAMemoryBehaviorImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + // Initialize the use vector with all direct uses of the associated value. + for (const Use &U : getAssociatedValue().uses()) + Uses.insert(&U); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FLOATING_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_FLOATING_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_FLOATING_ATTR(writeonly) + } + +private: + /// Return true if users of \p UserI might access the underlying + /// variable/location described by \p U and should therefore be analyzed. + bool followUsersOfUseIn(Attributor &A, const Use *U, + const Instruction *UserI); + + /// Update the state according to the effect of use \p U in \p UserI. + void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI); + +protected: + /// Container for (transitive) uses of the associated argument. + SetVector<const Use *> Uses; +}; + +/// Memory behavior attribute for function argument. +struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating { + AAMemoryBehaviorArgument(const IRPosition &IRP, Attributor &A) + : AAMemoryBehaviorFloating(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + intersectAssumedBits(BEST_STATE); + const IRPosition &IRP = getIRPosition(); + // TODO: Make IgnoreSubsumingPositions a property of an IRAttribute so we + // can query it when we use has/getAttr. That would allow us to reuse the + // initialize of the base class here. + bool HasByVal = + IRP.hasAttr({Attribute::ByVal}, /* IgnoreSubsumingPositions */ true); + getKnownStateFromValue(IRP, getState(), + /* IgnoreSubsumingPositions */ HasByVal); + + // Initialize the use vector with all direct uses of the associated value. + Argument *Arg = getAssociatedArgument(); + if (!Arg || !A.isFunctionIPOAmendable(*(Arg->getParent()))) { + indicatePessimisticFixpoint(); + } else { + // Initialize the use vector with all direct uses of the associated value. + for (const Use &U : Arg->uses()) + Uses.insert(&U); + } + } + + ChangeStatus manifest(Attributor &A) override { + // TODO: Pointer arguments are not supported on vectors of pointers yet. + if (!getAssociatedValue().getType()->isPointerTy()) + return ChangeStatus::UNCHANGED; + + // TODO: From readattrs.ll: "inalloca parameters are always + // considered written" + if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) { + removeKnownBits(NO_WRITES); + removeAssumedBits(NO_WRITES); + } + return AAMemoryBehaviorFloating::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_ARG_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_ARG_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_ARG_ATTR(writeonly) + } +}; + +struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument { + AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAMemoryBehaviorArgument(IRP, A) {} + + /// See AbstractAttribute::initialize(...). 
+ void initialize(Attributor &A) override { + if (Argument *Arg = getAssociatedArgument()) { + if (Arg->hasByValAttr()) { + addKnownBits(NO_WRITES); + removeKnownBits(NO_READS); + removeAssumedBits(NO_READS); + } + } + AAMemoryBehaviorArgument::initialize(A); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AAMemoryBehavior::StateType &>(ArgAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CSARG_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_CSARG_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_CSARG_ATTR(writeonly) + } +}; + +/// Memory behavior attribute for a call site return position. +struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating { + AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAMemoryBehaviorFloating(IRP, A) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // We do not annotate returned values. + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// An AA to represent the memory behavior function attributes. +struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl { + AAMemoryBehaviorFunction(const IRPosition &IRP, Attributor &A) + : AAMemoryBehaviorImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(Attributor &A). + virtual ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + Function &F = cast<Function>(getAnchorValue()); + if (isAssumedReadNone()) { + F.removeFnAttr(Attribute::ArgMemOnly); + F.removeFnAttr(Attribute::InaccessibleMemOnly); + F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + return AAMemoryBehaviorImpl::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FN_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_FN_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_FN_ATTR(writeonly) + } +}; + +/// AAMemoryBehavior attribute for call sites. +struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { + AAMemoryBehaviorCallSite(const IRPosition &IRP, Attributor &A) + : AAMemoryBehaviorImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || !A.isFunctionIPOAmendable(*F)) { + indicatePessimisticFixpoint(); + return; + } + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast<const AAMemoryBehavior::StateType &>(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CS_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_CS_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_CS_ATTR(writeonly) + } +}; + +ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) { + + // The current assumed state used to determine a change. + auto AssumedState = getAssumed(); + + auto CheckRWInst = [&](Instruction &I) { + // If the instruction has its own memory behavior state, use it to restrict + // the local state. No further analysis is required as the other memory + // state is as optimistic as it gets. + if (const auto *CB = dyn_cast<CallBase>(&I)) { + const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>( + *this, IRPosition::callsite_function(*CB)); + intersectAssumedBits(MemBehaviorAA.getAssumed()); + return !isAtFixpoint(); + } + + // Remove access kind modifiers if necessary. + if (I.mayReadFromMemory()) + removeAssumedBits(NO_READS); + if (I.mayWriteToMemory()) + removeAssumedBits(NO_WRITES); + return !isAtFixpoint(); + }; + + if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this)) + return indicatePessimisticFixpoint(); + + return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; +} + +ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) { + + const IRPosition &IRP = getIRPosition(); + const IRPosition &FnPos = IRPosition::function_scope(IRP); + AAMemoryBehavior::StateType &S = getState(); + + // First, check the function scope. We take the known information and we avoid + // work if the assumed information implies the current assumed information for + // this attribute. This is valid for all but byval arguments. + Argument *Arg = IRP.getAssociatedArgument(); + AAMemoryBehavior::base_t FnMemAssumedState = + AAMemoryBehavior::StateType::getWorstState(); + if (!Arg || !Arg->hasByValAttr()) { + const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>( + *this, FnPos, /* TrackDependence */ true, DepClassTy::OPTIONAL); + FnMemAssumedState = FnMemAA.getAssumed(); + S.addKnownBits(FnMemAA.getKnown()); + if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed()) + return ChangeStatus::UNCHANGED; + } + + // Make sure the value is not captured (except through "return"); if + // it is, any information derived would be irrelevant anyway as we cannot + // check the potential aliases introduced by the capture. However, no need + // to fall back to anything less optimistic than the function state. + const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>( + *this, IRP, /* TrackDependence */ true, DepClassTy::OPTIONAL); + if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + S.intersectAssumedBits(FnMemAssumedState); + return ChangeStatus::CHANGED; + } + + // The current assumed state used to determine a change.
+ auto AssumedState = S.getAssumed(); + + // Liveness information to exclude dead users. + // TODO: Take the FnPos once we have call site specific liveness information. + const auto &LivenessAA = A.getAAFor<AAIsDead>( + *this, IRPosition::function(*IRP.getAssociatedFunction()), + /* TrackDependence */ false); + + // Visit and expand uses until all are analyzed or a fixpoint is reached. + for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) { + const Use *U = Uses[i]; + Instruction *UserI = cast<Instruction>(U->getUser()); + LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI + << " [Dead: " << (A.isAssumedDead(*U, this, &LivenessAA)) + << "]\n"); + if (A.isAssumedDead(*U, this, &LivenessAA)) + continue; + + // Droppable users, e.g., llvm::assume does not actually perform any action. + if (UserI->isDroppable()) + continue; + + // Check if the users of UserI should also be visited. + if (followUsersOfUseIn(A, U, UserI)) + for (const Use &UserIUse : UserI->uses()) + Uses.insert(&UserIUse); + + // If UserI might touch memory we analyze the use in detail. + if (UserI->mayReadOrWriteMemory()) + analyzeUseIn(A, U, UserI); + } + + return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; +} + +bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, + const Instruction *UserI) { + // The loaded value is unrelated to the pointer argument, no need to + // follow the users of the load. + if (isa<LoadInst>(UserI)) + return false; + + // By default we follow all uses assuming UserI might leak information on U, + // we have special handling for call sites operands though. + const auto *CB = dyn_cast<CallBase>(UserI); + if (!CB || !CB->isArgOperand(U)) + return true; + + // If the use is a call argument known not to be captured, the users of + // the call do not need to be visited because they have to be unrelated to + // the input. Note that this check is not trivial even though we disallow + // general capturing of the underlying argument. The reason is that the + // call might the argument "through return", which we allow and for which we + // need to check call users. + if (U->get()->getType()->isPointerTy()) { + unsigned ArgNo = CB->getArgOperandNo(U); + const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>( + *this, IRPosition::callsite_argument(*CB, ArgNo), + /* TrackDependence */ true, DepClassTy::OPTIONAL); + return !ArgNoCaptureAA.isAssumedNoCapture(); + } + + return true; +} + +void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, + const Instruction *UserI) { + assert(UserI->mayReadOrWriteMemory()); + + switch (UserI->getOpcode()) { + default: + // TODO: Handle all atomics and other side-effect operations we know of. + break; + case Instruction::Load: + // Loads cause the NO_READS property to disappear. + removeAssumedBits(NO_READS); + return; + + case Instruction::Store: + // Stores cause the NO_WRITES property to disappear if the use is the + // pointer operand. Note that we do assume that capturing was taken care of + // somewhere else. + if (cast<StoreInst>(UserI)->getPointerOperand() == U->get()) + removeAssumedBits(NO_WRITES); + return; + + case Instruction::Call: + case Instruction::CallBr: + case Instruction::Invoke: { + // For call sites we look at the argument memory behavior attribute (this + // could be recursive!) in order to restrict our own state. + const auto *CB = cast<CallBase>(UserI); + + // Give up on operand bundles. 
+ if (CB->isBundleOperand(U)) { + indicatePessimisticFixpoint(); + return; + } + + // Calling a function does read the function pointer, maybe write it if the + // function is self-modifying. + if (CB->isCallee(U)) { + removeAssumedBits(NO_READS); + break; + } + + // Adjust the possible access behavior based on the information on the + // argument. + IRPosition Pos; + if (U->get()->getType()->isPointerTy()) + Pos = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(U)); + else + Pos = IRPosition::callsite_function(*CB); + const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>( + *this, Pos, + /* TrackDependence */ true, DepClassTy::OPTIONAL); + // "assumed" has at most the same bits as the MemBehaviorAA assumed + // and at least "known". + intersectAssumedBits(MemBehaviorAA.getAssumed()); + return; + } + }; + + // Generally, look at the "may-properties" and adjust the assumed state if we + // did not trigger special handling before. + if (UserI->mayReadFromMemory()) + removeAssumedBits(NO_READS); + if (UserI->mayWriteToMemory()) + removeAssumedBits(NO_WRITES); +} + +} // namespace + +/// -------------------- Memory Locations Attributes --------------------------- +/// Includes read-none, argmemonly, inaccessiblememonly, +/// inaccessiblememorargmemonly +/// ---------------------------------------------------------------------------- + +std::string AAMemoryLocation::getMemoryLocationsAsStr( + AAMemoryLocation::MemoryLocationsKind MLK) { + if (0 == (MLK & AAMemoryLocation::NO_LOCATIONS)) + return "all memory"; + if (MLK == AAMemoryLocation::NO_LOCATIONS) + return "no memory"; + std::string S = "memory:"; + if (0 == (MLK & AAMemoryLocation::NO_LOCAL_MEM)) + S += "stack,"; + if (0 == (MLK & AAMemoryLocation::NO_CONST_MEM)) + S += "constant,"; + if (0 == (MLK & AAMemoryLocation::NO_GLOBAL_INTERNAL_MEM)) + S += "internal global,"; + if (0 == (MLK & AAMemoryLocation::NO_GLOBAL_EXTERNAL_MEM)) + S += "external global,"; + if (0 == (MLK & AAMemoryLocation::NO_ARGUMENT_MEM)) + S += "argument,"; + if (0 == (MLK & AAMemoryLocation::NO_INACCESSIBLE_MEM)) + S += "inaccessible,"; + if (0 == (MLK & AAMemoryLocation::NO_MALLOCED_MEM)) + S += "malloced,"; + if (0 == (MLK & AAMemoryLocation::NO_UNKOWN_MEM)) + S += "unknown,"; + S.pop_back(); + return S; +} + +namespace { +struct AAMemoryLocationImpl : public AAMemoryLocation { + + AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A) + : AAMemoryLocation(IRP, A), Allocator(A.Allocator) { + for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u) + AccessKind2Accesses[u] = nullptr; + } + + ~AAMemoryLocationImpl() { + // The AccessSets are allocated via a BumpPtrAllocator, we call + // the destructor manually. + for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u) + if (AccessKind2Accesses[u]) + AccessKind2Accesses[u]->~AccessSet(); + } + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + intersectAssumedBits(BEST_STATE); + getKnownStateFromValue(A, getIRPosition(), getState()); + IRAttribute::initialize(A); + } + + /// Return the memory behavior information encoded in the IR for \p IRP. + static void getKnownStateFromValue(Attributor &A, const IRPosition &IRP, + BitIntegerState &State, + bool IgnoreSubsumingPositions = false) { + // For internal functions we ignore `argmemonly` and + // `inaccessiblememorargmemonly` as we might break it via interprocedural + // constant propagation. It is unclear if this is the best way but it is + // unlikely this will cause real performance problems. 
If we are deriving + // attributes for the anchor function we even remove the attribute in + // addition to ignoring it. + bool UseArgMemOnly = true; + Function *AnchorFn = IRP.getAnchorScope(); + if (AnchorFn && A.isRunOn(*AnchorFn)) + UseArgMemOnly = !AnchorFn->hasLocalLinkage(); + + SmallVector<Attribute, 2> Attrs; + IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions); + for (const Attribute &Attr : Attrs) { + switch (Attr.getKindAsEnum()) { + case Attribute::ReadNone: + State.addKnownBits(NO_LOCAL_MEM | NO_CONST_MEM); + break; + case Attribute::InaccessibleMemOnly: + State.addKnownBits(inverseLocation(NO_INACCESSIBLE_MEM, true, true)); + break; + case Attribute::ArgMemOnly: + if (UseArgMemOnly) + State.addKnownBits(inverseLocation(NO_ARGUMENT_MEM, true, true)); + else + IRP.removeAttrs({Attribute::ArgMemOnly}); + break; + case Attribute::InaccessibleMemOrArgMemOnly: + if (UseArgMemOnly) + State.addKnownBits(inverseLocation( + NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true)); + else + IRP.removeAttrs({Attribute::InaccessibleMemOrArgMemOnly}); + break; + default: + llvm_unreachable("Unexpected attribute!"); + } + } + } + + /// See AbstractAttribute::getDeducedAttributes(...). + void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl<Attribute> &Attrs) const override { + assert(Attrs.size() == 0); + if (isAssumedReadNone()) { + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone)); + } else if (getIRPosition().getPositionKind() == IRPosition::IRP_FUNCTION) { + if (isAssumedInaccessibleMemOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::InaccessibleMemOnly)); + else if (isAssumedArgMemOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ArgMemOnly)); + else if (isAssumedInaccessibleOrArgMemOnly()) + Attrs.push_back( + Attribute::get(Ctx, Attribute::InaccessibleMemOrArgMemOnly)); + } + assert(Attrs.size() <= 1); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + const IRPosition &IRP = getIRPosition(); + + // Check if we would improve the existing attributes first. + SmallVector<Attribute, 4> DeducedAttrs; + getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs); + if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) { + return IRP.hasAttr(Attr.getKindAsEnum(), + /* IgnoreSubsumingPositions */ true); + })) + return ChangeStatus::UNCHANGED; + + // Clear existing attributes. + IRP.removeAttrs(AttrKinds); + if (isAssumedReadNone()) + IRP.removeAttrs(AAMemoryBehaviorImpl::AttrKinds); + + // Use the generic manifest method. + return IRAttribute::manifest(A); + } + + /// See AAMemoryLocation::checkForAllAccessesToMemoryKind(...). 
+ bool checkForAllAccessesToMemoryKind( + function_ref<bool(const Instruction *, const Value *, AccessKind, + MemoryLocationsKind)> + Pred, + MemoryLocationsKind RequestedMLK) const override { + if (!isValidState()) + return false; + + MemoryLocationsKind AssumedMLK = getAssumedNotAccessedLocation(); + if (AssumedMLK == NO_LOCATIONS) + return true; + + unsigned Idx = 0; + for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; + CurMLK *= 2, ++Idx) { + if (CurMLK & RequestedMLK) + continue; + + if (const AccessSet *Accesses = AccessKind2Accesses[Idx]) + for (const AccessInfo &AI : *Accesses) + if (!Pred(AI.I, AI.Ptr, AI.Kind, CurMLK)) + return false; + } + + return true; + } + + ChangeStatus indicatePessimisticFixpoint() override { + // If we give up and indicate a pessimistic fixpoint this instruction will + // become an access for all potential access kinds: + // TODO: Add pointers for argmemonly and globals to improve the results of + // checkForAllAccessesToMemoryKind. + bool Changed = false; + MemoryLocationsKind KnownMLK = getKnown(); + Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()); + for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) + if (!(CurMLK & KnownMLK)) + updateStateAndAccessesMap(getState(), CurMLK, I, nullptr, Changed, + getAccessKindFromInst(I)); + return AAMemoryLocation::indicatePessimisticFixpoint(); + } + +protected: + /// Helper struct to tie together an instruction that has a read or write + /// effect with the pointer it accesses (if any). + struct AccessInfo { + + /// The instruction that caused the access. + const Instruction *I; + + /// The base pointer that is accessed, or null if unknown. + const Value *Ptr; + + /// The kind of access (read/write/read+write). + AccessKind Kind; + + bool operator==(const AccessInfo &RHS) const { + return I == RHS.I && Ptr == RHS.Ptr && Kind == RHS.Kind; + } + bool operator()(const AccessInfo &LHS, const AccessInfo &RHS) const { + if (LHS.I != RHS.I) + return LHS.I < RHS.I; + if (LHS.Ptr != RHS.Ptr) + return LHS.Ptr < RHS.Ptr; + if (LHS.Kind != RHS.Kind) + return LHS.Kind < RHS.Kind; + return false; + } + }; + + /// Mapping from *single* memory location kinds, e.g., LOCAL_MEM with the + /// value of NO_LOCAL_MEM, to the accesses encountered for this memory kind. + using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>; + AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()]; + + /// Return the kind(s) of location that may be accessed by \p V. + AAMemoryLocation::MemoryLocationsKind + categorizeAccessedLocations(Attributor &A, Instruction &I, bool &Changed); + + /// Return the access kind as determined by \p I. + AccessKind getAccessKindFromInst(const Instruction *I) { + AccessKind AK = READ_WRITE; + if (I) { + AK = I->mayReadFromMemory() ? READ : NONE; + AK = AccessKind(AK | (I->mayWriteToMemory() ? WRITE : NONE)); + } + return AK; + } + + /// Update the state \p State and the AccessKind2Accesses given that \p I is + /// an access of kind \p AK to a \p MLK memory location with the access + /// pointer \p Ptr. 
+ void updateStateAndAccessesMap(AAMemoryLocation::StateType &State, + MemoryLocationsKind MLK, const Instruction *I, + const Value *Ptr, bool &Changed, + AccessKind AK = READ_WRITE) { + + assert(isPowerOf2_32(MLK) && "Expected a single location set!"); + auto *&Accesses = AccessKind2Accesses[llvm::Log2_32(MLK)]; + if (!Accesses) + Accesses = new (Allocator) AccessSet(); + Changed |= Accesses->insert(AccessInfo{I, Ptr, AK}).second; + State.removeAssumedBits(MLK); + } + + /// Determine the underlying locations kinds for \p Ptr, e.g., globals or + /// arguments, and update the state and access map accordingly. + void categorizePtrValue(Attributor &A, const Instruction &I, const Value &Ptr, + AAMemoryLocation::StateType &State, bool &Changed); + + /// Used to allocate access sets. + BumpPtrAllocator &Allocator; + + /// The set of IR attributes AAMemoryLocation deals with. + static const Attribute::AttrKind AttrKinds[4]; +}; + +const Attribute::AttrKind AAMemoryLocationImpl::AttrKinds[] = { + Attribute::ReadNone, Attribute::InaccessibleMemOnly, Attribute::ArgMemOnly, + Attribute::InaccessibleMemOrArgMemOnly}; + +void AAMemoryLocationImpl::categorizePtrValue( + Attributor &A, const Instruction &I, const Value &Ptr, + AAMemoryLocation::StateType &State, bool &Changed) { + LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize pointer locations for " + << Ptr << " [" + << getMemoryLocationsAsStr(State.getAssumed()) << "]\n"); + + auto StripGEPCB = [](Value *V) -> Value * { + auto *GEP = dyn_cast<GEPOperator>(V); + while (GEP) { + V = GEP->getPointerOperand(); + GEP = dyn_cast<GEPOperator>(V); + } + return V; + }; + + auto VisitValueCB = [&](Value &V, const Instruction *, + AAMemoryLocation::StateType &T, + bool Stripped) -> bool { + MemoryLocationsKind MLK = NO_LOCATIONS; + assert(!isa<GEPOperator>(V) && "GEPs should have been stripped."); + if (isa<UndefValue>(V)) + return true; + if (auto *Arg = dyn_cast<Argument>(&V)) { + if (Arg->hasByValAttr()) + MLK = NO_LOCAL_MEM; + else + MLK = NO_ARGUMENT_MEM; + } else if (auto *GV = dyn_cast<GlobalValue>(&V)) { + if (GV->hasLocalLinkage()) + MLK = NO_GLOBAL_INTERNAL_MEM; + else + MLK = NO_GLOBAL_EXTERNAL_MEM; + } else if (isa<ConstantPointerNull>(V) && + !NullPointerIsDefined(getAssociatedFunction(), + V.getType()->getPointerAddressSpace())) { + return true; + } else if (isa<AllocaInst>(V)) { + MLK = NO_LOCAL_MEM; + } else if (const auto *CB = dyn_cast<CallBase>(&V)) { + const auto &NoAliasAA = + A.getAAFor<AANoAlias>(*this, IRPosition::callsite_returned(*CB)); + if (NoAliasAA.isAssumedNoAlias()) + MLK = NO_MALLOCED_MEM; + else + MLK = NO_UNKOWN_MEM; + } else { + MLK = NO_UNKOWN_MEM; + } + + assert(MLK != NO_LOCATIONS && "No location specified!"); + updateStateAndAccessesMap(T, MLK, &I, &V, Changed, + getAccessKindFromInst(&I)); + LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Ptr value cannot be categorized: " + << V << " -> " << getMemoryLocationsAsStr(T.getAssumed()) + << "\n"); + return true; + }; + + if (!genericValueTraversal<AAMemoryLocation, AAMemoryLocation::StateType>( + A, IRPosition::value(Ptr), *this, State, VisitValueCB, getCtxI(), + /* UseValueSimplify */ true, + /* MaxValues */ 32, StripGEPCB)) { + LLVM_DEBUG( + dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n"); + updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed, + getAccessKindFromInst(&I)); + } else { + LLVM_DEBUG( + dbgs() + << "[AAMemoryLocation] Accessed locations with pointer locations: " + << getMemoryLocationsAsStr(State.getAssumed()) << "\n"); + } +} 
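The NO_* location kinds used above form a bit set in which every set bit records the optimistic assumption that the corresponding kind of memory is not accessed; categorizing an access only ever clears bits, which keeps the fixpoint iteration monotone. The following standalone C++ sketch illustrates that encoding only; the enumerator names and values here are invented for illustration and do not correspond to the actual AAMemoryLocation constants in this patch.

#include <cstdint>
#include <cstdio>

// Invented stand-ins for the NO_* location bits; a set bit means "this kind
// of memory is assumed not to be accessed".
enum LocationBits : uint32_t {
  NO_STACK_MEM    = 1u << 0,
  NO_GLOBAL_MEM   = 1u << 1,
  NO_ARGUMENT_MEM = 1u << 2,
  NO_UNKNOWN_MEM  = 1u << 3,
  NO_LOCATIONS = NO_STACK_MEM | NO_GLOBAL_MEM | NO_ARGUMENT_MEM | NO_UNKNOWN_MEM,
};

int main() {
  // Optimistic start: assume nothing is accessed (all NO_* bits set).
  uint32_t Assumed = NO_LOCATIONS;

  // Categorizing an access through a pointer argument clears that bit,
  // mirroring how updateStateAndAccessesMap removes an assumed bit.
  Assumed &= ~uint32_t(NO_ARGUMENT_MEM);

  // An "argmemonly"-style conclusion holds while every other NO_* bit is
  // still assumed.
  bool OnlyArgMem = (Assumed | NO_ARGUMENT_MEM) == NO_LOCATIONS;
  std::printf("argmemonly still plausible: %s\n", OnlyArgMem ? "yes" : "no"); // yes

  // A later access through an unknown pointer clears NO_UNKNOWN_MEM and
  // invalidates that conclusion.
  Assumed &= ~uint32_t(NO_UNKNOWN_MEM);
  OnlyArgMem = (Assumed | NO_ARGUMENT_MEM) == NO_LOCATIONS;
  std::printf("argmemonly still plausible: %s\n", OnlyArgMem ? "yes" : "no"); // no
  return 0;
}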
+ +AAMemoryLocation::MemoryLocationsKind +AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, + bool &Changed) { + LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize accessed locations for " + << I << "\n"); + + AAMemoryLocation::StateType AccessedLocs; + AccessedLocs.intersectAssumedBits(NO_LOCATIONS); + + if (auto *CB = dyn_cast<CallBase>(&I)) { + + // First check if we assume any memory is access is visible. + const auto &CBMemLocationAA = + A.getAAFor<AAMemoryLocation>(*this, IRPosition::callsite_function(*CB)); + LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize call site: " << I + << " [" << CBMemLocationAA << "]\n"); + + if (CBMemLocationAA.isAssumedReadNone()) + return NO_LOCATIONS; + + if (CBMemLocationAA.isAssumedInaccessibleMemOnly()) { + updateStateAndAccessesMap(AccessedLocs, NO_INACCESSIBLE_MEM, &I, nullptr, + Changed, getAccessKindFromInst(&I)); + return AccessedLocs.getAssumed(); + } + + uint32_t CBAssumedNotAccessedLocs = + CBMemLocationAA.getAssumedNotAccessedLocation(); + + // Set the argmemonly and global bit as we handle them separately below. + uint32_t CBAssumedNotAccessedLocsNoArgMem = + CBAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM; + + for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) { + if (CBAssumedNotAccessedLocsNoArgMem & CurMLK) + continue; + updateStateAndAccessesMap(AccessedLocs, CurMLK, &I, nullptr, Changed, + getAccessKindFromInst(&I)); + } + + // Now handle global memory if it might be accessed. This is slightly tricky + // as NO_GLOBAL_MEM has multiple bits set. + bool HasGlobalAccesses = ((~CBAssumedNotAccessedLocs) & NO_GLOBAL_MEM); + if (HasGlobalAccesses) { + auto AccessPred = [&](const Instruction *, const Value *Ptr, + AccessKind Kind, MemoryLocationsKind MLK) { + updateStateAndAccessesMap(AccessedLocs, MLK, &I, Ptr, Changed, + getAccessKindFromInst(&I)); + return true; + }; + if (!CBMemLocationAA.checkForAllAccessesToMemoryKind( + AccessPred, inverseLocation(NO_GLOBAL_MEM, false, false))) + return AccessedLocs.getWorstState(); + } + + LLVM_DEBUG( + dbgs() << "[AAMemoryLocation] Accessed state before argument handling: " + << getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n"); + + // Now handle argument memory if it might be accessed. + bool HasArgAccesses = ((~CBAssumedNotAccessedLocs) & NO_ARGUMENT_MEM); + if (HasArgAccesses) { + for (unsigned ArgNo = 0, E = CB->getNumArgOperands(); ArgNo < E; + ++ArgNo) { + + // Skip non-pointer arguments. + const Value *ArgOp = CB->getArgOperand(ArgNo); + if (!ArgOp->getType()->isPtrOrPtrVectorTy()) + continue; + + // Skip readnone arguments. + const IRPosition &ArgOpIRP = IRPosition::callsite_argument(*CB, ArgNo); + const auto &ArgOpMemLocationAA = A.getAAFor<AAMemoryBehavior>( + *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL); + + if (ArgOpMemLocationAA.isAssumedReadNone()) + continue; + + // Categorize potentially accessed pointer arguments as if there was an + // access instruction with them as pointer. 
+ categorizePtrValue(A, I, *ArgOp, AccessedLocs, Changed); + } + } + + LLVM_DEBUG( + dbgs() << "[AAMemoryLocation] Accessed state after argument handling: " + << getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n"); + + return AccessedLocs.getAssumed(); + } + + if (const Value *Ptr = getPointerOperand(&I, /* AllowVolatile */ true)) { + LLVM_DEBUG( + dbgs() << "[AAMemoryLocation] Categorize memory access with pointer: " + << I << " [" << *Ptr << "]\n"); + categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed); + return AccessedLocs.getAssumed(); + } + + LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Failed to categorize instruction: " + << I << "\n"); + updateStateAndAccessesMap(AccessedLocs, NO_UNKOWN_MEM, &I, nullptr, Changed, + getAccessKindFromInst(&I)); + return AccessedLocs.getAssumed(); +} + +/// An AA to represent the memory behavior function attributes. +struct AAMemoryLocationFunction final : public AAMemoryLocationImpl { + AAMemoryLocationFunction(const IRPosition &IRP, Attributor &A) + : AAMemoryLocationImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(Attributor &A). + virtual ChangeStatus updateImpl(Attributor &A) override { + + const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>( + *this, getIRPosition(), /* TrackDependence */ false); + if (MemBehaviorAA.isAssumedReadNone()) { + if (MemBehaviorAA.isKnownReadNone()) + return indicateOptimisticFixpoint(); + assert(isAssumedReadNone() && + "AAMemoryLocation was not read-none but AAMemoryBehavior was!"); + A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL); + return ChangeStatus::UNCHANGED; + } + + // The current assumed state used to determine a change. + auto AssumedState = getAssumed(); + bool Changed = false; + + auto CheckRWInst = [&](Instruction &I) { + MemoryLocationsKind MLK = categorizeAccessedLocations(A, I, Changed); + LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Accessed locations for " << I + << ": " << getMemoryLocationsAsStr(MLK) << "\n"); + removeAssumedBits(inverseLocation(MLK, false, false)); + return true; + }; + + if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this)) + return indicatePessimisticFixpoint(); + + Changed |= AssumedState != getAssumed(); + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FN_ATTR(readnone) + else if (isAssumedArgMemOnly()) + STATS_DECLTRACK_FN_ATTR(argmemonly) + else if (isAssumedInaccessibleMemOnly()) + STATS_DECLTRACK_FN_ATTR(inaccessiblememonly) + else if (isAssumedInaccessibleOrArgMemOnly()) + STATS_DECLTRACK_FN_ATTR(inaccessiblememorargmemonly) + } +}; + +/// AAMemoryLocation attribute for call sites. +struct AAMemoryLocationCallSite final : AAMemoryLocationImpl { + AAMemoryLocationCallSite(const IRPosition &IRP, Attributor &A) + : AAMemoryLocationImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryLocationImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || !A.isFunctionIPOAmendable(*F)) { + indicatePessimisticFixpoint(); + return; + } + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. 
+ Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor<AAMemoryLocation>(*this, FnPos); + bool Changed = false; + auto AccessPred = [&](const Instruction *I, const Value *Ptr, + AccessKind Kind, MemoryLocationsKind MLK) { + updateStateAndAccessesMap(getState(), MLK, I, Ptr, Changed, + getAccessKindFromInst(I)); + return true; + }; + if (!FnAA.checkForAllAccessesToMemoryKind(AccessPred, ALL_LOCATIONS)) + return indicatePessimisticFixpoint(); + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CS_ATTR(readnone) + } +}; + +/// ------------------ Value Constant Range Attribute ------------------------- + +struct AAValueConstantRangeImpl : AAValueConstantRange { + using StateType = IntegerRangeState; + AAValueConstantRangeImpl(const IRPosition &IRP, Attributor &A) + : AAValueConstantRange(IRP, A) {} + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + std::string Str; + llvm::raw_string_ostream OS(Str); + OS << "range(" << getBitWidth() << ")<"; + getKnown().print(OS); + OS << " / "; + getAssumed().print(OS); + OS << ">"; + return OS.str(); + } + + /// Helper function to get a SCEV expr for the associated value at program + /// point \p I. + const SCEV *getSCEV(Attributor &A, const Instruction *I = nullptr) const { + if (!getAnchorScope()) + return nullptr; + + ScalarEvolution *SE = + A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>( + *getAnchorScope()); + + LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>( + *getAnchorScope()); + + if (!SE || !LI) + return nullptr; + + const SCEV *S = SE->getSCEV(&getAssociatedValue()); + if (!I) + return S; + + return SE->getSCEVAtScope(S, LI->getLoopFor(I->getParent())); + } + + /// Helper function to get a range from SCEV for the associated value at + /// program point \p I. + ConstantRange getConstantRangeFromSCEV(Attributor &A, + const Instruction *I = nullptr) const { + if (!getAnchorScope()) + return getWorstState(getBitWidth()); + + ScalarEvolution *SE = + A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>( + *getAnchorScope()); + + const SCEV *S = getSCEV(A, I); + if (!SE || !S) + return getWorstState(getBitWidth()); + + return SE->getUnsignedRange(S); + } + + /// Helper function to get a range from LVI for the associated value at + /// program point \p I. + ConstantRange + getConstantRangeFromLVI(Attributor &A, + const Instruction *CtxI = nullptr) const { + if (!getAnchorScope()) + return getWorstState(getBitWidth()); + + LazyValueInfo *LVI = + A.getInfoCache().getAnalysisResultForFunction<LazyValueAnalysis>( + *getAnchorScope()); + + if (!LVI || !CtxI) + return getWorstState(getBitWidth()); + return LVI->getConstantRange(&getAssociatedValue(), + const_cast<BasicBlock *>(CtxI->getParent()), + const_cast<Instruction *>(CtxI)); + } + + /// See AAValueConstantRange::getKnownConstantRange(..). + ConstantRange + getKnownConstantRange(Attributor &A, + const Instruction *CtxI = nullptr) const override { + if (!CtxI || CtxI == getCtxI()) + return getKnown(); + + ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI); + ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI); + return getKnown().intersectWith(SCEVR).intersectWith(LVIR); + } + + /// See AAValueConstantRange::getAssumedConstantRange(..). 
+ ConstantRange + getAssumedConstantRange(Attributor &A, + const Instruction *CtxI = nullptr) const override { + // TODO: Make SCEV use Attributor assumption. + // We may be able to bound a variable range via assumptions in + // Attributor. ex.) If x is assumed to be in [1, 3] and y is known to + // evolve to x^2 + x, then we can say that y is in [2, 12]. + + if (!CtxI || CtxI == getCtxI()) + return getAssumed(); + + ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI); + ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI); + return getAssumed().intersectWith(SCEVR).intersectWith(LVIR); + } + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + // Intersect a range given by SCEV. + intersectKnown(getConstantRangeFromSCEV(A, getCtxI())); + + // Intersect a range given by LVI. + intersectKnown(getConstantRangeFromLVI(A, getCtxI())); + } + + /// Helper function to create MDNode for range metadata. + static MDNode * + getMDNodeForConstantRange(Type *Ty, LLVMContext &Ctx, + const ConstantRange &AssumedConstantRange) { + Metadata *LowAndHigh[] = {ConstantAsMetadata::get(ConstantInt::get( + Ty, AssumedConstantRange.getLower())), + ConstantAsMetadata::get(ConstantInt::get( + Ty, AssumedConstantRange.getUpper()))}; + return MDNode::get(Ctx, LowAndHigh); + } + + /// Return true if \p Assumed is included in \p KnownRanges. + static bool isBetterRange(const ConstantRange &Assumed, MDNode *KnownRanges) { + + if (Assumed.isFullSet()) + return false; + + if (!KnownRanges) + return true; + + // If multiple ranges are annotated in IR, we give up to annotate assumed + // range for now. + + // TODO: If there exists a known range which containts assumed range, we + // can say assumed range is better. + if (KnownRanges->getNumOperands() > 2) + return false; + + ConstantInt *Lower = + mdconst::extract<ConstantInt>(KnownRanges->getOperand(0)); + ConstantInt *Upper = + mdconst::extract<ConstantInt>(KnownRanges->getOperand(1)); + + ConstantRange Known(Lower->getValue(), Upper->getValue()); + return Known.contains(Assumed) && Known != Assumed; + } + + /// Helper function to set range metadata. 
+ static bool + setRangeMetadataIfisBetterRange(Instruction *I, + const ConstantRange &AssumedConstantRange) { + auto *OldRangeMD = I->getMetadata(LLVMContext::MD_range); + if (isBetterRange(AssumedConstantRange, OldRangeMD)) { + if (!AssumedConstantRange.isEmptySet()) { + I->setMetadata(LLVMContext::MD_range, + getMDNodeForConstantRange(I->getType(), I->getContext(), + AssumedConstantRange)); + return true; + } + } + return false; + } + + /// See AbstractAttribute::manifest() + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + ConstantRange AssumedConstantRange = getAssumedConstantRange(A); + assert(!AssumedConstantRange.isFullSet() && "Invalid state"); + + auto &V = getAssociatedValue(); + if (!AssumedConstantRange.isEmptySet() && + !AssumedConstantRange.isSingleElement()) { + if (Instruction *I = dyn_cast<Instruction>(&V)) + if (isa<CallInst>(I) || isa<LoadInst>(I)) + if (setRangeMetadataIfisBetterRange(I, AssumedConstantRange)) + Changed = ChangeStatus::CHANGED; + } + + return Changed; + } +}; + +struct AAValueConstantRangeArgument final + : AAArgumentFromCallSiteArguments< + AAValueConstantRange, AAValueConstantRangeImpl, IntegerRangeState> { + using Base = AAArgumentFromCallSiteArguments< + AAValueConstantRange, AAValueConstantRangeImpl, IntegerRangeState>; + AAValueConstantRangeArgument(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + if (!getAnchorScope() || getAnchorScope()->isDeclaration()) { + indicatePessimisticFixpoint(); + } else { + Base::initialize(A); + } + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(value_range) + } +}; + +struct AAValueConstantRangeReturned + : AAReturnedFromReturnedValues<AAValueConstantRange, + AAValueConstantRangeImpl> { + using Base = AAReturnedFromReturnedValues<AAValueConstantRange, + AAValueConstantRangeImpl>; + AAValueConstantRangeReturned(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(value_range) + } +}; + +struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { + AAValueConstantRangeFloating(const IRPosition &IRP, Attributor &A) + : AAValueConstantRangeImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAValueConstantRangeImpl::initialize(A); + Value &V = getAssociatedValue(); + + if (auto *C = dyn_cast<ConstantInt>(&V)) { + unionAssumed(ConstantRange(C->getValue())); + indicateOptimisticFixpoint(); + return; + } + + if (isa<UndefValue>(&V)) { + // Collapse the undef state to 0. + unionAssumed(ConstantRange(APInt(getBitWidth(), 0))); + indicateOptimisticFixpoint(); + return; + } + + if (isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<CastInst>(&V)) + return; + // If it is a load instruction with range metadata, use it. + if (LoadInst *LI = dyn_cast<LoadInst>(&V)) + if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range)) { + intersectKnown(getConstantRangeFromMetadata(*RangeMD)); + return; + } + + // We can work with PHI and select instruction as we traverse their operands + // during update. + if (isa<SelectInst>(V) || isa<PHINode>(V)) + return; + + // Otherwise we give up. 
+ indicatePessimisticFixpoint(); + + LLVM_DEBUG(dbgs() << "[AAValueConstantRange] We give up: " + << getAssociatedValue() << "\n"); + } + + bool calculateBinaryOperator( + Attributor &A, BinaryOperator *BinOp, IntegerRangeState &T, + const Instruction *CtxI, + SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) { + Value *LHS = BinOp->getOperand(0); + Value *RHS = BinOp->getOperand(1); + // TODO: Allow non integers as well. + if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) + return false; + + auto &LHSAA = + A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS)); + QuerriedAAs.push_back(&LHSAA); + auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI); + + auto &RHSAA = + A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS)); + QuerriedAAs.push_back(&RHSAA); + auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI); + + auto AssumedRange = LHSAARange.binaryOp(BinOp->getOpcode(), RHSAARange); + + T.unionAssumed(AssumedRange); + + // TODO: Track a known state too. + + return T.isValidState(); + } + + bool calculateCastInst( + Attributor &A, CastInst *CastI, IntegerRangeState &T, + const Instruction *CtxI, + SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) { + assert(CastI->getNumOperands() == 1 && "Expected cast to be unary!"); + // TODO: Allow non integers as well. + Value &OpV = *CastI->getOperand(0); + if (!OpV.getType()->isIntegerTy()) + return false; + + auto &OpAA = + A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(OpV)); + QuerriedAAs.push_back(&OpAA); + T.unionAssumed( + OpAA.getAssumed().castOp(CastI->getOpcode(), getState().getBitWidth())); + return T.isValidState(); + } + + bool + calculateCmpInst(Attributor &A, CmpInst *CmpI, IntegerRangeState &T, + const Instruction *CtxI, + SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) { + Value *LHS = CmpI->getOperand(0); + Value *RHS = CmpI->getOperand(1); + // TODO: Allow non integers as well. + if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) + return false; + + auto &LHSAA = + A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS)); + QuerriedAAs.push_back(&LHSAA); + auto &RHSAA = + A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS)); + QuerriedAAs.push_back(&RHSAA); + + auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI); + auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI); + + // If one of them is empty set, we can't decide. + if (LHSAARange.isEmptySet() || RHSAARange.isEmptySet()) + return true; + + bool MustTrue = false, MustFalse = false; + + auto AllowedRegion = + ConstantRange::makeAllowedICmpRegion(CmpI->getPredicate(), RHSAARange); + + auto SatisfyingRegion = ConstantRange::makeSatisfyingICmpRegion( + CmpI->getPredicate(), RHSAARange); + + if (AllowedRegion.intersectWith(LHSAARange).isEmptySet()) + MustFalse = true; + + if (SatisfyingRegion.contains(LHSAARange)) + MustTrue = true; + + assert((!MustTrue || !MustFalse) && + "Either MustTrue or MustFalse should be false!"); + + if (MustTrue) + T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 1))); + else if (MustFalse) + T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 0))); + else + T.unionAssumed(ConstantRange(/* BitWidth */ 1, /* isFullSet */ true)); + + LLVM_DEBUG(dbgs() << "[AAValueConstantRange] " << *CmpI << " " << LHSAA + << " " << RHSAA << "\n"); + + // TODO: Track a known state too. + return T.isValidState(); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + auto VisitValueCB = [&](Value &V, const Instruction *CtxI, + IntegerRangeState &T, bool Stripped) -> bool { + Instruction *I = dyn_cast<Instruction>(&V); + if (!I || isa<CallBase>(I)) { + + // If the value is not an instruction (or is a call base), we query the + // corresponding AA from the Attributor. + const auto &AA = + A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(V)); + + // We do not use the clamp operator here so that the program point CtxI + // can be utilized. + T.unionAssumed(AA.getAssumedConstantRange(A, CtxI)); + + return T.isValidState(); + } + + SmallVector<const AAValueConstantRange *, 4> QuerriedAAs; + if (auto *BinOp = dyn_cast<BinaryOperator>(I)) { + if (!calculateBinaryOperator(A, BinOp, T, CtxI, QuerriedAAs)) + return false; + } else if (auto *CmpI = dyn_cast<CmpInst>(I)) { + if (!calculateCmpInst(A, CmpI, T, CtxI, QuerriedAAs)) + return false; + } else if (auto *CastI = dyn_cast<CastInst>(I)) { + if (!calculateCastInst(A, CastI, T, CtxI, QuerriedAAs)) + return false; + } else { + // Give up on other instructions. + // TODO: Add other instructions + + T.indicatePessimisticFixpoint(); + return false; + } + + // Catch circular reasoning in a pessimistic way for now. + // TODO: Check how the range evolves and if we stripped anything, see also + // AADereferenceable or AAAlign for similar situations. + for (const AAValueConstantRange *QueriedAA : QuerriedAAs) { + if (QueriedAA != this) + continue; + // If we are in a steady state we do not need to worry. + if (T.getAssumed() == getState().getAssumed()) + continue; + T.indicatePessimisticFixpoint(); + } + + return T.isValidState(); + }; + + IntegerRangeState T(getBitWidth()); + + if (!genericValueTraversal<AAValueConstantRange, IntegerRangeState>( + A, getIRPosition(), *this, T, VisitValueCB, getCtxI(), + /* UseValueSimplify */ false)) + return indicatePessimisticFixpoint(); + + return clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(value_range) + } +}; + +struct AAValueConstantRangeFunction : AAValueConstantRangeImpl { + AAValueConstantRangeFunction(const IRPosition &IRP, Attributor &A) + : AAValueConstantRangeImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("AAValueConstantRange(Function|CallSite)::updateImpl will " + "not be called"); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(value_range) } +}; + +struct AAValueConstantRangeCallSite : AAValueConstantRangeFunction { + AAValueConstantRangeCallSite(const IRPosition &IRP, Attributor &A) + : AAValueConstantRangeFunction(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(value_range) } +}; + +struct AAValueConstantRangeCallSiteReturned + : AACallSiteReturnedFromReturned<AAValueConstantRange, + AAValueConstantRangeImpl> { + AAValueConstantRangeCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AACallSiteReturnedFromReturned<AAValueConstantRange, + AAValueConstantRangeImpl>(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // If it is a call instruction with range metadata, use the metadata.
+ if (CallInst *CI = dyn_cast<CallInst>(&getAssociatedValue())) + if (auto *RangeMD = CI->getMetadata(LLVMContext::MD_range)) + intersectKnown(getConstantRangeFromMetadata(*RangeMD)); + + AAValueConstantRangeImpl::initialize(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(value_range) + } +}; +struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating { + AAValueConstantRangeCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAValueConstantRangeFloating(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(value_range) + } +}; +} // namespace + +const char AAReturnedValues::ID = 0; +const char AANoUnwind::ID = 0; +const char AANoSync::ID = 0; +const char AANoFree::ID = 0; +const char AANonNull::ID = 0; +const char AANoRecurse::ID = 0; +const char AAWillReturn::ID = 0; +const char AAUndefinedBehavior::ID = 0; +const char AANoAlias::ID = 0; +const char AAReachability::ID = 0; +const char AANoReturn::ID = 0; +const char AAIsDead::ID = 0; +const char AADereferenceable::ID = 0; +const char AAAlign::ID = 0; +const char AANoCapture::ID = 0; +const char AAValueSimplify::ID = 0; +const char AAHeapToStack::ID = 0; +const char AAPrivatizablePtr::ID = 0; +const char AAMemoryBehavior::ID = 0; +const char AAMemoryLocation::ID = 0; +const char AAValueConstantRange::ID = 0; + +// Macro magic to create the static generator function for attributes that +// follow the naming scheme. + +#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \ + case IRPosition::PK: \ + llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!"); + +#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \ + case IRPosition::PK: \ + AA = new (A.Allocator) CLASS##SUFFIX(IRP, A); \ + ++NumAAs; \ + break; + +#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ + SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + } \ + return *AA; \ + } + +#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, 
Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ + SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + } \ + return *AA; \ + } + +#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation) + +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) + +CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify) +CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead) +CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree) + +CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack) +CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability) +CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior) + +CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior) + +#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef SWITCH_PK_CREATE +#undef SWITCH_PK_INV diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp 
b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index aec470ffadc43..1d1300c6cd1d0 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -127,7 +127,8 @@ void BlockExtractor::loadFile() { /*KeepEmpty=*/false); if (BBNames.empty()) report_fatal_error("Missing bbs name"); - BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}}); + BlocksByName.push_back( + {std::string(LineSplit[0]), {BBNames.begin(), BBNames.end()}}); } } diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index f28a399b1779b..74f11fa309592 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -19,7 +19,6 @@ #include "llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Analysis/SparsePropagation.h" #include "llvm/Analysis/ValueLatticeUtils.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/MDBuilder.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" @@ -72,8 +71,7 @@ public: CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {} CVPLatticeVal(std::vector<Function *> &&Functions) : LatticeState(FunctionSet), Functions(std::move(Functions)) { - assert(std::is_sorted(this->Functions.begin(), this->Functions.end(), - Compare())); + assert(llvm::is_sorted(this->Functions, Compare())); } /// Get a reference to the functions held by this lattice value. The number @@ -173,9 +171,8 @@ public: SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) override { switch (I.getOpcode()) { case Instruction::Call: - return visitCallSite(cast<CallInst>(&I), ChangedValues, SS); case Instruction::Invoke: - return visitCallSite(cast<InvokeInst>(&I), ChangedValues, SS); + return visitCallBase(cast<CallBase>(I), ChangedValues, SS); case Instruction::Load: return visitLoad(*cast<LoadInst>(&I), ChangedValues, SS); case Instruction::Ret: @@ -217,13 +214,13 @@ public: /// We collect a set of indirect calls when visiting call sites. This method /// returns a reference to that set. - SmallPtrSetImpl<Instruction *> &getIndirectCalls() { return IndirectCalls; } + SmallPtrSetImpl<CallBase *> &getIndirectCalls() { return IndirectCalls; } private: /// Holds the indirect calls we encounter during the analysis. We will attach /// metadata to these calls after the analysis indicating the functions the /// calls can possibly target. - SmallPtrSet<Instruction *, 32> IndirectCalls; + SmallPtrSet<CallBase *, 32> IndirectCalls; /// Compute a new lattice value for the given constant. The constant, after /// stripping any pointer casts, should be a Function. We ignore null @@ -255,23 +252,22 @@ private: /// the merge of the argument state with the call sites corresponding actual /// argument state. The call site state is the merge of the call site state /// with the returned value state of the called function. - void visitCallSite(CallSite CS, + void visitCallBase(CallBase &CB, DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { - Function *F = CS.getCalledFunction(); - Instruction *I = CS.getInstruction(); - auto RegI = CVPLatticeKey(I, IPOGrouping::Register); + Function *F = CB.getCalledFunction(); + auto RegI = CVPLatticeKey(&CB, IPOGrouping::Register); // If this is an indirect call, save it so we can quickly revisit it when // attaching metadata. 
if (!F) - IndirectCalls.insert(I); + IndirectCalls.insert(&CB); // If we can't track the function's return values, there's nothing to do. if (!F || !canTrackReturnsInterprocedurally(F)) { // Void return, No need to create and update CVPLattice state as no one // can use it. - if (I->getType()->isVoidTy()) + if (CB.getType()->isVoidTy()) return; ChangedValues[RegI] = getOverdefinedVal(); return; @@ -284,14 +280,14 @@ private: for (Argument &A : F->args()) { auto RegFormal = CVPLatticeKey(&A, IPOGrouping::Register); auto RegActual = - CVPLatticeKey(CS.getArgument(A.getArgNo()), IPOGrouping::Register); + CVPLatticeKey(CB.getArgOperand(A.getArgNo()), IPOGrouping::Register); ChangedValues[RegFormal] = MergeValues(SS.getValueState(RegFormal), SS.getValueState(RegActual)); } // Void return, No need to create and update CVPLattice state as no one can // use it. - if (I->getType()->isVoidTy()) + if (CB.getType()->isVoidTy()) return; ChangedValues[RegI] = @@ -388,9 +384,8 @@ static bool runCVP(Module &M) { // the set of functions they can possibly target. bool Changed = false; MDBuilder MDB(M.getContext()); - for (Instruction *C : Lattice.getIndirectCalls()) { - CallSite CS(C); - auto RegI = CVPLatticeKey(CS.getCalledValue(), IPOGrouping::Register); + for (CallBase *C : Lattice.getIndirectCalls()) { + auto RegI = CVPLatticeKey(C->getCalledOperand(), IPOGrouping::Register); CVPLatticeVal LV = Solver.getExistingValueState(RegI); if (!LV.isFunctionSet() || LV.getFunctions().empty()) continue; diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp index ea1278aa108fb..67f1438b9b6ac 100644 --- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -84,11 +84,9 @@ static void copyDebugLocMetadata(const GlobalVariable *From, To->addDebugInfo(MD); } -static unsigned getAlignment(GlobalVariable *GV) { - unsigned Align = GV->getAlignment(); - if (Align) - return Align; - return GV->getParent()->getDataLayout().getPreferredAlignment(GV); +static Align getAlign(GlobalVariable *GV) { + return GV->getAlign().getValueOr( + GV->getParent()->getDataLayout().getPreferredAlign(GV)); } static bool @@ -120,8 +118,8 @@ static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { << New->getName() << "\n"); // Bump the alignment if necessary. 
- if (Old->getAlignment() || New->getAlignment()) - New->setAlignment(Align(std::max(getAlignment(Old), getAlignment(New)))); + if (Old->getAlign() || New->getAlign()) + New->setAlignment(std::max(getAlign(Old), getAlign(New))); copyDebugLocMetadata(Old, New); Old->replaceAllUsesWith(NewConstant); diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 61d519d8ae880..54c51b6e7161b 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -22,16 +22,17 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -175,16 +176,15 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // std::vector<Value *> Args; for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) { - CallSite CS(*I++); - if (!CS) + CallBase *CB = dyn_cast<CallBase>(*I++); + if (!CB) continue; - Instruction *Call = CS.getInstruction(); // Pass all the same arguments. - Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs); + Args.assign(CB->arg_begin(), CB->arg_begin() + NumArgs); // Drop any attributes that were on the vararg arguments. - AttributeList PAL = CS.getAttributes(); + AttributeList PAL = CB->getAttributes(); if (!PAL.isEmpty()) { SmallVector<AttributeSet, 8> ArgAttrs; for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) @@ -194,34 +194,31 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { } SmallVector<OperandBundleDef, 1> OpBundles; - CS.getOperandBundlesAsDefs(OpBundles); + CB->getOperandBundlesAsDefs(OpBundles); - CallSite NewCS; - if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, OpBundles, "", Call); + CallBase *NewCB = nullptr; + if (InvokeInst *II = dyn_cast<InvokeInst>(CB)) { + NewCB = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, OpBundles, "", CB); } else { - NewCS = CallInst::Create(NF, Args, OpBundles, "", Call); - cast<CallInst>(NewCS.getInstruction()) - ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); + NewCB = CallInst::Create(NF, Args, OpBundles, "", CB); + cast<CallInst>(NewCB)->setTailCallKind( + cast<CallInst>(CB)->getTailCallKind()); } - NewCS.setCallingConv(CS.getCallingConv()); - NewCS.setAttributes(PAL); - NewCS->setDebugLoc(Call->getDebugLoc()); - uint64_t W; - if (Call->extractProfTotalWeight(W)) - NewCS->setProfWeight(W); + NewCB->setCallingConv(CB->getCallingConv()); + NewCB->setAttributes(PAL); + NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg}); Args.clear(); - if (!Call->use_empty()) - Call->replaceAllUsesWith(NewCS.getInstruction()); + if (!CB->use_empty()) + CB->replaceAllUsesWith(NewCB); - NewCS->takeName(Call); + NewCB->takeName(CB); // Finally, remove the old call from the program, reducing the use-count of // F. 
- Call->eraseFromParent(); + CB->eraseFromParent(); } // Since we have now created the new function, splice the body of the old @@ -291,7 +288,8 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { bool Changed = false; for (Argument &Arg : Fn.args()) { - if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) { + if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && + !Arg.hasPassPointeeByValueAttr()) { if (Arg.isUsedByMetadata()) { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); Changed = true; @@ -304,16 +302,16 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { return false; for (Use &U : Fn.uses()) { - CallSite CS(U.getUser()); - if (!CS || !CS.isCallee(&U)) + CallBase *CB = dyn_cast<CallBase>(U.getUser()); + if (!CB || !CB->isCallee(&U)) continue; // Now go through all unused args and replace them with "undef". for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) { unsigned ArgNo = UnusedArgs[I]; - Value *Arg = CS.getArgument(ArgNo); - CS.setArgument(ArgNo, UndefValue::get(Arg->getType())); + Value *Arg = CB->getArgOperand(ArgNo); + CB->setArgOperand(ArgNo, UndefValue::get(Arg->getType())); ++NumArgumentsReplacedWithUndef; Changed = true; } @@ -391,8 +389,8 @@ DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses, return MarkIfNotLive(Use, MaybeLiveUses); } else { DeadArgumentEliminationPass::Liveness Result = MaybeLive; - for (unsigned i = 0; i < NumRetVals(F); ++i) { - RetOrArg Use = CreateRet(F, i); + for (unsigned Ri = 0; Ri < NumRetVals(F); ++Ri) { + RetOrArg Use = CreateRet(F, Ri); // We might be live, depending on the liveness of Use. If any // sub-value is live, then the entire value is considered live. This // is a conservative choice, and better tracking is possible. @@ -424,28 +422,27 @@ DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses, return Result; } - if (auto CS = ImmutableCallSite(V)) { - const Function *F = CS.getCalledFunction(); + if (const auto *CB = dyn_cast<CallBase>(V)) { + const Function *F = CB->getCalledFunction(); if (F) { // Used in a direct call. // The function argument is live if it is used as a bundle operand. - if (CS.isBundleOperand(U)) + if (CB->isBundleOperand(U)) return Live; // Find the argument number. We know for sure that this use is an // argument, since if it was the function argument this would be an // indirect call and the we know can't be looking at a value of the // label type (for the invoke instruction). - unsigned ArgNo = CS.getArgumentNo(U); + unsigned ArgNo = CB->getArgOperandNo(U); if (ArgNo >= F->getFunctionType()->getNumParams()) // The value is passed in through a vararg! Must be live. return Live; - assert(CS.getArgument(ArgNo) - == CS->getOperand(U->getOperandNo()) - && "Argument is not where we expected it"); + assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) && + "Argument is not where we expected it"); // Value passed to a normal call. It's only live when the corresponding // argument to the called function turns out live. @@ -485,9 +482,10 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V, // We consider arguments of non-internal functions to be intrinsically alive as // well as arguments to functions which have their "address taken". void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { - // Functions with inalloca parameters are expecting args in a particular - // register and memory layout. 
- if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) { + // Functions with inalloca/preallocated parameters are expecting args in a + // particular register and memory layout. + if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || + F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) { MarkLive(F); return; } @@ -555,24 +553,17 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { for (const Use &U : F.uses()) { // If the function is PASSED IN as an argument, its address has been // taken. - ImmutableCallSite CS(U.getUser()); - if (!CS || !CS.isCallee(&U)) { + const auto *CB = dyn_cast<CallBase>(U.getUser()); + if (!CB || !CB->isCallee(&U)) { MarkLive(F); return; } // The number of arguments for `musttail` call must match the number of // arguments of the caller - if (CS.isMustTailCall()) + if (CB->isMustTailCall()) HasMustTailCallers = true; - // If this use is anything other than a call site, the function is alive. - const Instruction *TheCall = CS.getInstruction(); - if (!TheCall) { // Not a direct call site? - MarkLive(F); - return; - } - // If we end up here, we are looking at a direct call to our function. // Now, check how our return value(s) is/are used in this caller. Don't @@ -581,7 +572,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { continue; // Check all uses of the return value. - for (const Use &U : TheCall->uses()) { + for (const Use &U : CB->uses()) { if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) { // This use uses a part of our return value, survey the uses of // that part and store the results for this index only. @@ -600,10 +591,10 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { RetValLiveness.assign(RetCount, Live); break; } else { - for (unsigned i = 0; i != RetCount; ++i) { - if (RetValLiveness[i] != Live) - MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(), - MaybeLiveAggregateUses.end()); + for (unsigned Ri = 0; Ri != RetCount; ++Ri) { + if (RetValLiveness[Ri] != Live) + MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(), + MaybeLiveAggregateUses.end()); } } } @@ -616,17 +607,17 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } // Now we've inspected all callers, record the liveness of our return values. - for (unsigned i = 0; i != RetCount; ++i) - MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); + for (unsigned Ri = 0; Ri != RetCount; ++Ri) + MarkValue(CreateRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]); LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting args for fn: " << F.getName() << "\n"); // Now, check all of our arguments. - unsigned i = 0; + unsigned ArgI = 0; UseVector MaybeLiveArgUses; - for (Function::const_arg_iterator AI = F.arg_begin(), - E = F.arg_end(); AI != E; ++AI, ++i) { + for (Function::const_arg_iterator AI = F.arg_begin(), E = F.arg_end(); + AI != E; ++AI, ++ArgI) { Liveness Result; if (F.getFunctionType()->isVarArg() || HasMustTailCallers || HasMustTailCalls) { @@ -649,7 +640,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } // Mark the result. - MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses); + MarkValue(CreateArg(&F, ArgI), Result, MaybeLiveArgUses); // Clear the vector again for the next iteration. MaybeLiveArgUses.clear(); } @@ -684,11 +675,11 @@ void DeadArgumentEliminationPass::MarkLive(const Function &F) { // Mark the function as live. LiveFunctions.insert(&F); // Mark all arguments as live. 
- for (unsigned i = 0, e = F.arg_size(); i != e; ++i) - PropagateLiveness(CreateArg(&F, i)); + for (unsigned ArgI = 0, E = F.arg_size(); ArgI != E; ++ArgI) + PropagateLiveness(CreateArg(&F, ArgI)); // Mark all return values as live. - for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) - PropagateLiveness(CreateRet(&F, i)); + for (unsigned Ri = 0, E = NumRetVals(&F); Ri != E; ++Ri) + PropagateLiveness(CreateRet(&F, Ri)); } /// MarkLive - Mark the given return value or argument as live. Additionally, @@ -749,19 +740,19 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Construct the new parameter list from non-dead arguments. Also construct // a new set of parameter attributes to correspond. Skip the first parameter // attribute, since that belongs to the return value. - unsigned i = 0; - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++i) { - RetOrArg Arg = CreateArg(F, i); + unsigned ArgI = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; + ++I, ++ArgI) { + RetOrArg Arg = CreateArg(F, ArgI); if (LiveValues.erase(Arg)) { Params.push_back(I->getType()); - ArgAlive[i] = true; - ArgAttrVec.push_back(PAL.getParamAttributes(i)); - HasLiveReturnedArg |= PAL.hasParamAttribute(i, Attribute::Returned); + ArgAlive[ArgI] = true; + ArgAttrVec.push_back(PAL.getParamAttributes(ArgI)); + HasLiveReturnedArg |= PAL.hasParamAttribute(ArgI, Attribute::Returned); } else { ++NumArgumentsEliminated; LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument " - << i << " (" << I->getName() << ") from " + << ArgI << " (" << I->getName() << ") from " << F->getName() << "\n"); } } @@ -798,16 +789,16 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { NRetTy = RetTy; } else { // Look at each of the original return values individually. - for (unsigned i = 0; i != RetCount; ++i) { - RetOrArg Ret = CreateRet(F, i); + for (unsigned Ri = 0; Ri != RetCount; ++Ri) { + RetOrArg Ret = CreateRet(F, Ri); if (LiveValues.erase(Ret)) { - RetTypes.push_back(getRetComponentType(F, i)); - NewRetIdxs[i] = RetTypes.size() - 1; + RetTypes.push_back(getRetComponentType(F, Ri)); + NewRetIdxs[Ri] = RetTypes.size() - 1; } else { ++NumRetValsEliminated; LLVM_DEBUG( dbgs() << "DeadArgumentEliminationPass - Removing return value " - << i << " from " << F->getName() << "\n"); + << Ri << " from " << F->getName() << "\n"); } } if (RetTypes.size() > 1) { @@ -876,11 +867,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // to pass in a smaller number of arguments into the new function. std::vector<Value*> Args; while (!F->use_empty()) { - CallSite CS(F->user_back()); - Instruction *Call = CS.getInstruction(); + CallBase &CB = cast<CallBase>(*F->user_back()); ArgAttrVec.clear(); - const AttributeList &CallPAL = CS.getAttributes(); + const AttributeList &CallPAL = CB.getAttributes(); // Adjust the call return attributes in case the function was changed to // return void. @@ -890,15 +880,15 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Declare these outside of the loops, so we can reuse them for the second // loop, which loops the varargs. - CallSite::arg_iterator I = CS.arg_begin(); - unsigned i = 0; + auto I = CB.arg_begin(); + unsigned Pi = 0; // Loop over those operands, corresponding to the normal arguments to the // original function, and add those that are still alive. 
- for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i) - if (ArgAlive[i]) { + for (unsigned E = FTy->getNumParams(); Pi != E; ++I, ++Pi) + if (ArgAlive[Pi]) { Args.push_back(*I); // Get original parameter attributes, but skip return attributes. - AttributeSet Attrs = CallPAL.getParamAttributes(i); + AttributeSet Attrs = CallPAL.getParamAttributes(Pi); if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) { // If the return type has changed, then get rid of 'returned' on the // call site. The alternative is to make all 'returned' attributes on @@ -915,9 +905,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { } // Push any varargs arguments on the list. Don't forget their attributes. - for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { + for (auto E = CB.arg_end(); I != E; ++I, ++Pi) { Args.push_back(*I); - ArgAttrVec.push_back(CallPAL.getParamAttributes(i)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(Pi)); } // Reconstruct the AttributesList based on the vector we constructed. @@ -932,44 +922,41 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { F->getContext(), FnAttrs, RetAttrs, ArgAttrVec); SmallVector<OperandBundleDef, 1> OpBundles; - CS.getOperandBundlesAsDefs(OpBundles); + CB.getOperandBundlesAsDefs(OpBundles); - CallSite NewCS; - if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, OpBundles, "", Call->getParent()); + CallBase *NewCB = nullptr; + if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { + NewCB = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, OpBundles, "", CB.getParent()); } else { - NewCS = CallInst::Create(NFTy, NF, Args, OpBundles, "", Call); - cast<CallInst>(NewCS.getInstruction()) - ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); + NewCB = CallInst::Create(NFTy, NF, Args, OpBundles, "", &CB); + cast<CallInst>(NewCB)->setTailCallKind( + cast<CallInst>(&CB)->getTailCallKind()); } - NewCS.setCallingConv(CS.getCallingConv()); - NewCS.setAttributes(NewCallPAL); - NewCS->setDebugLoc(Call->getDebugLoc()); - uint64_t W; - if (Call->extractProfTotalWeight(W)) - NewCS->setProfWeight(W); + NewCB->setCallingConv(CB.getCallingConv()); + NewCB->setAttributes(NewCallPAL); + NewCB->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg}); Args.clear(); ArgAttrVec.clear(); - Instruction *New = NewCS.getInstruction(); - if (!Call->use_empty() || Call->isUsedByMetadata()) { - if (New->getType() == Call->getType()) { + if (!CB.use_empty() || CB.isUsedByMetadata()) { + if (NewCB->getType() == CB.getType()) { // Return type not changed? Just replace users then. - Call->replaceAllUsesWith(New); - New->takeName(Call); - } else if (New->getType()->isVoidTy()) { + CB.replaceAllUsesWith(NewCB); + NewCB->takeName(&CB); + } else if (NewCB->getType()->isVoidTy()) { // If the return value is dead, replace any uses of it with undef // (any non-debug value uses will get removed later on). - if (!Call->getType()->isX86_MMXTy()) - Call->replaceAllUsesWith(UndefValue::get(Call->getType())); + if (!CB.getType()->isX86_MMXTy()) + CB.replaceAllUsesWith(UndefValue::get(CB.getType())); } else { assert((RetTy->isStructTy() || RetTy->isArrayTy()) && "Return type changed, but not into a void. 
The old return type" " must have been a struct or an array!"); - Instruction *InsertPt = Call; - if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest()); + Instruction *InsertPt = &CB; + if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { + BasicBlock *NewEdge = + SplitEdge(NewCB->getParent(), II->getNormalDest()); InsertPt = &*NewEdge->getFirstInsertionPt(); } @@ -979,30 +966,30 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // // Start out building up our return value from undef Value *RetVal = UndefValue::get(RetTy); - for (unsigned i = 0; i != RetCount; ++i) - if (NewRetIdxs[i] != -1) { + for (unsigned Ri = 0; Ri != RetCount; ++Ri) + if (NewRetIdxs[Ri] != -1) { Value *V; + IRBuilder<NoFolder> IRB(InsertPt); if (RetTypes.size() > 1) // We are still returning a struct, so extract the value from our // return value - V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret", - InsertPt); + V = IRB.CreateExtractValue(NewCB, NewRetIdxs[Ri], "newret"); else // We are now returning a single element, so just insert that - V = New; + V = NewCB; // Insert the value at the old position - RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt); + RetVal = IRB.CreateInsertValue(RetVal, V, Ri, "oldret"); } // Now, replace all uses of the old call instruction with the return // struct we built - Call->replaceAllUsesWith(RetVal); - New->takeName(Call); + CB.replaceAllUsesWith(RetVal); + NewCB->takeName(&CB); } } // Finally, remove the old call from the program, reducing the use-count of // F. - Call->eraseFromParent(); + CB.eraseFromParent(); } // Since we have now created the new function, splice the body of the old @@ -1012,10 +999,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. - i = 0; + ArgI = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), - I2 = NF->arg_begin(); I != E; ++I, ++i) - if (ArgAlive[i]) { + I2 = NF->arg_begin(); + I != E; ++I, ++ArgI) + if (ArgAlive[ArgI]) { // If this is a live argument, move the name and users over to the new // version. 
I->replaceAllUsesWith(&*I2); @@ -1033,11 +1021,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { if (F->getReturnType() != NF->getReturnType()) for (BasicBlock &BB : *NF) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) { - Value *RetVal; + IRBuilder<NoFolder> IRB(RI); + Value *RetVal = nullptr; - if (NFTy->getReturnType()->isVoidTy()) { - RetVal = nullptr; - } else { + if (!NFTy->getReturnType()->isVoidTy()) { assert(RetTy->isStructTy() || RetTy->isArrayTy()); // The original return value was a struct or array, insert // extractvalue/insertvalue chains to extract only the values we need @@ -1047,16 +1034,16 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { Value *OldRet = RI->getOperand(0); // Start out building up our return value from undef RetVal = UndefValue::get(NRetTy); - for (unsigned i = 0; i != RetCount; ++i) - if (NewRetIdxs[i] != -1) { - ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i, - "oldret", RI); + for (unsigned RetI = 0; RetI != RetCount; ++RetI) + if (NewRetIdxs[RetI] != -1) { + Value *EV = IRB.CreateExtractValue(OldRet, RetI, "oldret"); + if (RetTypes.size() > 1) { // We're still returning a struct, so reinsert the value into // our new return value at the new index - RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i], - "newret", RI); + RetVal = IRB.CreateInsertValue(RetVal, EV, NewRetIdxs[RetI], + "newret"); } else { // We are now only returning a simple value, so just return the // extracted value. @@ -1066,7 +1053,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { } // Replace the return instruction with one returning the new return // value (possibly 0 if we became void). - ReturnInst::Create(F->getContext(), RetVal, RI); + auto *NewRet = ReturnInst::Create(F->getContext(), RetVal, RI); + NewRet->setDebugLoc(RI->getDebugLoc()); BB.getInstList().erase(RI); } diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp index f77b528fc42da..b45766a8e7833 100644 --- a/llvm/lib/Transforms/IPO/ExtractGV.cpp +++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp @@ -54,6 +54,7 @@ namespace { class GVExtractorPass : public ModulePass { SetVector<GlobalValue *> Named; bool deleteStuff; + bool keepConstInit; public: static char ID; // Pass identification, replacement for typeid @@ -61,8 +62,9 @@ namespace { /// Otherwise, it deletes as much of the module as possible, except for the /// global values specified. 
explicit GVExtractorPass(std::vector<GlobalValue*> &GVs, - bool deleteS = true) - : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {} + bool deleteS = true, bool keepConstInit = false) + : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS), + keepConstInit(keepConstInit) {} bool runOnModule(Module &M) override { if (skipModule(M)) @@ -83,7 +85,8 @@ namespace { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { bool Delete = - deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration(); + deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration() && + (!I->isConstant() || !keepConstInit); if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; @@ -156,6 +159,6 @@ namespace { } ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs, - bool deleteFn) { - return new GVExtractorPass(GVs, deleteFn); + bool deleteFn, bool keepConstInit) { + return new GVExtractorPass(GVs, deleteFn, keepConstInit); } diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index b6d0b2e35694b..4baeaa6e16304 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -33,7 +33,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -160,8 +159,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, // Check whether all pointer arguments point to local memory, and // ignore calls that only access local memory. - for (CallSite::arg_iterator CI = Call->arg_begin(), CE = Call->arg_end(); - CI != CE; ++CI) { + for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) { Value *Arg = *CI; if (!Arg->getType()->isPtrOrPtrVectorTy()) continue; @@ -362,13 +360,13 @@ struct ArgumentUsesTracker : public CaptureTracker { void tooManyUses() override { Captured = true; } bool captured(const Use *U) override { - CallSite CS(U->getUser()); - if (!CS.getInstruction()) { + CallBase *CB = dyn_cast<CallBase>(U->getUser()); + if (!CB) { Captured = true; return true; } - Function *F = CS.getCalledFunction(); + Function *F = CB->getCalledFunction(); if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) { Captured = true; return true; @@ -379,14 +377,14 @@ struct ArgumentUsesTracker : public CaptureTracker { // these. unsigned UseIndex = - std::distance(const_cast<const Use *>(CS.arg_begin()), U); + std::distance(const_cast<const Use *>(CB->arg_begin()), U); - assert(UseIndex < CS.data_operands_size() && + assert(UseIndex < CB->data_operands_size() && "Indirect function calls should have been filtered above!"); - if (UseIndex >= CS.getNumArgOperands()) { + if (UseIndex >= CB->getNumArgOperands()) { // Data operand, but not a argument operand -- must be a bundle operand - assert(CS.hasOperandBundles() && "Must be!"); + assert(CB->hasOperandBundles() && "Must be!"); // CaptureTracking told us that we're being captured by an operand bundle // use. In this case it does not matter if the callee is within our SCC @@ -449,7 +447,7 @@ determinePointerReadAttrs(Argument *A, SmallPtrSet<Use *, 32> Visited; // inalloca arguments are always clobbered by the call. 
- if (A->hasInAllocaAttr()) + if (A->hasInAllocaAttr() || A->hasPreallocatedAttr()) return Attribute::None; bool IsRead = false; @@ -490,15 +488,15 @@ determinePointerReadAttrs(Argument *A, Worklist.push_back(&UU); }; - CallSite CS(I); - if (CS.doesNotAccessMemory()) { + CallBase &CB = cast<CallBase>(*I); + if (CB.doesNotAccessMemory()) { AddUsersToWorklistIfCapturing(); continue; } - Function *F = CS.getCalledFunction(); + Function *F = CB.getCalledFunction(); if (!F) { - if (CS.onlyReadsMemory()) { + if (CB.onlyReadsMemory()) { IsRead = true; AddUsersToWorklistIfCapturing(); continue; @@ -510,23 +508,23 @@ determinePointerReadAttrs(Argument *A, // operands. This means there is no need to adjust UseIndex to account // for these. - unsigned UseIndex = std::distance(CS.arg_begin(), U); + unsigned UseIndex = std::distance(CB.arg_begin(), U); // U cannot be the callee operand use: since we're exploring the // transitive uses of an Argument, having such a use be a callee would - // imply the CallSite is an indirect call or invoke; and we'd take the + // imply the call site is an indirect call or invoke; and we'd take the // early exit above. - assert(UseIndex < CS.data_operands_size() && + assert(UseIndex < CB.data_operands_size() && "Data operand use expected!"); - bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands(); + bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands(); if (UseIndex >= F->arg_size() && !IsOperandBundleUse) { assert(F->isVarArg() && "More params than args in non-varargs call"); return Attribute::None; } - Captures &= !CS.doesNotCapture(UseIndex); + Captures &= !CB.doesNotCapture(UseIndex); // Since the optimizer (by design) cannot see the data flow corresponding // to a operand bundle use, these cannot participate in the optimistic SCC @@ -535,12 +533,12 @@ determinePointerReadAttrs(Argument *A, if (IsOperandBundleUse || !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) { - // The accessors used on CallSite here do the right thing for calls and + // The accessors used on call site here do the right thing for calls and // invokes with operand bundles. - if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex)) + if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex)) return Attribute::None; - if (!CS.doesNotAccessMemory(UseIndex)) + if (!CB.doesNotAccessMemory(UseIndex)) IsRead = true; } @@ -638,8 +636,8 @@ static bool addArgumentAttrsFromCallsites(Function &F) { // callsite. BasicBlock &Entry = F.getEntryBlock(); for (Instruction &I : Entry) { - if (auto CS = CallSite(&I)) { - if (auto *CalledFunc = CS.getCalledFunction()) { + if (auto *CB = dyn_cast<CallBase>(&I)) { + if (auto *CalledFunc = CB->getCalledFunction()) { for (auto &CSArg : CalledFunc->args()) { if (!CSArg.hasNonNullAttr()) continue; @@ -647,7 +645,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) { // If the non-null callsite argument operand is an argument to 'F' // (the caller) and the call is guaranteed to execute, then the value // must be non-null throughout 'F'. 
- auto *FArg = dyn_cast<Argument>(CS.getArgOperand(CSArg.getArgNo())); + auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo())); if (FArg && !FArg->hasNonNullAttr()) { FArg->addAttr(Attribute::NonNull); Changed = true; @@ -904,10 +902,10 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { break; case Instruction::Call: case Instruction::Invoke: { - CallSite CS(RVI); - if (CS.hasRetAttr(Attribute::NoAlias)) + CallBase &CB = cast<CallBase>(*RVI); + if (CB.hasRetAttr(Attribute::NoAlias)) break; - if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction())) break; LLVM_FALLTHROUGH; } @@ -1013,8 +1011,8 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, } case Instruction::Call: case Instruction::Invoke: { - CallSite CS(RVI); - Function *Callee = CS.getCalledFunction(); + CallBase &CB = cast<CallBase>(*RVI); + Function *Callee = CB.getCalledFunction(); // A call to a node within the SCC is assumed to return null until // proven otherwise if (Callee && SCCNodes.count(Callee)) { @@ -1223,10 +1221,11 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) { /// Helper for non-Convergent inference predicate InstrBreaksAttribute. static bool InstrBreaksNonConvergent(Instruction &I, const SCCNodeSet &SCCNodes) { - const CallSite CS(&I); + const CallBase *CB = dyn_cast<CallBase>(&I); // Breaks non-convergent assumption if CS is a convergent call to a function // not in the SCC. - return CS && CS.isConvergent() && SCCNodes.count(CS.getCalledFunction()) == 0; + return CB && CB->isConvergent() && + SCCNodes.count(CB->getCalledFunction()) == 0; } /// Helper for NoUnwind inference predicate InstrBreaksAttribute. @@ -1247,11 +1246,11 @@ static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { /// Helper for NoFree inference predicate InstrBreaksAttribute. static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) { - CallSite CS(&I); - if (!CS) + CallBase *CB = dyn_cast<CallBase>(&I); + if (!CB) return false; - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB->getCalledFunction(); if (!Callee) return true; @@ -1306,7 +1305,7 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) { // Skip non-throwing functions. [](const Function &F) { return F.doesNotThrow(); }, // Instructions that break non-throwing assumption. - [SCCNodes](Instruction &I) { + [&SCCNodes](Instruction &I) { return InstrBreaksNonThrowing(I, SCCNodes); }, [](Function &F) { @@ -1329,7 +1328,7 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) { // Skip functions known not to free memory. [](const Function &F) { return F.doesNotFreeMemory(); }, // Instructions that break non-deallocating assumption. - [SCCNodes](Instruction &I) { + [&SCCNodes](Instruction &I) { return InstrBreaksNoFree(I, SCCNodes); }, [](Function &F) { @@ -1368,8 +1367,8 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) { // marked norecurse, so any called from F to F will not be marked norecurse. for (auto &BB : *F) for (auto &I : BB.instructionsWithoutDebug()) - if (auto CS = CallSite(&I)) { - Function *Callee = CS.getCalledFunction(); + if (auto *CB = dyn_cast<CallBase>(&I)) { + Function *Callee = CB->getCalledFunction(); if (!Callee || Callee == F || !Callee->doesNotRecurse()) // Function calls a potentially recursive function. 
return false; @@ -1439,8 +1438,8 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, // function. if (!HasUnknownCall) for (Instruction &I : instructions(F)) - if (auto CS = CallSite(&I)) - if (!CS.getCalledFunction()) { + if (auto *CB = dyn_cast<CallBase>(&I)) + if (!CB->getCalledFunction()) { HasUnknownCall = true; break; } @@ -1575,8 +1574,8 @@ static bool addNoRecurseAttrsTopDown(Function &F) { auto *I = dyn_cast<Instruction>(U); if (!I) return false; - CallSite CS(I); - if (!CS || !CS.getParent()->getParent()->doesNotRecurse()) + CallBase *CB = dyn_cast<CallBase>(I); + if (!CB || !CB->getParent()->getParent()->doesNotRecurse()) return false; } return setDoesNotRecurse(F); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index be0446a946ec5..468bf19f2e48a 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -306,28 +306,21 @@ static void computeImportForReferencedGlobals( RefSummary->modulePath() != Summary.modulePath(); }; - auto MarkExported = [&](const ValueInfo &VI, const GlobalValueSummary *S) { - if (ExportLists) - (*ExportLists)[S->modulePath()].insert(VI); - }; - for (auto &RefSummary : VI.getSummaryList()) if (isa<GlobalVarSummary>(RefSummary.get()) && Index.canImportGlobalVar(RefSummary.get(), /* AnalyzeRefs */ true) && !LocalNotInModule(RefSummary.get())) { auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID()); - // Only update stat if we haven't already imported this variable. - if (ILI.second) - NumImportedGlobalVarsThinLink++; - MarkExported(VI, RefSummary.get()); - // Promote referenced functions and variables. We don't promote - // objects referenced by writeonly variable initializer, because - // we convert such variables initializers to "zeroinitializer". - // See processGlobalForThinLTO. - if (!Index.isWriteOnly(cast<GlobalVarSummary>(RefSummary.get()))) - for (const auto &VI : RefSummary->refs()) - for (const auto &RefFn : VI.getSummaryList()) - MarkExported(VI, RefFn.get()); + // Only update stat and exports if we haven't already imported this + // variable. + if (!ILI.second) + break; + NumImportedGlobalVarsThinLink++; + // Any references made by this variable will be marked exported later, + // in ComputeCrossModuleImport, after import decisions are complete, + // which is more efficient than adding them here. + if (ExportLists) + (*ExportLists)[RefSummary->modulePath()].insert(VI); break; } } @@ -494,24 +487,11 @@ static void computeImportForFunction( NumImportedCriticalFunctionsThinLink++; } - // Make exports in the source module. - if (ExportLists) { - auto &ExportList = (*ExportLists)[ExportModulePath]; - ExportList.insert(VI); - if (!PreviouslyImported) { - // This is the first time this function was exported from its source - // module, so mark all functions and globals it references as exported - // to the outside if they are defined in the same source module. - // For efficiency, we unconditionally add all the referenced GUIDs - // to the ExportList for this module, and will prune out any not - // defined in the module later in a single pass. - for (auto &Edge : ResolvedCalleeSummary->calls()) - ExportList.insert(Edge.first); - - for (auto &Ref : ResolvedCalleeSummary->refs()) - ExportList.insert(Ref); - } - } + // Any calls/references made by this function will be marked exported + // later, in ComputeCrossModuleImport, after import decisions are + // complete, which is more efficient than adding them here. 
+ if (ExportLists) + (*ExportLists)[ExportModulePath].insert(VI); } auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) { @@ -678,20 +658,55 @@ void llvm::ComputeCrossModuleImport( &ExportLists); } - // When computing imports we added all GUIDs referenced by anything - // imported from the module to its ExportList. Now we prune each ExportList - // of any not defined in that module. This is more efficient than checking - // while computing imports because some of the summary lists may be long - // due to linkonce (comdat) copies. + // When computing imports we only added the variables and functions being + // imported to the export list. We also need to mark any references and calls + // they make as exported as well. We do this here, as it is more efficient + // since we may import the same values multiple times into different modules + // during the import computation. for (auto &ELI : ExportLists) { + FunctionImporter::ExportSetTy NewExports; const auto &DefinedGVSummaries = ModuleToDefinedGVSummaries.lookup(ELI.first()); - for (auto EI = ELI.second.begin(); EI != ELI.second.end();) { + for (auto &EI : ELI.second) { + // Find the copy defined in the exporting module so that we can mark the + // values it references in that specific definition as exported. + // Below we will add all references and called values, without regard to + // whether they are also defined in this module. We subsequently prune the + // list to only include those defined in the exporting module, see comment + // there as to why. + auto DS = DefinedGVSummaries.find(EI.getGUID()); + // Anything marked exported during the import computation must have been + // defined in the exporting module. + assert(DS != DefinedGVSummaries.end()); + auto *S = DS->getSecond(); + S = S->getBaseObject(); + if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) { + // Export referenced functions and variables. We don't export/promote + // objects referenced by writeonly variable initializer, because + // we convert such variables initializers to "zeroinitializer". + // See processGlobalForThinLTO. + if (!Index.isWriteOnly(GVS)) + for (const auto &VI : GVS->refs()) + NewExports.insert(VI); + } else { + auto *FS = cast<FunctionSummary>(S); + for (auto &Edge : FS->calls()) + NewExports.insert(Edge.first); + for (auto &Ref : FS->refs()) + NewExports.insert(Ref); + } + } + // Prune list computed above to only include values defined in the exporting + // module. We do this after the above insertion since we may hit the same + // ref/call target multiple times in above loop, and it is more efficient to + // avoid a set lookup each time. + for (auto EI = NewExports.begin(); EI != NewExports.end();) { if (!DefinedGVSummaries.count(EI->getGUID())) - ELI.second.erase(EI++); + NewExports.erase(EI++); else ++EI; } + ELI.second.insert(NewExports.begin(), NewExports.end()); } assert(checkVariableImport(Index, ImportLists, ExportLists)); @@ -913,11 +928,12 @@ void llvm::gatherImportedSummariesForModule( const FunctionImporter::ImportMapTy &ImportList, std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) { // Include all summaries from the importing module. - ModuleToSummariesForIndex[ModulePath] = + ModuleToSummariesForIndex[std::string(ModulePath)] = ModuleToDefinedGVSummaries.lookup(ModulePath); // Include summaries for imports. 
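The export-list rework in FunctionImport.cpp above deliberately over-collects first (every ref/call made by an exported definition) and prunes afterwards, since one pruning pass per module is cheaper than a definedness lookup on every insertion. A minimal sketch of that second phase, using simplified stand-in types rather than the real summary-index containers:

#include <cstdint>
#include <set>

using GUID = uint64_t;  // stand-in for GlobalValue::GUID

// Hypothetical helper: drop every collected export that is not actually
// defined in the exporting module; only locally defined values need promotion.
static void pruneToDefined(std::set<GUID> &NewExports,
                           const std::set<GUID> &DefinedInModule) {
  for (auto It = NewExports.begin(); It != NewExports.end();) {
    if (!DefinedInModule.count(*It))
      It = NewExports.erase(It);  // referenced, but defined in some other module
    else
      ++It;                       // defined here: keep it in the export list
  }
}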
for (auto &ILI : ImportList) { - auto &SummariesForIndex = ModuleToSummariesForIndex[ILI.first()]; + auto &SummariesForIndex = + ModuleToSummariesForIndex[std::string(ILI.first())]; const auto &DefinedGVSummaries = ModuleToDefinedGVSummaries.lookup(ILI.first()); for (auto &GI : ILI.second) { @@ -976,6 +992,8 @@ bool llvm::convertToDeclaration(GlobalValue &GV) { GV.replaceAllUsesWith(NewGV); return false; } + if (!GV.isImplicitDSOLocal()) + GV.setDSOLocal(false); return true; } @@ -1214,8 +1232,15 @@ Expected<bool> FunctionImporter::importFunctions( // have loaded all the required metadata! UpgradeDebugInfo(*SrcModule); + // Set the partial sample profile ratio in the profile summary module flag + // of the imported source module, if applicable, so that the profile summary + // module flag will match with that of the destination module when it's + // imported. + SrcModule->setPartialSampleProfileRatio(Index); + // Link in the specified functions. - if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport)) + if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations, + &GlobalsToImport)) return true; if (PrintImports) { @@ -1224,10 +1249,12 @@ Expected<bool> FunctionImporter::importFunctions( << " from " << SrcModule->getSourceFileName() << "\n"; } - if (Mover.move(std::move(SrcModule), GlobalsToImport.getArrayRef(), - [](GlobalValue &, IRMover::ValueAdder) {}, - /*IsPerformingImport=*/true)) - report_fatal_error("Function Import: link error"); + if (Error Err = Mover.move( + std::move(SrcModule), GlobalsToImport.getArrayRef(), + [](GlobalValue &, IRMover::ValueAdder) {}, + /*IsPerformingImport=*/true)) + report_fatal_error("Function Import: link error: " + + toString(std::move(Err))); ImportedCount += GlobalsToImport.size(); NumImportedModules++; @@ -1284,16 +1311,18 @@ static bool doImportingForModule(Module &M) { // Next we need to promote to global scope and rename any local values that // are potentially exported to other modules. - if (renameModuleForThinLTO(M, *Index, nullptr)) { + if (renameModuleForThinLTO(M, *Index, /*clearDSOOnDeclarations=*/false, + /*GlobalsToImport=*/nullptr)) { errs() << "Error renaming module\n"; return false; } // Perform the import now. auto ModuleLoader = [&M](StringRef Identifier) { - return loadFile(Identifier, M.getContext()); + return loadFile(std::string(Identifier), M.getContext()); }; - FunctionImporter Importer(*Index, ModuleLoader); + FunctionImporter Importer(*Index, ModuleLoader, + /*ClearDSOLocalOnDeclarations=*/false); Expected<bool> Result = Importer.importFunctions(M, ImportList); // FIXME: Probably need to propagate Errors through the pass manager. diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp index 72b8d7522f04f..fb4cb23b837e0 100644 --- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -263,6 +263,15 @@ void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) { if (!ClEnableVFE) return; + // If the Virtual Function Elim module flag is present and set to zero, then + // the vcall_visibility metadata was inserted for another optimization (WPD) + // and we may not have type checked loads on all accesses to the vtable. + // Don't attempt VFE in that case. 
+ auto *Val = mdconst::dyn_extract_or_null<ConstantInt>( + M.getModuleFlag("Virtual Function Elim")); + if (!Val || Val->getZExtValue() == 0) + return; + ScanVTables(M); if (VFESafeVTables.empty()) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 0fd966457ece4..d9fb820f7cb53 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -28,7 +28,6 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -41,6 +40,7 @@ #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -128,13 +128,16 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { Type *Ty = Types.pop_back_val(); switch (Ty->getTypeID()) { default: break; - case Type::PointerTyID: return true; + case Type::PointerTyID: + return true; + case Type::FixedVectorTyID: + case Type::ScalableVectorTyID: + if (cast<VectorType>(Ty)->getElementType()->isPointerTy()) + return true; + break; case Type::ArrayTyID: - case Type::VectorTyID: { - SequentialType *STy = cast<SequentialType>(Ty); - Types.push_back(STy->getElementType()); + Types.push_back(cast<ArrayType>(Ty)->getElementType()); break; - } case Type::StructTyID: { StructType *STy = cast<StructType>(Ty); if (STy->isOpaque()) return true; @@ -142,7 +145,8 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { E = STy->element_end(); I != E; ++I) { Type *InnerTy = *I; if (isa<PointerType>(InnerTy)) return true; - if (isa<CompositeType>(InnerTy)) + if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) || + isa<VectorType>(InnerTy)) Types.push_back(InnerTy); } break; @@ -191,10 +195,10 @@ CleanupPointerRootUsers(GlobalVariable *GV, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { // A brief explanation of leak checkers. The goal is to find bugs where // pointers are forgotten, causing an accumulating growth in memory - // usage over time. The common strategy for leak checkers is to whitelist the - // memory pointed to by globals at exit. This is popular because it also - // solves another problem where the main thread of a C++ program may shut down - // before other threads that are still expecting to use those globals. To + // usage over time. The common strategy for leak checkers is to explicitly + // allow the memory pointed to by globals at exit. This is popular because it + // also solves another problem where the main thread of a C++ program may shut + // down before other threads that are still expecting to use those globals. To // handle that case, we expect the program may create a singleton and never // destroy it. 
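The SRAGlobal changes in the hunks that follow switch from MinAlign arithmetic on raw unsigned values to Align and commonAlignment. commonAlignment(A, Offset) is the alignment a field at byte Offset inside an A-aligned global can still rely on: the minimum of A and the largest power of two dividing Offset. A minimal sketch; the helper name and the numbers in the comment are illustrative only.

#include "llvm/Support/Alignment.h"
#include <cstdint>
using namespace llvm;

// Hypothetical helper: alignment guaranteed for a split-out field.
// E.g. for a 16-byte-aligned global, a field at offset 4 only gets Align(4),
// while a field at offset 32 keeps the full Align(16).
static Align fieldAlign(Align GlobalAlign, uint64_t FieldOffset) {
  return commonAlignment(GlobalAlign, FieldOffset);
}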
@@ -433,13 +437,27 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { return true; } +static bool IsSRASequential(Type *T) { + return isa<ArrayType>(T) || isa<VectorType>(T); +} +static uint64_t GetSRASequentialNumElements(Type *T) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) + return AT->getNumElements(); + return cast<FixedVectorType>(T)->getNumElements(); +} +static Type *GetSRASequentialElementType(Type *T) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) + return AT->getElementType(); + return cast<VectorType>(T)->getElementType(); +} static bool CanDoGlobalSRA(GlobalVariable *GV) { Constant *Init = GV->getInitializer(); if (isa<StructType>(Init->getType())) { // nothing to check - } else if (SequentialType *STy = dyn_cast<SequentialType>(Init->getType())) { - if (STy->getNumElements() > 16 && GV->hasNUsesOrMore(16)) + } else if (IsSRASequential(Init->getType())) { + if (GetSRASequentialNumElements(Init->getType()) > 16 && + GV->hasNUsesOrMore(16)) return false; // It's not worth it. } else return false; @@ -450,14 +468,19 @@ static bool CanDoGlobalSRA(GlobalVariable *GV) { /// Copy over the debug info for a variable to its SRA replacements. static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, uint64_t FragmentOffsetInBits, - uint64_t FragmentSizeInBits, - unsigned NumElements) { + uint64_t FragmentSizeInBits) { SmallVector<DIGlobalVariableExpression *, 1> GVs; GV->getDebugInfo(GVs); for (auto *GVE : GVs) { DIVariable *Var = GVE->getVariable(); + Optional<uint64_t> VarSize = Var->getSizeInBits(); + DIExpression *Expr = GVE->getExpression(); - if (NumElements > 1) { + // If the FragmentSize is smaller than the variable, + // emit a fragment expression. + // If the variable size is unknown a fragment must be + // emitted to be safe. + if (!VarSize || FragmentSizeInBits < *VarSize) { if (auto E = DIExpression::createFragmentExpression( Expr, FragmentOffsetInBits, FragmentSizeInBits)) Expr = *E; @@ -486,9 +509,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { std::map<unsigned, GlobalVariable *> NewGlobals; // Get the alignment of the global, either explicit or target-specific. - unsigned StartAlignment = GV->getAlignment(); - if (StartAlignment == 0) - StartAlignment = DL.getABITypeAlignment(GV->getType()); + Align StartAlignment = + DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType()); // Loop over all users and create replacement variables for used aggregate // elements. @@ -509,8 +531,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { Type *ElTy = nullptr; if (StructType *STy = dyn_cast<StructType>(Ty)) ElTy = STy->getElementType(ElementIdx); - else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) - ElTy = STy->getElementType(); + else + ElTy = GetSRASequentialElementType(Ty); assert(ElTy); Constant *In = Init->getAggregateElement(ElementIdx); @@ -531,29 +553,27 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // had 256 byte alignment for example, something might depend on that: // propagate info to each field. uint64_t FieldOffset = Layout.getElementOffset(ElementIdx); - Align NewAlign(MinAlign(StartAlignment, FieldOffset)); - if (NewAlign > - Align(DL.getABITypeAlignment(STy->getElementType(ElementIdx)))) + Align NewAlign = commonAlignment(StartAlignment, FieldOffset); + if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx))) NGV->setAlignment(NewAlign); // Copy over the debug info for the variable. 
uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType()); uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx); - transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, - STy->getNumElements()); - } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { + transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size); + } else { uint64_t EltSize = DL.getTypeAllocSize(ElTy); - Align EltAlign(DL.getABITypeAlignment(ElTy)); + Align EltAlign = DL.getABITypeAlign(ElTy); uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. - Align NewAlign(MinAlign(StartAlignment, EltSize * ElementIdx)); + Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx); if (NewAlign > EltAlign) NGV->setAlignment(NewAlign); transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx, - FragmentSizeInBits, STy->getNumElements()); + FragmentSizeInBits); } } @@ -641,12 +661,12 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, return false; // Storing the value. } } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { - if (CI->getCalledValue() != V) { + if (CI->getCalledOperand() != V) { //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { - if (II->getCalledValue() != V) { + if (II->getCalledOperand() != V) { //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } @@ -659,9 +679,6 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, // checked. if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; - } else if (isa<ICmpInst>(U) && - isa<ConstantPointerNull>(U->getOperand(1))) { - // Ignore icmp X, null } else { //cerr << "NONTRAPPING USE: " << *U; return false; @@ -706,17 +723,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { Changed = true; } } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - CallSite CS(I); - if (CS.getCalledValue() == V) { + CallBase *CB = cast<CallBase>(I); + if (CB->getCalledOperand() == V) { // Calling through the pointer! Turn into a direct call, but be careful // that the pointer is not also being passed as an argument. - CS.setCalledFunction(NewV); + CB->setCalledOperand(NewV); Changed = true; bool PassedAsArg = false; - for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) - if (CS.getArgument(i) == V) { + for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) + if (CB->getArgOperand(i) == V) { PassedAsArg = true; - CS.setArgument(i, NewV); + CB->setArgOperand(i, NewV); } if (PassedAsArg) { @@ -905,7 +922,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { // The global is initialized when the store to it occurs. new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, - None, SI->getOrdering(), SI->getSyncScopeID(), SI); + Align(1), SI->getOrdering(), SI->getSyncScopeID(), SI); SI->eraseFromParent(); continue; } @@ -922,7 +939,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Replace the cmp X, 0 with a use of the bool value. // Sink the load to where the compare was, if atomic rules allow us to. 
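The load rebuilt just below uses the LoadInst constructor that now takes an explicit Align together with the ordering and sync-scope arguments. A sketch with placeholder arguments (emitByteAlignedLoad is an assumed name, not from the patch):

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Creates a non-atomic, byte-aligned load before InsertPt; Align(1) replaces
// the old 'None'/0 alignment argument used prior to this patch.
static LoadInst *emitByteAlignedLoad(Type *Ty, Value *Ptr,
                                     Instruction *InsertPt) {
  return new LoadInst(Ty, Ptr, Ptr->getName() + ".val", /*isVolatile=*/false,
                      Align(1), AtomicOrdering::NotAtomic, SyncScope::System,
                      InsertPt);
}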
Value *LV = new LoadInst(InitBool->getValueType(), InitBool, - InitBool->getName() + ".val", false, None, + InitBool->getName() + ".val", false, Align(1), LI->getOrdering(), LI->getSyncScopeID(), LI->isUnordered() ? (Instruction *)ICI : LI); InitBoolUsed = true; @@ -1729,7 +1746,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { assert(LI->getOperand(0) == GV && "Not a copy!"); // Insert a new load, to preserve the saved value. StoreVal = new LoadInst(NewGV->getValueType(), NewGV, - LI->getName() + ".b", false, None, + LI->getName() + ".b", false, Align(1), LI->getOrdering(), LI->getSyncScopeID(), LI); } else { assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && @@ -1739,14 +1756,14 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } } StoreInst *NSI = - new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(), + new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(), SI->getSyncScopeID(), SI); NSI->setDebugLoc(SI->getDebugLoc()); } else { // Change the load into a load of bool then a select. LoadInst *LI = cast<LoadInst>(UI); LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV, - LI->getName() + ".b", false, None, + LI->getName() + ".b", false, Align(1), LI->getOrdering(), LI->getSyncScopeID(), LI); Instruction *NSI; if (IsOneZero) @@ -2117,8 +2134,7 @@ static void ChangeCalleesToFastCall(Function *F) { for (User *U : F->users()) { if (isa<BlockAddress>(U)) continue; - CallSite CS(cast<Instruction>(U)); - CS.setCallingConv(CallingConv::Fast); + cast<CallBase>(U)->setCallingConv(CallingConv::Fast); } } @@ -2135,8 +2151,8 @@ static void RemoveAttribute(Function *F, Attribute::AttrKind A) { for (User *U : F->users()) { if (isa<BlockAddress>(U)) continue; - CallSite CS(cast<Instruction>(U)); - CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A)); + CallBase *CB = cast<CallBase>(U); + CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A)); } } @@ -2175,12 +2191,12 @@ static bool hasChangeableCC(Function *F) { /// Return true if the block containing the call site has a BlockFrequency of /// less than ColdCCRelFreq% of the entry block. 
-static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) { +static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) { const BranchProbability ColdProb(ColdCCRelFreq, 100); - auto CallSiteBB = CS.getInstruction()->getParent(); + auto *CallSiteBB = CB.getParent(); auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB); auto CallerEntryFreq = - CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock())); + CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock())); return CallSiteFreq < CallerEntryFreq * ColdProb; } @@ -2200,10 +2216,10 @@ isValidCandidateForColdCC(Function &F, if (isa<BlockAddress>(U)) continue; - CallSite CS(cast<Instruction>(U)); - Function *CallerFunc = CS.getInstruction()->getParent()->getParent(); + CallBase &CB = cast<CallBase>(*U); + Function *CallerFunc = CB.getParent()->getParent(); BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc); - if (!isColdCallSite(CS, CallerBFI)) + if (!isColdCallSite(CB, CallerBFI)) return false; auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc); if (It == AllCallsCold.end()) @@ -2216,8 +2232,7 @@ static void changeCallSitesToColdCC(Function *F) { for (User *U : F->users()) { if (isa<BlockAddress>(U)) continue; - CallSite CS(cast<Instruction>(U)); - CS.setCallingConv(CallingConv::Cold); + cast<CallBase>(U)->setCallingConv(CallingConv::Cold); } } @@ -2230,7 +2245,6 @@ hasOnlyColdCalls(Function &F, for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (CallInst *CI = dyn_cast<CallInst>(&I)) { - CallSite CS(cast<Instruction>(CI)); // Skip over isline asm instructions since they aren't function calls. if (CI->isInlineAsm()) continue; @@ -2247,7 +2261,7 @@ hasOnlyColdCalls(Function &F, CalledFn->hasAddressTaken()) return false; BlockFrequencyInfo &CallerBFI = GetBFI(F); - if (!isColdCallSite(CS, CallerBFI)) + if (!isColdCallSite(*CI, CallerBFI)) return false; } } @@ -2255,6 +2269,115 @@ hasOnlyColdCalls(Function &F, return true; } +static bool hasMustTailCallers(Function *F) { + for (User *U : F->users()) { + CallBase *CB = dyn_cast<CallBase>(U); + if (!CB) { + assert(isa<BlockAddress>(U) && + "Expected either CallBase or BlockAddress"); + continue; + } + if (CB->isMustTailCall()) + return true; + } + return false; +} + +static bool hasInvokeCallers(Function *F) { + for (User *U : F->users()) + if (isa<InvokeInst>(U)) + return true; + return false; +} + +static void RemovePreallocated(Function *F) { + RemoveAttribute(F, Attribute::Preallocated); + + auto *M = F->getParent(); + + IRBuilder<> Builder(M->getContext()); + + // Cannot modify users() while iterating over it, so make a copy. + SmallVector<User *, 4> PreallocatedCalls(F->users()); + for (User *U : PreallocatedCalls) { + CallBase *CB = dyn_cast<CallBase>(U); + if (!CB) + continue; + + assert( + !CB->isMustTailCall() && + "Shouldn't call RemotePreallocated() on a musttail preallocated call"); + // Create copy of call without "preallocated" operand bundle. 
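The copy mentioned in that comment strips the "preallocated" operand bundle and re-creates the call in place. A compact sketch of that step using llvm::erase_if (dropPreallocatedBundle is an assumed name; the loop that follows does the same thing while also remembering the setup call):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Recreate CB without its "preallocated" operand bundle; the new call is
// inserted right before CB so CB can then be RAUW'd and erased.
static CallBase *dropPreallocatedBundle(CallBase *CB) {
  SmallVector<OperandBundleDef, 1> Bundles;
  CB->getOperandBundlesAsDefs(Bundles);
  erase_if(Bundles, [](const OperandBundleDef &Bundle) {
    return Bundle.getTag() == "preallocated";
  });
  return CallBase::Create(CB, Bundles, CB);
}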
+ SmallVector<OperandBundleDef, 1> OpBundles; + CB->getOperandBundlesAsDefs(OpBundles); + CallBase *PreallocatedSetup = nullptr; + for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) { + if (It->getTag() == "preallocated") { + PreallocatedSetup = cast<CallBase>(*It->input_begin()); + OpBundles.erase(It); + break; + } + } + assert(PreallocatedSetup && "Did not find preallocated bundle"); + uint64_t ArgCount = + cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue(); + + assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) && + "Unknown indirect call type"); + CallBase *NewCB = CallBase::Create(CB, OpBundles, CB); + CB->replaceAllUsesWith(NewCB); + NewCB->takeName(CB); + CB->eraseFromParent(); + + Builder.SetInsertPoint(PreallocatedSetup); + auto *StackSave = + Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave)); + + Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction()); + Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore), + StackSave); + + // Replace @llvm.call.preallocated.arg() with alloca. + // Cannot modify users() while iterating over it, so make a copy. + // @llvm.call.preallocated.arg() can be called with the same index multiple + // times. So for each @llvm.call.preallocated.arg(), we see if we have + // already created a Value* for the index, and if not, create an alloca and + // bitcast right after the @llvm.call.preallocated.setup() so that it + // dominates all uses. + SmallVector<Value *, 2> ArgAllocas(ArgCount); + SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users()); + for (auto *User : PreallocatedArgs) { + auto *UseCall = cast<CallBase>(User); + assert(UseCall->getCalledFunction()->getIntrinsicID() == + Intrinsic::call_preallocated_arg && + "preallocated token use was not a llvm.call.preallocated.arg"); + uint64_t AllocArgIndex = + cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue(); + Value *AllocaReplacement = ArgAllocas[AllocArgIndex]; + if (!AllocaReplacement) { + auto AddressSpace = UseCall->getType()->getPointerAddressSpace(); + auto *ArgType = UseCall + ->getAttribute(AttributeList::FunctionIndex, + Attribute::Preallocated) + .getValueAsType(); + auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction(); + Builder.SetInsertPoint(InsertBefore); + auto *Alloca = + Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg"); + auto *BitCast = Builder.CreateBitCast( + Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName()); + ArgAllocas[AllocArgIndex] = BitCast; + AllocaReplacement = BitCast; + } + + UseCall->replaceAllUsesWith(AllocaReplacement); + UseCall->eraseFromParent(); + } + // Remove @llvm.call.preallocated.setup(). + cast<Instruction>(PreallocatedSetup)->eraseFromParent(); + } +} + static bool OptimizeFunctions(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI, @@ -2319,11 +2442,22 @@ OptimizeFunctions(Module &M, // FIXME: We should also hoist alloca affected by this to the entry // block if possible. 
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) && - !F->hasAddressTaken()) { + !F->hasAddressTaken() && !hasMustTailCallers(F)) { RemoveAttribute(F, Attribute::InAlloca); Changed = true; } + // FIXME: handle invokes + // FIXME: handle musttail + if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) { + if (!F->hasAddressTaken() && !hasMustTailCallers(F) && + !hasInvokeCallers(F)) { + RemovePreallocated(F); + Changed = true; + } + continue; + } + if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { NumInternalFunc++; TargetTransformInfo &TTI = GetTTI(*F); @@ -2385,7 +2519,7 @@ OptimizeGlobalVars(Module &M, // for that optional parameter, since we don't have a Function to // provide GetTLI anyway. Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); - if (New && New != C) + if (New != C) GV->setInitializer(New); } @@ -2427,8 +2561,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, } ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); - SequentialType *InitTy = cast<SequentialType>(Init->getType()); - uint64_t NumElts = InitTy->getNumElements(); + uint64_t NumElts; + if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) + NumElts = ATy->getNumElements(); + else + NumElts = cast<FixedVectorType>(Init->getType())->getNumElements(); // Break up the array into elements. for (uint64_t i = 0, e = NumElts; i != e; ++i) @@ -2439,7 +2576,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); if (Init->getType()->isArrayTy()) - return ConstantArray::get(cast<ArrayType>(InitTy), Elts); + return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts); return ConstantVector::get(Elts); } @@ -2561,8 +2698,10 @@ static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) { unsigned NumElts; if (auto *STy = dyn_cast<StructType>(Ty)) NumElts = STy->getNumElements(); + else if (auto *ATy = dyn_cast<ArrayType>(Ty)) + NumElts = ATy->getNumElements(); else - NumElts = cast<SequentialType>(Ty)->getNumElements(); + NumElts = cast<FixedVectorType>(Ty)->getNumElements(); for (unsigned i = 0, e = NumElts; i != e; ++i) Elts.push_back(Init->getAggregateElement(i)); } diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp index 4a319ead23c0e..365b269dc3bf6 100644 --- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp +++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp @@ -111,6 +111,9 @@ static bool splitGlobal(GlobalVariable &GV) { ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)), Type->getOperand(1)})); } + + if (GV.hasMetadata(LLVMContext::MD_vcall_visibility)) + SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility()); } for (User *U : GV.users()) { diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index 5e690714bfdfb..d0bd0166534a7 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -39,7 +39,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -110,8 +109,8 @@ bool unlikelyExecuted(BasicBlock &BB) { // The block is cold if it calls/invokes a cold function. However, do not // mark sanitizer traps as cold. 
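Here "cold function" means the callee carries Attribute::Cold, for example from __attribute__((cold)) in the source. For illustration only (markAsCold is an assumed name), the attribute can also be added programmatically:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Marks F cold, which lets HotColdSplitting treat blocks that call it as
// unlikely to execute.
static void markAsCold(Function &F) { F.addFnAttr(Attribute::Cold); }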
for (Instruction &I : BB) - if (auto CS = CallSite(&I)) - if (CS.hasFnAttr(Attribute::Cold) && !CS->getMetadata("nosanitize")) + if (auto *CB = dyn_cast<CallBase>(&I)) + if (CB->hasFnAttr(Attribute::Cold) && !CB->getMetadata("nosanitize")) return true; // The block is cold if it has an unreachable terminator, unless it's @@ -325,11 +324,10 @@ Function *HotColdSplitting::extractColdRegion( if (Function *OutF = CE.extractCodeRegion(CEAC)) { User *U = *OutF->user_begin(); CallInst *CI = cast<CallInst>(U); - CallSite CS(CI); NumColdRegionsOutlined++; if (TTI.useColdCCForColdCall(*OutF)) { OutF->setCallingConv(CallingConv::Cold); - CS.setCallingConv(CallingConv::Cold); + CI->setCallingConv(CallingConv::Cold); } CI->setIsNoInline(); @@ -458,6 +456,10 @@ public: // first have predecessors within the extraction region. if (mayExtractBlock(SinkBB)) { addBlockToRegion(&SinkBB, SinkScore); + if (pred_empty(&SinkBB)) { + ColdRegion->EntireFunctionCold = true; + return Regions; + } } else { Regions.emplace_back(); ColdRegion = &Regions.back(); diff --git a/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp index 1bda13a9bdd80..8d05a72d68dac 100644 --- a/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -17,7 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CallSite.h" +#include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -53,7 +53,7 @@ static bool PropagateConstantsIntoArguments(Function &F) { // For each argument, keep track of its constant value and whether it is a // constant or not. The bool is driven to true when found to be non-constant. - SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants; + SmallVector<PointerIntPair<Constant *, 1, bool>, 16> ArgumentConstants; ArgumentConstants.resize(F.arg_size()); unsigned NumNonconstant = 0; @@ -80,7 +80,7 @@ static bool PropagateConstantsIntoArguments(Function &F) { for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) { // If this argument is known non-constant, ignore it. - if (ArgumentConstants[i].second) + if (ArgumentConstants[i].getInt()) continue; Value *V = ACS.getCallArgOperand(i); @@ -102,13 +102,13 @@ static bool PropagateConstantsIntoArguments(Function &F) { if (++NumNonconstant == ArgumentConstants.size()) return false; - ArgumentConstants[i].second = true; + ArgumentConstants[i].setInt(true); continue; } - if (C && ArgumentConstants[i].first == nullptr) { - ArgumentConstants[i].first = C; // First constant seen. - } else if (C && ArgumentConstants[i].first == C) { + if (C && ArgumentConstants[i].getPointer() == nullptr) { + ArgumentConstants[i].setPointer(C); // First constant seen. + } else if (C && ArgumentConstants[i].getPointer() == C) { // Still the constant value we think it is. } else if (V == &*Arg) { // Ignore recursive calls passing argument down. @@ -117,7 +117,7 @@ static bool PropagateConstantsIntoArguments(Function &F) { // give up on this function. if (++NumNonconstant == ArgumentConstants.size()) return false; - ArgumentConstants[i].second = true; + ArgumentConstants[i].setInt(true); } } } @@ -128,11 +128,11 @@ static bool PropagateConstantsIntoArguments(Function &F) { Function::arg_iterator AI = F.arg_begin(); for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { // Do we have a constant argument? 
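As the hunks above show, PropagateConstantsIntoArguments now packs that per-argument state into a PointerIntPair, keeping the single known constant and the proven-non-constant bit in one pointer-sized slot. A minimal sketch of the update rule (meet is an assumed name; the real loop also special-cases recursive self-calls):

#include "llvm/ADT/PointerIntPair.h"
#include "llvm/IR/Constants.h"
using namespace llvm;

// One lattice slot per argument: pointer = the only constant seen so far
// (null until a constant is seen), int = proven non-constant.
using ArgLattice = PointerIntPair<Constant *, 1, bool>;

static void meet(ArgLattice &Slot, Constant *C) {
  if (Slot.getInt())
    return;                      // already non-constant, nothing to do
  if (!C || (Slot.getPointer() && Slot.getPointer() != C)) {
    Slot.setInt(true);           // non-constant, or conflicting constants
    return;
  }
  if (!Slot.getPointer())
    Slot.setPointer(C);          // first constant seen
}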
- if (ArgumentConstants[i].second || AI->use_empty() || - AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory())) + if (ArgumentConstants[i].getInt() || AI->use_empty() || + (AI->hasByValAttr() && !F.onlyReadsMemory())) continue; - Value *V = ArgumentConstants[i].first; + Value *V = ArgumentConstants[i].getPointer(); if (!V) V = UndefValue::get(AI->getType()); AI->replaceAllUsesWith(V); ++NumArgumentsProped; @@ -222,16 +222,15 @@ static bool PropagateConstantReturn(Function &F) { // constant. bool MadeChange = false; for (Use &U : F.uses()) { - CallSite CS(U.getUser()); - Instruction* Call = CS.getInstruction(); + CallBase *CB = dyn_cast<CallBase>(U.getUser()); // Not a call instruction or a call instruction that's not calling F // directly? - if (!Call || !CS.isCallee(&U)) + if (!CB || !CB->isCallee(&U)) continue; // Call result not used? - if (Call->use_empty()) + if (CB->use_empty()) continue; MadeChange = true; @@ -241,12 +240,12 @@ static bool PropagateConstantReturn(Function &F) { if (Argument *A = dyn_cast<Argument>(New)) // Was an argument returned? Then find the corresponding argument in // the call instruction and use that. - New = CS.getArgument(A->getArgNo()); - Call->replaceAllUsesWith(New); + New = CB->getArgOperand(A->getArgNo()); + CB->replaceAllUsesWith(New); continue; } - for (auto I = Call->user_begin(), E = Call->user_end(); I != E;) { + for (auto I = CB->user_begin(), E = CB->user_end(); I != E;) { Instruction *Ins = cast<Instruction>(*I); // Increment now, so we can remove the use @@ -266,7 +265,7 @@ static bool PropagateConstantReturn(Function &F) { if (Argument *A = dyn_cast<Argument>(New)) // Was an argument returned? Then find the corresponding argument in // the call instruction and use that. - New = CS.getArgument(A->getArgNo()); + New = CB->getArgOperand(A->getArgNo()); Ins->replaceAllUsesWith(New); Ins->eraseFromParent(); } diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index 8a15800cbdb5b..d37b9236380d4 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -23,6 +23,7 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { + initializeOpenMPOptLegacyPassPass(Registry); initializeArgPromotionPass(Registry); initializeCalledValuePropagationLegacyPassPass(Registry); initializeConstantMergeLegacyPassPass(Registry); @@ -46,6 +47,7 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeMergeFunctionsLegacyPassPass(Registry); initializePartialInlinerLegacyPassPass(Registry); initializeAttributorLegacyPassPass(Registry); + initializeAttributorCGSCCLegacyPassPass(Registry); initializePostOrderFunctionAttrsLegacyPassPass(Registry); initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry); initializePruneEHPass(Registry); diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp index e818743544e68..76f1d0c54d081 100644 --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" @@ -52,26 +51,26 @@ public: static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) override { - Function *Callee = CS.getCalledFunction(); + InlineCost getInlineCost(CallBase &CB) 
override { + Function *Callee = CB.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); bool RemarksEnabled = false; - const auto &BBs = CS.getCaller()->getBasicBlockList(); + const auto &BBs = CB.getCaller()->getBasicBlockList(); if (!BBs.empty()) { auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front()); if (DI.isEnabled()) RemarksEnabled = true; } - OptimizationRemarkEmitter ORE(CS.getCaller()); + OptimizationRemarkEmitter ORE(CB.getCaller()); std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - return llvm::getInlineCost( - cast<CallBase>(*CS.getInstruction()), Params, TTI, GetAssumptionCache, - /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr); + return llvm::getInlineCost(CB, Params, TTI, GetAssumptionCache, GetTLI, + /*GetBFI=*/nullptr, PSI, + RemarksEnabled ? &ORE : nullptr); } bool runOnSCC(CallGraphSCC &SCC) override; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 4b72261131c16..7d2260f4c169d 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -28,16 +29,16 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -57,8 +58,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> #include <cassert> @@ -77,11 +80,6 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); -// This weirdly named statistic tracks the number of times that, when attempting -// to inline a function A into B, we analyze the callers of B in order to see -// if those would be more profitable and blocked inline steps. -STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); - /// Flag to disable manual alloca merging. /// /// Merging of allocas was originally done as a stack-size saving technique @@ -112,14 +110,6 @@ static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( "printing of statistics for each inlined function")), cl::Hidden, cl::desc("Enable inliner stats for imported functions")); -/// Flag to add inline messages as callsite attributes 'inline-remark'. 
-static cl::opt<bool> - InlineRemarkAttribute("inline-remark-attribute", cl::init(false), - cl::Hidden, - cl::desc("Enable adding inline-remark attribute to" - " callsites processed by inliner but decided" - " to be not inlined")); - LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) @@ -158,13 +148,13 @@ using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>; /// *actually make it to the backend*, which is really what we want. /// /// Because we don't have this information, we do this simple and useful hack. -static void mergeInlinedArrayAllocas( - Function *Caller, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) { +static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, + int InlineHistory) { SmallPtrSet<AllocaInst *, 16> UsedAllocas; - // When processing our SCC, check to see if CS was inlined from some other - // call site. For example, if we're processing "A" in this code: + // When processing our SCC, check to see if the call site was inlined from + // some other call site. For example, if we're processing "A" in this code: // A() { B() } // B() { x = alloca ... C() } // C() { y = alloca ... } @@ -180,7 +170,7 @@ static void mergeInlinedArrayAllocas( // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. - for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e; + for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E; ++AllocaNo) { AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; @@ -201,8 +191,8 @@ static void mergeInlinedArrayAllocas( // function. Also, AllocasForType can be empty of course! bool MergedAwayAlloca = false; for (AllocaInst *AvailableAlloca : AllocasForType) { - unsigned Align1 = AI->getAlignment(), - Align2 = AvailableAlloca->getAlignment(); + Align Align1 = AI->getAlign(); + Align Align2 = AvailableAlloca->getAlign(); // The available alloca has to be in the right function, not in some other // function in this SCC. @@ -229,18 +219,8 @@ static void mergeInlinedArrayAllocas( AI->replaceAllUsesWith(AvailableAlloca); - if (Align1 != Align2) { - if (!Align1 || !Align2) { - const DataLayout &DL = Caller->getParent()->getDataLayout(); - unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType()); - - Align1 = Align1 ? Align1 : TypeAlign; - Align2 = Align2 ? Align2 : TypeAlign; - } - - if (Align1 > Align2) - AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment())); - } + if (Align1 > Align2) + AvailableAlloca->setAlignment(AI->getAlign()); AI->eraseFromParent(); MergedAwayAlloca = true; @@ -271,20 +251,20 @@ static void mergeInlinedArrayAllocas( /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. 
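Inside mergeInlinedArrayAllocas above, the switch to the Align type removes the old zero-alignment fallback: both allocas now report a concrete alignment, so the merge just keeps the larger of the two. A sketch of that simplified step (keepLargerAlignment is an assumed name):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// When merging Merged into Kept, make sure Kept is at least as aligned as
// the alloca it replaces.
static void keepLargerAlignment(AllocaInst *Kept, const AllocaInst *Merged) {
  if (Merged->getAlign() > Kept->getAlign())
    Kept->setAlignment(Merged->getAlign());
}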
-static InlineResult InlineCallIfPossible( - CallSite CS, InlineFunctionInfo &IFI, +static InlineResult inlineCallIfPossible( + CallBase &CB, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { - Function *Callee = CS.getCalledFunction(); - Function *Caller = CS.getCaller(); + Function *Callee = CB.getCalledFunction(); + Function *Caller = CB.getCaller(); AAResults &AAR = AARGetter(*Callee); // Try to inline the function. Get the list of static allocas that were // inlined. - InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime); - if (!IR) + InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime); + if (!IR.isSuccess()) return IR; if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) @@ -298,188 +278,9 @@ static InlineResult InlineCallIfPossible( return IR; // success } -/// Return true if inlining of CS can block the caller from being -/// inlined which is proved to be more beneficial. \p IC is the -/// estimated inline cost associated with callsite \p CS. -/// \p TotalSecondaryCost will be set to the estimated cost of inlining the -/// caller if \p CS is suppressed for inlining. -static bool -shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, - int &TotalSecondaryCost, - function_ref<InlineCost(CallSite CS)> GetInlineCost) { - // For now we only handle local or inline functions. - if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) - return false; - // If the cost of inlining CS is non-positive, it is not going to prevent the - // caller from being inlined into its callers and hence we don't need to - // defer. - if (IC.getCost() <= 0) - return false; - // Try to detect the case where the current inlining candidate caller (call - // it B) is a static or linkonce-ODR function and is an inlining candidate - // elsewhere, and the current candidate callee (call it C) is large enough - // that inlining it into B would make B too big to inline later. In these - // circumstances it may be best not to inline C into B, but to inline B into - // its callers. - // - // This only applies to static and linkonce-ODR functions because those are - // expected to be available for inlining in the translation units where they - // are used. Thus we will always have the opportunity to make local inlining - // decisions. Importantly the linkonce-ODR linkage covers inline functions - // and templates in C++. - // - // FIXME: All of this logic should be sunk into getInlineCost. It relies on - // the internal implementation of the inline cost metrics rather than - // treating them as truly abstract units etc. - TotalSecondaryCost = 0; - // The candidate cost to be imposed upon the current function. - int CandidateCost = IC.getCost() - 1; - // If the caller has local linkage and can be inlined to all its callers, we - // can apply a huge negative bonus to TotalSecondaryCost. - bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse(); - // This bool tracks what happens if we DO inline C into B. - bool inliningPreventsSomeOuterInline = false; - for (User *U : Caller->users()) { - // If the caller will not be removed (either because it does not have a - // local linkage or because the LastCallToStaticBonus has been already - // applied), then we can exit the loop early. 
- if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost()) - return false; - CallSite CS2(U); - - // If this isn't a call to Caller (it could be some other sort - // of reference) skip it. Such references will prevent the caller - // from being removed. - if (!CS2 || CS2.getCalledFunction() != Caller) { - ApplyLastCallBonus = false; - continue; - } - - InlineCost IC2 = GetInlineCost(CS2); - ++NumCallerCallersAnalyzed; - if (!IC2) { - ApplyLastCallBonus = false; - continue; - } - if (IC2.isAlways()) - continue; - - // See if inlining of the original callsite would erase the cost delta of - // this callsite. We subtract off the penalty for the call instruction, - // which we would be deleting. - if (IC2.getCostDelta() <= CandidateCost) { - inliningPreventsSomeOuterInline = true; - TotalSecondaryCost += IC2.getCost(); - } - } - // If all outer calls to Caller would get inlined, the cost for the last - // one is set very low by getInlineCost, in anticipation that Caller will - // be removed entirely. We did not account for this above unless there - // is only one caller of Caller. - if (ApplyLastCallBonus) - TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; - - if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) - return true; - - return false; -} - -static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R, - const ore::NV &Arg) { - return R << Arg.Val; -} - -template <class RemarkT> -RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { - using namespace ore; - if (IC.isAlways()) { - R << "(cost=always)"; - } else if (IC.isNever()) { - R << "(cost=never)"; - } else { - R << "(cost=" << ore::NV("Cost", IC.getCost()) - << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; - } - if (const char *Reason = IC.getReason()) - R << ": " << ore::NV("Reason", Reason); - return R; -} - -static std::string inlineCostStr(const InlineCost &IC) { - std::stringstream Remark; - Remark << IC; - return Remark.str(); -} - -/// Return the cost only if the inliner should attempt to inline at the given -/// CallSite. If we return the cost, we will emit an optimisation remark later -/// using that cost, so we won't do so from this function. 
-static Optional<InlineCost> -shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost, - OptimizationRemarkEmitter &ORE) { - using namespace ore; - - InlineCost IC = GetInlineCost(CS); - Instruction *Call = CS.getInstruction(); - Function *Callee = CS.getCalledFunction(); - Function *Caller = CS.getCaller(); - - if (IC.isAlways()) { - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) - << ", Call: " << *CS.getInstruction() << "\n"); - return IC; - } - - if (IC.isNever()) { - LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) - << ", Call: " << *CS.getInstruction() << "\n"); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) - << NV("Callee", Callee) << " not inlined into " - << NV("Caller", Caller) << " because it should never be inlined " - << IC; - }); - return IC; - } - - if (!IC) { - LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) - << ", Call: " << *CS.getInstruction() << "\n"); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) - << NV("Callee", Callee) << " not inlined into " - << NV("Caller", Caller) << " because too costly to inline " << IC; - }); - return IC; - } - - int TotalSecondaryCost = 0; - if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) { - LLVM_DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() - << " Cost = " << IC.getCost() - << ", outer Cost = " << TotalSecondaryCost << '\n'); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", - Call) - << "Not inlining. Cost of inlining " << NV("Callee", Callee) - << " increases the cost of inlining " << NV("Caller", Caller) - << " in other contexts"; - }); - - // IC does not bool() to false, so get an InlineCost that will. - // This will not be inspected to make an error message. - return None; - } - - LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) - << ", Call: " << *CS.getInstruction() << '\n'); - return IC; -} - /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. -static bool InlineHistoryIncludes( +static bool inlineHistoryIncludes( Function *F, int InlineHistoryID, const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) { while (InlineHistoryID != -1) { @@ -504,33 +305,13 @@ bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) { return inlineCalls(SCC); } -static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc, - const BasicBlock *Block, const Function &Callee, - const Function &Caller, const InlineCost &IC) { - ORE.emit([&]() { - bool AlwaysInline = IC.isAlways(); - StringRef RemarkName = AlwaysInline ? 
"AlwaysInline" : "Inlined"; - return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block) - << ore::NV("Callee", &Callee) << " inlined into " - << ore::NV("Caller", &Caller) << " with " << IC; - }); -} - -static void setInlineRemark(CallSite &CS, StringRef message) { - if (!InlineRemarkAttribute) - return; - - Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message); - CS.addAttribute(AttributeList::FunctionIndex, attr); -} - static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function<AssumptionCache &(Function &)> GetAssumptionCache, ProfileSummaryInfo *PSI, - std::function<TargetLibraryInfo &(Function &)> GetTLI, + std::function<const TargetLibraryInfo &(Function &)> GetTLI, bool InsertLifetime, - function_ref<InlineCost(CallSite CS)> GetInlineCost, + function_ref<InlineCost(CallBase &CB)> GetInlineCost, function_ref<AAResults &(Function &)> AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { SmallPtrSet<Function *, 8> SCCFunctions; @@ -545,7 +326,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // Scan through and identify all call sites ahead of time so that we only // inline call sites in the original functions, not call sites that result // from inlining other functions. - SmallVector<std::pair<CallSite, int>, 16> CallSites; + SmallVector<std::pair<CallBase *, int>, 16> CallSites; // When inlining a callee produces new call sites, we want to keep track of // the fact that they were inlined from the callee. This allows us to avoid @@ -561,31 +342,31 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, OptimizationRemarkEmitter ORE(F); for (BasicBlock &BB : *F) for (Instruction &I : BB) { - CallSite CS(cast<Value>(&I)); + auto *CB = dyn_cast<CallBase>(&I); // If this isn't a call, or it is a call to an intrinsic, it can // never be inlined. - if (!CS || isa<IntrinsicInst>(I)) + if (!CB || isa<IntrinsicInst>(I)) continue; // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. - if (Function *Callee = CS.getCalledFunction()) + if (Function *Callee = CB->getCalledFunction()) if (Callee->isDeclaration()) { using namespace ore; - setInlineRemark(CS, "unavailable definition"); + setInlineRemark(*CB, "unavailable definition"); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", CS.getCaller()) + << NV("Caller", CB->getCaller()) << " because its definition is unavailable" << setIsVerbose(); }); continue; } - CallSites.push_back(std::make_pair(CS, -1)); + CallSites.push_back(std::make_pair(CB, -1)); } } @@ -598,13 +379,13 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. 
unsigned FirstCallInSCC = CallSites.size(); - for (unsigned i = 0; i < FirstCallInSCC; ++i) - if (Function *F = CallSites[i].first.getCalledFunction()) + for (unsigned I = 0; I < FirstCallInSCC; ++I) + if (Function *F = CallSites[I].first->getCalledFunction()) if (SCCFunctions.count(F)) - std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); + std::swap(CallSites[I--], CallSites[--FirstCallInSCC]); InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI); + InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -616,31 +397,28 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // calls to become direct calls. // CallSites may be modified inside so ranged for loop can not be used. for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { - CallSite CS = CallSites[CSi].first; + auto &P = CallSites[CSi]; + CallBase &CB = *P.first; + const int InlineHistoryID = P.second; - Function *Caller = CS.getCaller(); - Function *Callee = CS.getCalledFunction(); + Function *Caller = CB.getCaller(); + Function *Callee = CB.getCalledFunction(); // We can only inline direct calls to non-declarations. if (!Callee || Callee->isDeclaration()) continue; - Instruction *Instr = CS.getInstruction(); - - bool IsTriviallyDead = - isInstructionTriviallyDead(Instr, &GetTLI(*Caller)); + bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller)); - int InlineHistoryID; if (!IsTriviallyDead) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. - InlineHistoryID = CallSites[CSi].second; if (InlineHistoryID != -1 && - InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) { - setInlineRemark(CS, "recursive"); + inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) { + setInlineRemark(CB, "recursive"); continue; } } @@ -650,56 +428,49 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // just become a regular analysis dependency. OptimizationRemarkEmitter ORE(Caller); - Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE); + auto OIC = shouldInline(CB, GetInlineCost, ORE); // If the policy determines that we should inline this function, // delete the call instead. - if (!OIC.hasValue()) { - setInlineRemark(CS, "deferred"); - continue; - } - - if (!OIC.getValue()) { - // shouldInline() call returned a negative inline cost that explains - // why this callsite should not be inlined. - setInlineRemark(CS, inlineCostStr(*OIC)); + if (!OIC) continue; - } // If this call site is dead and it is to a readonly function, we should // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. if (IsTriviallyDead) { - LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n"); + LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n"); // Update the call graph by deleting the edge from Callee to Caller. 
- setInlineRemark(CS, "trivially dead"); - CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction())); - Instr->eraseFromParent(); + setInlineRemark(CB, "trivially dead"); + CG[Caller]->removeCallEdgeFor(CB); + CB.eraseFromParent(); ++NumCallsDeleted; } else { - // Get DebugLoc to report. CS will be invalid after Inliner. - DebugLoc DLoc = CS->getDebugLoc(); - BasicBlock *Block = CS.getParent(); + // Get DebugLoc to report. CB will be invalid after Inliner. + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *Block = CB.getParent(); // Attempt to inline the function. using namespace ore; - InlineResult IR = InlineCallIfPossible( - CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, + InlineResult IR = inlineCallIfPossible( + CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime, AARGetter, ImportedFunctionsStats); - if (!IR) { - setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC)); + if (!IR.isSuccess()) { + setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " + + inlineCostStr(*OIC)); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", Caller) << ": " << NV("Reason", IR.message); + << NV("Caller", Caller) << ": " + << NV("Reason", IR.getFailureReason()); }); continue; } ++NumInlined; - emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC); + emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC); // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. @@ -709,8 +480,23 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, int NewHistoryID = InlineHistory.size(); InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); - for (Value *Ptr : InlineInfo.InlinedCalls) - CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); +#ifndef NDEBUG + // Make sure there are no duplicates in the inline candidates. This could + // happen when a callsite is simplified to reuse the return value + // of another callsite during function cloning, thus the other + // callsite will be reconsidered here.
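The failure path above uses the reworked InlineResult API: it is no longer implicitly convertible to bool, so success is queried with isSuccess() and the explanation with getFailureReason(). A sketch of the new idiom (tryInline and the errs() reporting are illustrative, not from the patch):

#include "llvm/Analysis/InlineCost.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

// Attempt an inline and report why it failed, using the new InlineResult
// accessors that replace the old implicit-bool test and .message field.
static bool tryInline(CallBase &CB, InlineFunctionInfo &IFI) {
  InlineResult Res = InlineFunction(CB, IFI);
  if (!Res.isSuccess()) {
    errs() << "not inlined: " << Res.getFailureReason() << "\n";
    return false;
  }
  return true;
}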
+ DenseSet<CallBase *> DbgCallSites; + for (auto &II : CallSites) + DbgCallSites.insert(II.first); +#endif + + for (Value *Ptr : InlineInfo.InlinedCalls) { +#ifndef NDEBUG + assert(DbgCallSites.count(dyn_cast<CallBase>(Ptr)) == 0); +#endif + CallSites.push_back( + std::make_pair(dyn_cast<CallBase>(Ptr), NewHistoryID)); + } } } @@ -759,7 +545,7 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); ACT = &getAnalysis<AssumptionCacheTracker>(); PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - auto GetTLI = [&](Function &F) -> TargetLibraryInfo & { + GetTLI = [&](Function &F) -> const TargetLibraryInfo & { return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); }; auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { @@ -767,7 +553,7 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { }; return inlineCallsImpl( SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, - [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this), + [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this), ImportedFunctionsStats); } @@ -870,16 +656,47 @@ InlinerPass::~InlinerPass() { } } +InlineAdvisor & +InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, + FunctionAnalysisManager &FAM, Module &M) { + auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M); + if (!IAA) { + // It should still be possible to run the inliner as a stand-alone SCC pass, + // for test scenarios. In that case, we default to the + // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass + // runs. It also uses just the default InlineParams. + // In this case, we need to use the provided FAM, which is valid for the + // duration of the inliner pass, and thus the lifetime of the owned advisor. + // The one we would get from the MAM can be invalidated as a result of the + // inliner's activity. + OwnedDefaultAdvisor.emplace(FAM, getInlineParams()); + return *OwnedDefaultAdvisor; + } + assert(IAA->getAdvisor() && + "Expected a present InlineAdvisorAnalysis also have an " + "InlineAdvisor initialized"); + return *IAA->getAdvisor(); +} + PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { - const ModuleAnalysisManager &MAM = - AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager(); + const auto &MAMProxy = + AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG); bool Changed = false; assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); Module &M = *InitialC.begin()->getFunction().getParent(); - ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M); + ProfileSummaryInfo *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(M); + + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG) + .getManager(); + + InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M); + Advisor.onPassEntry(); + + auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); }); if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { @@ -912,11 +729,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // this model, but it is uniformly spread across all the functions in the SCC // and eventually they all become too large to inline, rather than // incrementally maknig a single function grow in a super linear fashion. 
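For context, every worklist entry carries the id of the inline-history record it was produced from, and recursion is detected by walking that chain. A sketch matching inlineHistoryIncludes above, whose body the diff context elides (historyIncludes is an assumed name):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <utility>
using namespace llvm;

// Each history record is (callee that was inlined, id of the record for the
// call site it was inlined through); -1 terminates the chain.
static bool historyIncludes(
    Function *F, int HistoryID,
    const SmallVectorImpl<std::pair<Function *, int>> &History) {
  while (HistoryID != -1) {
    assert(unsigned(HistoryID) < History.size() && "invalid history id");
    if (History[HistoryID].first == F)
      return true;
    HistoryID = History[HistoryID].second;
  }
  return false;
}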
- SmallVector<std::pair<CallSite, int>, 16> Calls; - - FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG) - .getManager(); + SmallVector<std::pair<CallBase *, int>, 16> Calls; // Populate the initial list of calls in this SCC. for (auto &N : InitialC) { @@ -928,17 +741,17 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // FIXME: Using instructions sequence is a really bad way to do this. // Instead we should do an actual RPO walk of the function body. for (Instruction &I : instructions(N.getFunction())) - if (auto CS = CallSite(&I)) - if (Function *Callee = CS.getCalledFunction()) { + if (auto *CB = dyn_cast<CallBase>(&I)) + if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) - Calls.push_back({CS, -1}); + Calls.push_back({CB, -1}); else if (!isa<IntrinsicInst>(I)) { using namespace ore; - setInlineRemark(CS, "unavailable definition"); + setInlineRemark(*CB, "unavailable definition"); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", CS.getCaller()) + << NV("Caller", CB->getCaller()) << " because its definition is unavailable" << setIsVerbose(); }); @@ -969,68 +782,41 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // Loop forward over all of the calls. Note that we cannot cache the size as // inlining can introduce new calls that need to be processed. - for (int i = 0; i < (int)Calls.size(); ++i) { + for (int I = 0; I < (int)Calls.size(); ++I) { // We expect the calls to typically be batched with sequences of calls that // have the same caller, so we first set up some shared infrastructure for // this caller. We also do any pruning we can at this layer on the caller // alone. - Function &F = *Calls[i].first.getCaller(); + Function &F = *Calls[I].first->getCaller(); LazyCallGraph::Node &N = *CG.lookup(F); if (CG.lookupSCC(N) != C) continue; - if (F.hasOptNone()) { - setInlineRemark(Calls[i].first, "optnone attribute"); + if (!Calls[I].first->getCalledFunction()->hasFnAttribute( + Attribute::AlwaysInline) && + F.hasOptNone()) { + setInlineRemark(*Calls[I].first, "optnone attribute"); continue; } LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"); - // Get a FunctionAnalysisManager via a proxy for this particular node. We - // do this each time we visit a node as the SCC may have changed and as - // we're going to mutate this particular function we want to make sure the - // proxy is in place to forward any invalidation events. We can use the - // manager we get here for looking up results for functions other than this - // node however because those functions aren't going to be mutated by this - // pass. - FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG) - .getManager(); - - // Get the remarks emission analysis for the caller. 
- auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - - std::function<AssumptionCache &(Function &)> GetAssumptionCache = - [&](Function &F) -> AssumptionCache & { + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); }; - auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { - return FAM.getResult<BlockFrequencyAnalysis>(F); - }; - - auto GetInlineCost = [&](CallSite CS) { - Function &Callee = *CS.getCalledFunction(); - auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee); - bool RemarksEnabled = - Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( - DEBUG_TYPE); - return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params, - CalleeTTI, GetAssumptionCache, {GetBFI}, PSI, - RemarksEnabled ? &ORE : nullptr); - }; - // Now process as many calls as we have within this caller in the sequnece. + // Now process as many calls as we have within this caller in the sequence. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. bool DidInline = false; - for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) { - int InlineHistoryID; - CallSite CS; - std::tie(CS, InlineHistoryID) = Calls[i]; - Function &Callee = *CS.getCalledFunction(); + for (; I < (int)Calls.size() && Calls[I].first->getCaller() == &F; ++I) { + auto &P = Calls[I]; + CallBase *CB = P.first; + const int InlineHistoryID = P.second; + Function &Callee = *CB->getCalledFunction(); if (InlineHistoryID != -1 && - InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { - setInlineRemark(CS, "recursive"); + inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) { + setInlineRemark(*CB, "recursive"); continue; } @@ -1044,62 +830,53 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " "previously split out of this SCC by inlining: " << F.getName() << " -> " << Callee.getName() << "\n"); - setInlineRemark(CS, "recursive SCC split"); + setInlineRemark(*CB, "recursive SCC split"); continue; } - Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE); + auto Advice = Advisor.getAdvice(*CB); // Check whether we want to inline this callsite. - if (!OIC.hasValue()) { - setInlineRemark(CS, "deferred"); - continue; - } - - if (!OIC.getValue()) { - // shouldInline() call returned a negative inline cost that explains - // why this callsite should not be inlined. - setInlineRemark(CS, inlineCostStr(*OIC)); + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); continue; } // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( - /*cg=*/nullptr, &GetAssumptionCache, PSI, - &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())), + /*cg=*/nullptr, GetAssumptionCache, PSI, + &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())), &FAM.getResult<BlockFrequencyAnalysis>(Callee)); - // Get DebugLoc to report. CS will be invalid after Inliner. 
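The cost and remark plumbing removed here is subsumed by the InlineAdvisor handshake used just above: ask for advice, act on it, then record the outcome so the advisor can emit remarks or learn from it. A condensed sketch of that protocol (adviseAndInline is an assumed name; error paths simplified):

#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

// One call site's trip through the advisor, mirroring the loop above.
static void adviseAndInline(InlineAdvisor &Advisor, CallBase &CB,
                            InlineFunctionInfo &IFI) {
  std::unique_ptr<InlineAdvice> Advice = Advisor.getAdvice(CB);
  if (!Advice->isInliningRecommended()) {
    Advice->recordUnattemptedInlining();
    return;
  }
  InlineResult Res = InlineFunction(CB, IFI);
  if (!Res.isSuccess()) {
    Advice->recordUnsuccessfulInlining(Res);
    return;
  }
  Advice->recordInlining(); // or recordInliningWithCalleeDeleted()
}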
- DebugLoc DLoc = CS->getDebugLoc(); - BasicBlock *Block = CS.getParent(); - - using namespace ore; - - InlineResult IR = InlineFunction(CS, IFI); - if (!IR) { - setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC)); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) - << NV("Callee", &Callee) << " will not be inlined into " - << NV("Caller", &F) << ": " << NV("Reason", IR.message); - }); + InlineResult IR = InlineFunction(*CB, IFI); + if (!IR.isSuccess()) { + Advice->recordUnsuccessfulInlining(IR); continue; } + DidInline = true; InlinedCallees.insert(&Callee); - ++NumInlined; - emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC); - // Add any new callsites to defined functions to the worklist. if (!IFI.InlinedCallSites.empty()) { int NewHistoryID = InlineHistory.size(); InlineHistory.push_back({&Callee, InlineHistoryID}); - for (CallSite &CS : reverse(IFI.InlinedCallSites)) - if (Function *NewCallee = CS.getCalledFunction()) + + for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { + Function *NewCallee = ICB->getCalledFunction(); + if (!NewCallee) { + // Try to promote an indirect (virtual) call without waiting for + // the post-inline cleanup and the next DevirtSCCRepeatedPass + // iteration because the next iteration may not happen and we may + // miss inlining it. + if (tryPromoteCall(*ICB)) + NewCallee = ICB->getCalledFunction(); + } + if (NewCallee) if (!NewCallee->isDeclaration()) - Calls.push_back({CS, NewHistoryID}); + Calls.push_back({ICB, NewHistoryID}); + } } if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) @@ -1112,15 +889,16 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // dead. In that case, we can drop the body of the function eagerly // which may reduce the number of callers of other functions to one, // changing inline cost thresholds. + bool CalleeWasDeleted = false; if (Callee.hasLocalLinkage()) { // To check this we also need to nuke any dead constant uses (perhaps // made dead by this operation on other functions). Callee.removeDeadConstantUsers(); if (Callee.use_empty() && !CG.isLibFunction(Callee)) { Calls.erase( - std::remove_if(Calls.begin() + i + 1, Calls.end(), - [&Callee](const std::pair<CallSite, int> &Call) { - return Call.first.getCaller() == &Callee; + std::remove_if(Calls.begin() + I + 1, Calls.end(), + [&](const std::pair<CallBase *, int> &Call) { + return Call.first->getCaller() == &Callee; }), Calls.end()); // Clear the body and queue the function itself for deletion when we @@ -1131,13 +909,18 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && "Cannot put cause a function to become dead twice!"); DeadFunctions.push_back(&Callee); + CalleeWasDeleted = true; } } + if (CalleeWasDeleted) + Advice->recordInliningWithCalleeDeleted(); + else + Advice->recordInlining(); } // Back the call index up by one to put us in a good position to go around // the outer loop. - --i; + --I; if (!DidInline) continue; @@ -1163,8 +946,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // essentially do all of the same things as a function pass and we can // re-use the exact same logic for updating the call graph to reflect the // change. + + // Inside the update, we also update the FunctionAnalysisManager in the + // proxy for this particular SCC. 
We do this as the SCC may have changed and + // as we're going to mutate this particular function we want to make sure + // the proxy is in place to forward any invalidation events. LazyCallGraph::SCC *OldC = C; - C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR); + C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR, FAM); LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n"); RC = &C->getOuterRefSCC(); @@ -1208,11 +996,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // sets. for (Function *DeadF : DeadFunctions) { // Get the necessary information out of the call graph and nuke the - // function there. Also, cclear out any cached analyses. + // function there. Also, clear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); - FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG) - .getManager(); FAM.clear(*DeadF, DeadF->getName()); AM.clear(DeadC, DeadC.getName()); auto &DeadRC = DeadC.getOuterRefSCC(); @@ -1224,7 +1009,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, UR.InvalidatedRefSCCs.insert(&DeadRC); // And delete the actual function from the module. - M.getFunctionList().erase(DeadF); + // The Advisor may use Function pointers to efficiently index various + // internal maps, e.g. for memoization. Function cleanup passes like + // argument promotion create new functions. It is possible for a new + // function to be allocated at the address of a deleted function. We could + // index using names, but that's inefficient. Alternatively, we let the + // Advisor free the functions when it sees fit. + DeadF->getBasicBlockList().clear(); + M.getFunctionList().remove(DeadF); + ++NumDeleted; } @@ -1237,3 +1030,45 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); return PA; } + +ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, + bool Debugging, + InliningAdvisorMode Mode, + unsigned MaxDevirtIterations) + : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations), + PM(Debugging), MPM(Debugging) { + // Run the inliner first. The theory is that we are walking bottom-up and so + // the callees have already been fully optimized, and we want to inline them + // into the callers so that our optimizations can reflect that. + // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO + // because it makes profile annotation in the backend inaccurate. + PM.addPass(InlinerPass()); +} + +PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, + ModuleAnalysisManager &MAM) { + auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M); + if (!IAA.tryCreate(Params, Mode)) { + M.getContext().emitError( + "Could not setup Inlining Advisor for the requested " + "mode and/or options"); + return PreservedAnalyses::all(); + } + + // We wrap the CGSCC pipeline in a devirtualization repeater. This will try + // to detect when we devirtualize indirect calls and iterate the SCC passes + // in that case to try and catch knock-on inlining or function attrs + // opportunities. Then we add it to the module pipeline by walking the SCCs + // in postorder (or bottom-up). + // If MaxDevirtIterations is 0, we just don't use the devirtualization + // wrapper. 
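// ---------------------------------------------------------------------------
// Editor's note: illustrative usage sketch, not part of the patch. It shows
// how a pipeline might instantiate the wrapper whose constructor appears
// above. The header path, the InliningAdvisorMode::Default enumerator, the
// iteration count, and the helper name are assumptions made for the example;
// the constructor signature itself is the one introduced by this patch.
#include "llvm/Analysis/InlineCost.h"      // getInlineParams()
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/Inliner.h"   // assumed to declare the wrapper

static void addInlinerToPipeline(llvm::ModulePassManager &MPM) {
  MPM.addPass(llvm::ModuleInlinerWrapperPass(
      llvm::getInlineParams(), /*Debugging=*/false,
      llvm::InliningAdvisorMode::Default, /*MaxDevirtIterations=*/4));
}
// ---------------------------------------------------------------------------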
+ if (MaxDevirtIterations == 0) + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM))); + else + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( + createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations))); + auto Ret = MPM.run(M, MAM); + + IAA.clear(); + return Ret; +} diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp index f7108e8002ac9..f7f5b4cf67041 100644 --- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -15,7 +15,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -36,22 +36,30 @@ using namespace llvm; STATISTIC(NumExtracted, "Number of loops extracted"); namespace { - struct LoopExtractor : public LoopPass { + struct LoopExtractor : public ModulePass { static char ID; // Pass identification, replacement for typeid + + // The number of natural loops to extract from the program into functions. unsigned NumLoops; explicit LoopExtractor(unsigned numLoops = ~0) - : LoopPass(ID), NumLoops(numLoops) { - initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); - } + : ModulePass(ID), NumLoops(numLoops) { + initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); + } - bool runOnLoop(Loop *L, LPPassManager &) override; + bool runOnModule(Module &M) override; + bool runOnFunction(Function &F); + + bool extractLoops(Loop::iterator From, Loop::iterator To, LoopInfo &LI, + DominatorTree &DT); + bool extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredID(BreakCriticalEdgesID); - AU.addRequiredID(LoopSimplifyID); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequiredID(LoopSimplifyID); AU.addUsedIfAvailable<AssumptionCacheTracker>(); } }; @@ -61,8 +69,9 @@ char LoopExtractor::ID = 0; INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract", "Extract loops into new functions", false, false) INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) -INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopExtractor, "loop-extract", "Extract loops into new functions", false, false) @@ -83,81 +92,130 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", // Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } -bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { - if (skipLoop(L)) +bool LoopExtractor::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + if (M.empty()) + return false; + + if (!NumLoops) return false; - // Only visit top-level loops. - if (L->getParentLoop()) + bool Changed = false; + + // The end of the function list may change (new functions will be added at the + // end), so we run from the first to the current last. + auto I = M.begin(), E = --M.end(); + while (true) { + Function &F = *I; + + Changed |= runOnFunction(F); + if (!NumLoops) + break; + + // If this is the last function. + if (I == E) + break; + + ++I; + } + return Changed; +} + +bool LoopExtractor::runOnFunction(Function &F) { + // Do not modify `optnone` functions. 
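// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It isolates the
// iteration pattern LoopExtractor::runOnModule uses above: loop extraction
// appends new functions to the module, so the walk is bounded by the function
// that was last *before* any extraction rather than by M.end(). The helper
// and its callback are made up for the example.
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include <iterator>

template <typename Callback>
static void forEachPreexistingFunction(llvm::Module &M, Callback Visit) {
  if (M.empty())
    return;
  auto I = M.begin(), LastOriginal = std::prev(M.end());
  while (true) {
    Visit(*I);                 // may append new functions past LastOriginal
    if (I == LastOriginal)
      break;                   // anything beyond this was created by Visit
    ++I;
  }
}
// ---------------------------------------------------------------------------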
+ if (F.hasOptNone()) return false; - // If LoopSimplify form is not available, stay out of trouble. - if (!L->isLoopSimplifyForm()) + if (F.empty()) return false; - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); bool Changed = false; + LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(F, &Changed).getLoopInfo(); + + // If there are no loops in the function. + if (LI.empty()) + return Changed; + + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); // If there is more than one top-level loop in this function, extract all of - // the loops. Otherwise there is exactly one top-level loop; in this case if - // this function is more than a minimal wrapper around the loop, extract - // the loop. - bool ShouldExtractLoop = false; - - // Extract the loop if the entry block doesn't branch to the loop header. - Instruction *EntryTI = - L->getHeader()->getParent()->getEntryBlock().getTerminator(); - if (!isa<BranchInst>(EntryTI) || - !cast<BranchInst>(EntryTI)->isUnconditional() || - EntryTI->getSuccessor(0) != L->getHeader()) { - ShouldExtractLoop = true; - } else { - // Check to see if any exits from the loop are more than just return - // blocks. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) { - ShouldExtractLoop = true; - break; - } + // the loops. + if (std::next(LI.begin()) != LI.end()) + return Changed | extractLoops(LI.begin(), LI.end(), LI, DT); + + // Otherwise there is exactly one top-level loop. + Loop *TLL = *LI.begin(); + + // If the loop is in LoopSimplify form, then extract it only if this function + // is more than a minimal wrapper around the loop. + if (TLL->isLoopSimplifyForm()) { + bool ShouldExtractLoop = false; + + // Extract the loop if the entry block doesn't branch to the loop header. + Instruction *EntryTI = F.getEntryBlock().getTerminator(); + if (!isa<BranchInst>(EntryTI) || + !cast<BranchInst>(EntryTI)->isUnconditional() || + EntryTI->getSuccessor(0) != TLL->getHeader()) { + ShouldExtractLoop = true; + } else { + // Check to see if any exits from the loop are more than just return + // blocks. + SmallVector<BasicBlock *, 8> ExitBlocks; + TLL->getExitBlocks(ExitBlocks); + for (auto *ExitBlock : ExitBlocks) + if (!isa<ReturnInst>(ExitBlock->getTerminator())) { + ShouldExtractLoop = true; + break; + } + } + + if (ShouldExtractLoop) + return Changed | extractLoop(TLL, LI, DT); } - if (ShouldExtractLoop) { - // We must omit EH pads. EH pads must accompany the invoke - // instruction. But this would result in a loop in the extracted - // function. An infinite cycle occurs when it tries to extract that loop as - // well. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (ExitBlocks[i]->isEHPad()) { - ShouldExtractLoop = false; - break; - } + // Okay, this function is a minimal container around the specified loop. + // If we extract the loop, we will continue to just keep extracting it + // infinitely... so don't extract it. However, if the loop contains any + // sub-loops, extract them. 
+ return Changed | extractLoops(TLL->begin(), TLL->end(), LI, DT); +} + +bool LoopExtractor::extractLoops(Loop::iterator From, Loop::iterator To, + LoopInfo &LI, DominatorTree &DT) { + bool Changed = false; + SmallVector<Loop *, 8> Loops; + + // Save the list of loops, as it may change. + Loops.assign(From, To); + for (Loop *L : Loops) { + // If LoopSimplify form is not available, stay out of trouble. + if (!L->isLoopSimplifyForm()) + continue; + + Changed |= extractLoop(L, LI, DT); + if (!NumLoops) + break; } + return Changed; +} - if (ShouldExtractLoop) { - if (NumLoops == 0) return Changed; +bool LoopExtractor::extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT) { + assert(NumLoops != 0); + AssumptionCache *AC = nullptr; + Function &Func = *L->getHeader()->getParent(); + if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>()) + AC = ACT->lookupAssumptionCache(Func); + CodeExtractorAnalysisCache CEAC(Func); + CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC); + if (Extractor.extractCodeRegion(CEAC)) { + LI.erase(L); --NumLoops; - AssumptionCache *AC = nullptr; - Function &Func = *L->getHeader()->getParent(); - if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>()) - AC = ACT->lookupAssumptionCache(Func); - CodeExtractorAnalysisCache CEAC(Func); - CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC); - if (Extractor.extractCodeRegion(CEAC) != nullptr) { - Changed = true; - // After extraction, the loop is replaced by a function call, so - // we shouldn't try to run any more loop passes on it. - LPM.markLoopAsDeleted(*L); - LI.erase(L); - } ++NumExtracted; + return true; } - - return Changed; + return false; } // createSingleLoopExtractorPass - This pass extracts one natural loop from the diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index fa664966faf74..8eef7e3e7e999 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -382,6 +382,9 @@ class LowerTypeTestsModule { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + // Set when the client has invoked this to simply drop all type test assume + // sequences. 
+ bool DropTypeTests; Triple::ArchType Arch; Triple::OSType OS; @@ -500,7 +503,8 @@ class LowerTypeTestsModule { public: LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests); bool lower(); @@ -516,22 +520,24 @@ struct LowerTypeTests : public ModulePass { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + bool DropTypeTests; LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } LowerTypeTests(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) + const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary) { + ImportSummary(ImportSummary), DropTypeTests(DropTypeTests) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override { if (UseCommandLine) return LowerTypeTestsModule::runForTesting(M); - return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower(); + return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) + .lower(); } }; @@ -544,8 +550,9 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, ModulePass * llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { - return new LowerTypeTests(ExportSummary, ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests) { + return new LowerTypeTests(ExportSummary, ImportSummary, DropTypeTests); } /// Build a bit set for TypeId using the object layouts in @@ -728,6 +735,9 @@ static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL, /// replace the call with. Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, const TypeIdLowering &TIL) { + // Delay lowering if the resolution is currently unknown. + if (TIL.TheKind == TypeTestResolution::Unknown) + return nullptr; if (TIL.TheKind == TypeTestResolution::Unsat) return ConstantInt::getFalse(M.getContext()); @@ -828,11 +838,10 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( uint64_t DesiredPadding = 0; for (GlobalTypeMember *G : Globals) { auto *GV = cast<GlobalVariable>(G->getGlobal()); - MaybeAlign Alignment(GV->getAlignment()); - if (!Alignment) - Alignment = Align(DL.getABITypeAlignment(GV->getValueType())); - MaxAlign = std::max(MaxAlign, *Alignment); - uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment); + Align Alignment = + DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType()); + MaxAlign = std::max(MaxAlign, Alignment); + uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, Alignment); GlobalLayout[G] = GVOffset; if (GVOffset != 0) { uint64_t Padding = GVOffset - CurOffset; @@ -1030,14 +1039,18 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { report_fatal_error("Second argument of llvm.type.test must be metadata"); auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata()); + // If this is a local unpromoted type, which doesn't have a metadata string, + // treat as Unknown and delay lowering, so that we can still utilize it for + // later optimizations. 
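// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It restates the
// fallback that DataLayout::getValueOrABITypeAlignment() performs in the
// buildBitSetsFromGlobalVariables hunk above: take the global's explicit
// alignment when present, otherwise the ABI alignment of its value type. The
// helper name is made up for the example.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/Alignment.h"

static llvm::Align layoutAlignment(const llvm::DataLayout &DL,
                                   const llvm::GlobalVariable &GV) {
  if (llvm::MaybeAlign A = GV.getAlign())
    return *A;                                        // explicit `align N`
  return llvm::Align(DL.getABITypeAlignment(GV.getValueType()));
}
// ---------------------------------------------------------------------------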
if (!TypeIdStr) - report_fatal_error( - "Second argument of llvm.type.test must be a metadata string"); + return; TypeIdLowering TIL = importTypeId(TypeIdStr->getString()); Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); + if (Lowered) { + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } } // ThinLTO backend: the function F has a jump table entry; update this module @@ -1048,7 +1061,7 @@ void LowerTypeTestsModule::importFunction( assert(F->getType()->getAddressSpace() == 0); GlobalValue::VisibilityTypes Visibility = F->getVisibility(); - std::string Name = F->getName(); + std::string Name = std::string(F->getName()); if (F->isDeclarationForLinker() && isJumpTableCanonical) { // Non-dso_local functions may be overriden at run time, @@ -1160,8 +1173,10 @@ void LowerTypeTestsModule::lowerTypeTestCalls( for (CallInst *CI : TIUI.CallSites) { ++NumTypeTestCallsLowered; Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); + if (Lowered) { + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } } } } @@ -1269,7 +1284,7 @@ void LowerTypeTestsModule::moveInitializerToModuleConstructor( IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator()); GV->setConstant(false); - IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlignment()); + IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlign()); GV->setInitializer(Constant::getNullValue(GV->getValueType())); } @@ -1516,13 +1531,13 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( F->getType()); if (Functions[I]->isExported()) { if (IsJumpTableCanonical) { - ExportSummary->cfiFunctionDefs().insert(F->getName()); + ExportSummary->cfiFunctionDefs().insert(std::string(F->getName())); } else { GlobalAlias *JtAlias = GlobalAlias::create( F->getValueType(), 0, GlobalValue::ExternalLinkage, F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); JtAlias->setVisibility(GlobalValue::HiddenVisibility); - ExportSummary->cfiFunctionDecls().insert(F->getName()); + ExportSummary->cfiFunctionDecls().insert(std::string(F->getName())); } } if (!IsJumpTableCanonical) { @@ -1655,8 +1670,9 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet( /// Lower all type tests in this module. LowerTypeTestsModule::LowerTypeTestsModule( Module &M, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) { + const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) + : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DropTypeTests(DropTypeTests) { assert(!(ExportSummary && ImportSummary)); Triple TargetTriple(M.getTargetTriple()); Arch = TargetTriple.getArch(); @@ -1683,7 +1699,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { bool Changed = LowerTypeTestsModule( M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr, - ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr) + ClSummaryAction == PassSummaryAction::Import ? 
&Summary : nullptr, + /*DropTypeTests*/ false) .lower(); if (!ClWriteSummary.empty()) { @@ -1703,8 +1720,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { static bool isDirectCall(Use& U) { auto *Usr = dyn_cast<CallInst>(U.getUser()); if (Usr) { - CallSite CS(Usr); - if (CS.isCallee(&U)) + auto *CB = dyn_cast<CallBase>(Usr); + if (CB && CB->isCallee(&U)) return true; } return false; @@ -1750,6 +1767,33 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { } bool LowerTypeTestsModule::lower() { + Function *TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + + if (DropTypeTests && TypeTestFunc) { + for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); + UI != UE;) { + auto *CI = cast<CallInst>((*UI++).getUser()); + // Find and erase llvm.assume intrinsics for this llvm.type.test call. + for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;) { + if (auto *AssumeCI = dyn_cast<CallInst>((*CIU++).getUser())) { + Function *F = AssumeCI->getCalledFunction(); + if (F && F->getIntrinsicID() == Intrinsic::assume) + AssumeCI->eraseFromParent(); + } + } + CI->eraseFromParent(); + } + + // We have deleted the type intrinsics, so we no longer have enough + // information to reason about the liveness of virtual function pointers + // in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + + return true; + } + // If only some of the modules were split, we cannot correctly perform // this transformation. We already checked for the presense of type tests // with partially split modules during the thin link, and would have emitted @@ -1758,8 +1802,6 @@ bool LowerTypeTestsModule::lower() { (ImportSummary && ImportSummary->partiallySplitLTOUnits())) return false; - Function *TypeTestFunc = - M.getFunction(Intrinsic::getName(Intrinsic::type_test)); Function *ICallBranchFunnelFunc = M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel)); if ((!TypeTestFunc || TypeTestFunc->use_empty()) && @@ -1787,9 +1829,10 @@ bool LowerTypeTestsModule::lower() { // have the same name, but it's not the one we are looking for. 
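// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It isolates the
// iterator discipline the DropTypeTests loop above relies on: the use
// iterator is advanced *before* the user is erased, so deleting the current
// user cannot invalidate the loop's position. The helper is made up for the
// example; like the loop above, it assumes each user references the function
// through a single use (true for llvm.type.test, which only ever appears as
// the callee), and it only drops calls whose results are unused.
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

static void eraseUnusedCallsTo(llvm::Function &Callee) {
  for (auto UI = Callee.use_begin(), UE = Callee.use_end(); UI != UE;) {
    llvm::Use &U = *UI++;                       // step first, erase after
    if (auto *CI = llvm::dyn_cast<llvm::CallInst>(U.getUser()))
      if (CI->use_empty())
        CI->eraseFromParent();
  }
}
// ---------------------------------------------------------------------------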
if (F.hasLocalLinkage()) continue; - if (ImportSummary->cfiFunctionDefs().count(F.getName())) + if (ImportSummary->cfiFunctionDefs().count(std::string(F.getName()))) Defs.push_back(&F); - else if (ImportSummary->cfiFunctionDecls().count(F.getName())) + else if (ImportSummary->cfiFunctionDecls().count( + std::string(F.getName()))) Decls.push_back(&F); } @@ -2196,7 +2239,9 @@ bool LowerTypeTestsModule::lower() { PreservedAnalyses LowerTypeTestsPass::run(Module &M, ModuleAnalysisManager &AM) { - bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower(); + bool Changed = + LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) + .lower(); if (!Changed) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 06d2a2f319412..8cc19515f3db8 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -95,7 +95,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -467,13 +466,13 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) { Use *U = &*UI; ++UI; - CallSite CS(U->getUser()); - if (CS && CS.isCallee(U)) { + CallBase *CB = dyn_cast<CallBase>(U->getUser()); + if (CB && CB->isCallee(U)) { // Do not copy attributes from the called function to the call-site. // Function comparison ensures that the attributes are the same up to // type congruences in byval(), in which case we need to keep the byval // type of the call-site, not the callee function. - remove(CS.getInstruction()->getFunction()); + remove(CB->getFunction()); U->set(BitcastNew); } } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp new file mode 100644 index 0000000000000..f664a24173747 --- /dev/null +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -0,0 +1,1501 @@ +//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// OpenMP specific optimizations: +// +// - Deduplication of runtime calls, e.g., omp_get_thread_num. 
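// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It shows, at the
// source level, the kind of redundancy the runtime-call deduplication below
// targets: within straight-line code of the same region the thread number
// cannot change, so the second query can reuse the first result.
#include <omp.h>

static int before(const int *PerThread) {
  int X = PerThread[omp_get_thread_num()];
  int Y = PerThread[omp_get_thread_num()];   // duplicate runtime call
  return X + Y;
}

static int after(const int *PerThread) {
  int Tid = omp_get_thread_num();            // single call, result reused
  return PerThread[Tid] + PerThread[Tid];
}
// ---------------------------------------------------------------------------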
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/OpenMPOpt.h" + +#include "llvm/ADT/EnumeratedArray.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/Utils/CallGraphUpdater.h" + +using namespace llvm; +using namespace omp; + +#define DEBUG_TYPE "openmp-opt" + +static cl::opt<bool> DisableOpenMPOptimizations( + "openmp-opt-disable", cl::ZeroOrMore, + cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, + cl::init(false)); + +static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), + cl::Hidden); +static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels", + cl::init(false), cl::Hidden); + +STATISTIC(NumOpenMPRuntimeCallsDeduplicated, + "Number of OpenMP runtime calls deduplicated"); +STATISTIC(NumOpenMPParallelRegionsDeleted, + "Number of OpenMP parallel regions deleted"); +STATISTIC(NumOpenMPRuntimeFunctionsIdentified, + "Number of OpenMP runtime functions identified"); +STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, + "Number of OpenMP runtime function uses identified"); +STATISTIC(NumOpenMPTargetRegionKernels, + "Number of OpenMP target region entry points (=kernels) identified"); +STATISTIC( + NumOpenMPParallelRegionsReplacedInGPUStateMachine, + "Number of OpenMP parallel regions replaced with ID in GPU state machines"); + +#if !defined(NDEBUG) +static constexpr auto TAG = "[" DEBUG_TYPE "]"; +#endif + +/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is +/// true, constant expression users are not given to \p CB but their uses are +/// traversed transitively. +template <typename CBTy> +static void foreachUse(Function &F, CBTy CB, + bool LookThroughConstantExprUses = true) { + SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses())); + + for (unsigned idx = 0; idx < Worklist.size(); ++idx) { + Use &U = *Worklist[idx]; + + // Allow use in constant bitcasts and simply look through them. + if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) { + for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses()) + Worklist.push_back(&CEU); + continue; + } + + CB(U); + } +} + +/// Helper struct to store tracked ICV values at specif instructions. 
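// ---------------------------------------------------------------------------
// Editor's note: illustrative usage sketch, not part of the patch. It uses
// the file-local foreachUse() helper defined above (so it is not
// self-contained outside this file) to count direct-call uses of a function;
// constant-expression casts are looked through by the helper itself. The
// function name is made up for the example.
static unsigned countDirectCallUses(llvm::Function &F) {
  unsigned NumDirectCalls = 0;
  foreachUse(F, [&](llvm::Use &U) {
    if (auto *CB = llvm::dyn_cast<llvm::CallBase>(U.getUser()))
      if (CB->isCallee(&U))
        ++NumDirectCalls;
  });
  return NumDirectCalls;
}
// ---------------------------------------------------------------------------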
+struct ICVValue { + Instruction *Inst; + Value *TrackedValue; + + ICVValue(Instruction *I, Value *Val) : Inst(I), TrackedValue(Val) {} +}; + +namespace llvm { + +// Provide DenseMapInfo for ICVValue +template <> struct DenseMapInfo<ICVValue> { + using InstInfo = DenseMapInfo<Instruction *>; + using ValueInfo = DenseMapInfo<Value *>; + + static inline ICVValue getEmptyKey() { + return ICVValue(InstInfo::getEmptyKey(), ValueInfo::getEmptyKey()); + }; + + static inline ICVValue getTombstoneKey() { + return ICVValue(InstInfo::getTombstoneKey(), ValueInfo::getTombstoneKey()); + }; + + static unsigned getHashValue(const ICVValue &ICVVal) { + return detail::combineHashValue( + InstInfo::getHashValue(ICVVal.Inst), + ValueInfo::getHashValue(ICVVal.TrackedValue)); + } + + static bool isEqual(const ICVValue &LHS, const ICVValue &RHS) { + return InstInfo::isEqual(LHS.Inst, RHS.Inst) && + ValueInfo::isEqual(LHS.TrackedValue, RHS.TrackedValue); + } +}; + +} // end namespace llvm + +namespace { + +struct AAICVTracker; + +/// OpenMP specific information. For now, stores RFIs and ICVs also needed for +/// Attributor runs. +struct OMPInformationCache : public InformationCache { + OMPInformationCache(Module &M, AnalysisGetter &AG, + BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, + SmallPtrSetImpl<Kernel> &Kernels) + : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), + Kernels(Kernels) { + initializeModuleSlice(CGSCC); + + OMPBuilder.initialize(); + initializeRuntimeFunctions(); + initializeInternalControlVars(); + } + + /// Generic information that describes an internal control variable. + struct InternalControlVarInfo { + /// The kind, as described by InternalControlVar enum. + InternalControlVar Kind; + + /// The name of the ICV. + StringRef Name; + + /// Environment variable associated with this ICV. + StringRef EnvVarName; + + /// Initial value kind. + ICVInitValue InitKind; + + /// Initial value. + ConstantInt *InitValue; + + /// Setter RTL function associated with this ICV. + RuntimeFunction Setter; + + /// Getter RTL function associated with this ICV. + RuntimeFunction Getter; + + /// RTL Function corresponding to the override clause of this ICV + RuntimeFunction Clause; + }; + + /// Generic information that describes a runtime function + struct RuntimeFunctionInfo { + + /// The kind, as described by the RuntimeFunction enum. + RuntimeFunction Kind; + + /// The name of the function. + StringRef Name; + + /// Flag to indicate a variadic function. + bool IsVarArg; + + /// The return type of the function. + Type *ReturnType; + + /// The argument types of the function. + SmallVector<Type *, 8> ArgumentTypes; + + /// The declaration if available. + Function *Declaration = nullptr; + + /// Uses of this runtime function per function containing the use. + using UseVector = SmallVector<Use *, 16>; + + /// Clear UsesMap for runtime function. + void clearUsesMap() { UsesMap.clear(); } + + /// Boolean conversion that is true if the runtime function was found. + operator bool() const { return Declaration; } + + /// Return the vector of uses in function \p F. + UseVector &getOrCreateUseVector(Function *F) { + std::shared_ptr<UseVector> &UV = UsesMap[F]; + if (!UV) + UV = std::make_shared<UseVector>(); + return *UV; + } + + /// Return the vector of uses in function \p F or `nullptr` if there are + /// none. 
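// ---------------------------------------------------------------------------
// Editor's note: illustrative usage sketch, not part of the patch. The
// DenseMapInfo<ICVValue> specialization earlier in this file (empty key,
// tombstone key, hash, equality) is exactly what allows ICVValue to act as a
// hashed key; for example, a DenseSet<ICVValue> can record which
// (instruction, value) pairs have already been tracked. The helper name is
// made up and relies on the ICVValue declarations above.
#include "llvm/ADT/DenseSet.h"

static bool recordTrackedValue(llvm::DenseSet<ICVValue> &Seen,
                               llvm::Instruction *I, llvm::Value *V) {
  return Seen.insert(ICVValue(I, V)).second;   // true if newly inserted
}
// ---------------------------------------------------------------------------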
+ const UseVector *getUseVector(Function &F) const { + auto I = UsesMap.find(&F); + if (I != UsesMap.end()) + return I->second.get(); + return nullptr; + } + + /// Return how many functions contain uses of this runtime function. + size_t getNumFunctionsWithUses() const { return UsesMap.size(); } + + /// Return the number of arguments (or the minimal number for variadic + /// functions). + size_t getNumArgs() const { return ArgumentTypes.size(); } + + /// Run the callback \p CB on each use and forget the use if the result is + /// true. The callback will be fed the function in which the use was + /// encountered as second argument. + void foreachUse(SmallVectorImpl<Function *> &SCC, + function_ref<bool(Use &, Function &)> CB) { + for (Function *F : SCC) + foreachUse(CB, F); + } + + /// Run the callback \p CB on each use within the function \p F and forget + /// the use if the result is true. + void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) { + SmallVector<unsigned, 8> ToBeDeleted; + ToBeDeleted.clear(); + + unsigned Idx = 0; + UseVector &UV = getOrCreateUseVector(F); + + for (Use *U : UV) { + if (CB(*U, *F)) + ToBeDeleted.push_back(Idx); + ++Idx; + } + + // Remove the to-be-deleted indices in reverse order as prior + // modifications will not modify the smaller indices. + while (!ToBeDeleted.empty()) { + unsigned Idx = ToBeDeleted.pop_back_val(); + UV[Idx] = UV.back(); + UV.pop_back(); + } + } + + private: + /// Map from functions to all uses of this runtime function contained in + /// them. + DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap; + }; + + /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains + /// (a subset of) all functions that we can look at during this SCC traversal. + /// This includes functions (transitively) called from the SCC and the + /// (transitive) callers of SCC functions. We also can look at a function if + /// there is a "reference edge", i.a., if the function somehow uses (!=calls) + /// a function in the SCC or a caller of a function in the SCC. + void initializeModuleSlice(SetVector<Function *> &SCC) { + ModuleSlice.insert(SCC.begin(), SCC.end()); + + SmallPtrSet<Function *, 16> Seen; + SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + for (Instruction &I : instructions(*F)) + if (auto *CB = dyn_cast<CallBase>(&I)) + if (Function *Callee = CB->getCalledFunction()) + if (Seen.insert(Callee).second) + Worklist.push_back(Callee); + } + + Seen.clear(); + Worklist.append(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + // Traverse all transitive uses. + foreachUse(*F, [&](Use &U) { + if (auto *UsrI = dyn_cast<Instruction>(U.getUser())) + if (Seen.insert(UsrI->getFunction()).second) + Worklist.push_back(UsrI->getFunction()); + }); + } + } + + /// The slice of the module we are allowed to look at. + SmallPtrSet<Function *, 8> ModuleSlice; + + /// An OpenMP-IR-Builder instance + OpenMPIRBuilder OMPBuilder; + + /// Map from runtime function kind to the runtime function description. + EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, + RuntimeFunction::OMPRTL___last> + RFIs; + + /// Map from ICV kind to the ICV description. 
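// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It isolates the
// removal scheme used by the per-function foreachUse() above: indices to
// delete are collected in ascending order during the walk and then processed
// back-to-front with a swap-with-last erase, so earlier indices stay valid
// and nothing is shifted more than once. Element order is not preserved,
// which is fine for a use list. The helper is made up for the example and
// assumes the ascending collection order.
#include "llvm/ADT/SmallVector.h"

template <typename T>
static void eraseIndicesUnordered(llvm::SmallVectorImpl<T> &V,
                                  llvm::SmallVectorImpl<unsigned> &ToErase) {
  while (!ToErase.empty()) {
    unsigned Idx = ToErase.pop_back_val();   // largest remaining index first
    V[Idx] = V.back();
    V.pop_back();
  }
}
// ---------------------------------------------------------------------------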
+ EnumeratedArray<InternalControlVarInfo, InternalControlVar, + InternalControlVar::ICV___last> + ICVs; + + /// Helper to initialize all internal control variable information for those + /// defined in OMPKinds.def. + void initializeInternalControlVars() { +#define ICV_RT_SET(_Name, RTL) \ + { \ + auto &ICV = ICVs[_Name]; \ + ICV.Setter = RTL; \ + } +#define ICV_RT_GET(Name, RTL) \ + { \ + auto &ICV = ICVs[Name]; \ + ICV.Getter = RTL; \ + } +#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ + { \ + auto &ICV = ICVs[Enum]; \ + ICV.Name = _Name; \ + ICV.Kind = Enum; \ + ICV.InitKind = Init; \ + ICV.EnvVarName = _EnvVarName; \ + switch (ICV.InitKind) { \ + case ICV_IMPLEMENTATION_DEFINED: \ + ICV.InitValue = nullptr; \ + break; \ + case ICV_ZERO: \ + ICV.InitValue = ConstantInt::get( \ + Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ + break; \ + case ICV_FALSE: \ + ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ + break; \ + case ICV_LAST: \ + break; \ + } \ + } +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } + + /// Returns true if the function declaration \p F matches the runtime + /// function types, that is, return type \p RTFRetType, and argument types + /// \p RTFArgTypes. + static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, + SmallVector<Type *, 8> &RTFArgTypes) { + // TODO: We should output information to the user (under debug output + // and via remarks). + + if (!F) + return false; + if (F->getReturnType() != RTFRetType) + return false; + if (F->arg_size() != RTFArgTypes.size()) + return false; + + auto RTFTyIt = RTFArgTypes.begin(); + for (Argument &Arg : F->args()) { + if (Arg.getType() != *RTFTyIt) + return false; + + ++RTFTyIt; + } + + return true; + } + + // Helper to collect all uses of the declaration in the UsesMap. + unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { + unsigned NumUses = 0; + if (!RFI.Declaration) + return NumUses; + OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); + + if (CollectStats) { + NumOpenMPRuntimeFunctionsIdentified += 1; + NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); + } + + // TODO: We directly convert uses into proper calls and unknown uses. + for (Use &U : RFI.Declaration->uses()) { + if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { + if (ModuleSlice.count(UserI->getFunction())) { + RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); + ++NumUses; + } + } else { + RFI.getOrCreateUseVector(nullptr).push_back(&U); + ++NumUses; + } + } + return NumUses; + } + + // Helper function to recollect uses of all runtime functions. + void recollectUses() { + for (int Idx = 0; Idx < RFIs.size(); ++Idx) { + auto &RFI = RFIs[static_cast<RuntimeFunction>(Idx)]; + RFI.clearUsesMap(); + collectUses(RFI, /*CollectStats*/ false); + } + } + + /// Helper to initialize all runtime function information for those defined + /// in OpenMPKinds.def. + void initializeRuntimeFunctions() { + Module &M = *((*ModuleSlice.begin())->getParent()); + + // Helper macros for handling __VA_ARGS__ in OMP_RTL +#define OMP_TYPE(VarName, ...) \ + Type *VarName = OMPBuilder.VarName; \ + (void)VarName; + +#define OMP_ARRAY_TYPE(VarName, ...) \ + ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ + (void)VarName##Ty; \ + PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ + (void)VarName##PtrTy; + +#define OMP_FUNCTION_TYPE(VarName, ...) 
\ + FunctionType *VarName = OMPBuilder.VarName; \ + (void)VarName; \ + PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ + (void)VarName##Ptr; + +#define OMP_STRUCT_TYPE(VarName, ...) \ + StructType *VarName = OMPBuilder.VarName; \ + (void)VarName; \ + PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ + (void)VarName##Ptr; + +#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ + { \ + SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ + Function *F = M.getFunction(_Name); \ + if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ + auto &RFI = RFIs[_Enum]; \ + RFI.Kind = _Enum; \ + RFI.Name = _Name; \ + RFI.IsVarArg = _IsVarArg; \ + RFI.ReturnType = OMPBuilder._ReturnType; \ + RFI.ArgumentTypes = std::move(ArgsTypes); \ + RFI.Declaration = F; \ + unsigned NumUses = collectUses(RFI); \ + (void)NumUses; \ + LLVM_DEBUG({ \ + dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ + << " found\n"; \ + if (RFI.Declaration) \ + dbgs() << TAG << "-> got " << NumUses << " uses in " \ + << RFI.getNumFunctionsWithUses() \ + << " different functions.\n"; \ + }); \ + } \ + } +#include "llvm/Frontend/OpenMP/OMPKinds.def" + + // TODO: We should attach the attributes defined in OMPKinds.def. + } + + /// Collection of known kernels (\see Kernel) in the module. + SmallPtrSetImpl<Kernel> &Kernels; +}; + +struct OpenMPOpt { + + using OptimizationRemarkGetter = + function_ref<OptimizationRemarkEmitter &(Function *)>; + + OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater, + OptimizationRemarkGetter OREGetter, + OMPInformationCache &OMPInfoCache, Attributor &A) + : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), + OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} + + /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. + bool run() { + if (SCC.empty()) + return false; + + bool Changed = false; + + LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() + << " functions in a slice with " + << OMPInfoCache.ModuleSlice.size() << " functions\n"); + + if (PrintICVValues) + printICVs(); + if (PrintOpenMPKernels) + printKernels(); + + Changed |= rewriteDeviceCodeStateMachine(); + + Changed |= runAttributor(); + + // Recollect uses, in case Attributor deleted any. + OMPInfoCache.recollectUses(); + + Changed |= deduplicateRuntimeCalls(); + Changed |= deleteParallelRegions(); + + return Changed; + } + + /// Print initial ICV values for testing. + /// FIXME: This should be done from the Attributor once it is added. + void printICVs() const { + InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; + + for (Function *F : OMPInfoCache.ModuleSlice) { + for (auto ICV : ICVs) { + auto ICVInfo = OMPInfoCache.ICVs[ICV]; + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) + << " Value: " + << (ICVInfo.InitValue + ? ICVInfo.InitValue->getValue().toString(10, true) + : "IMPLEMENTATION_DEFINED"); + }; + + emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); + } + } + } + + /// Print OpenMP GPU kernels for testing. + void printKernels() const { + for (Function *F : SCC) { + if (!OMPInfoCache.Kernels.count(F)) + continue; + + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP GPU kernel " + << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; + }; + + emitRemarkOnFunction(F, "OpenMPGPU", Remark); + } + } + + /// Return the call if \p U is a callee use in a regular call. 
If \p RFI is + /// given it has to be the callee or a nullptr is returned. + static CallInst *getCallIfRegularCall( + Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { + CallInst *CI = dyn_cast<CallInst>(U.getUser()); + if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && + (!RFI || CI->getCalledFunction() == RFI->Declaration)) + return CI; + return nullptr; + } + + /// Return the call if \p V is a regular call. If \p RFI is given it has to be + /// the callee or a nullptr is returned. + static CallInst *getCallIfRegularCall( + Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { + CallInst *CI = dyn_cast<CallInst>(&V); + if (CI && !CI->hasOperandBundles() && + (!RFI || CI->getCalledFunction() == RFI->Declaration)) + return CI; + return nullptr; + } + +private: + /// Try to delete parallel regions if possible. + bool deleteParallelRegions() { + const unsigned CallbackCalleeOperand = 2; + + OMPInformationCache::RuntimeFunctionInfo &RFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; + + if (!RFI.Declaration) + return false; + + bool Changed = false; + auto DeleteCallCB = [&](Use &U, Function &) { + CallInst *CI = getCallIfRegularCall(U); + if (!CI) + return false; + auto *Fn = dyn_cast<Function>( + CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); + if (!Fn) + return false; + if (!Fn->onlyReadsMemory()) + return false; + if (!Fn->hasFnAttribute(Attribute::WillReturn)) + return false; + + LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " + << CI->getCaller()->getName() << "\n"); + + auto Remark = [&](OptimizationRemark OR) { + return OR << "Parallel region in " + << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) + << " deleted"; + }; + emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion", + Remark); + + CGUpdater.removeCallSite(*CI); + CI->eraseFromParent(); + Changed = true; + ++NumOpenMPParallelRegionsDeleted; + return true; + }; + + RFI.foreachUse(SCC, DeleteCallCB); + + return Changed; + } + + /// Try to eliminate runtime calls by reusing existing ones. + bool deduplicateRuntimeCalls() { + bool Changed = false; + + RuntimeFunction DeduplicableRuntimeCallIDs[] = { + OMPRTL_omp_get_num_threads, + OMPRTL_omp_in_parallel, + OMPRTL_omp_get_cancellation, + OMPRTL_omp_get_thread_limit, + OMPRTL_omp_get_supported_active_levels, + OMPRTL_omp_get_level, + OMPRTL_omp_get_ancestor_thread_num, + OMPRTL_omp_get_team_size, + OMPRTL_omp_get_active_level, + OMPRTL_omp_in_final, + OMPRTL_omp_get_proc_bind, + OMPRTL_omp_get_num_places, + OMPRTL_omp_get_num_procs, + OMPRTL_omp_get_place_num, + OMPRTL_omp_get_partition_num_places, + OMPRTL_omp_get_partition_place_nums}; + + // Global-tid is handled separately. + SmallSetVector<Value *, 16> GTIdArgs; + collectGlobalThreadIdArguments(GTIdArgs); + LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() + << " global thread ID arguments\n"); + + for (Function *F : SCC) { + for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) + deduplicateRuntimeCalls(*F, + OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); + + // __kmpc_global_thread_num is special as we can replace it with an + // argument in enough cases to make it worth trying. 
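// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It shows, at the
// source level, the shape deleteParallelRegions() above is after: if the
// outlined parallel body ends up marked as only reading memory and always
// returning (by earlier attribute inference), the __kmpc_fork_call that
// launches it has no observable effect and can be erased. Whether that
// happens for this exact snippet depends on the surrounding pipeline.
#include <omp.h>

static void deadParallelRegion(const int *A, int N) {
#pragma omp parallel
  {
    int Sum = 0;
    for (int I = 0; I < N; ++I)
      Sum += A[I];        // computed value is never stored or returned
    (void)Sum;
  }
}
// ---------------------------------------------------------------------------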
+ Value *GTIdArg = nullptr; + for (Argument &Arg : F->args()) + if (GTIdArgs.count(&Arg)) { + GTIdArg = &Arg; + break; + } + Changed |= deduplicateRuntimeCalls( + *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); + } + + return Changed; + } + + static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, + bool GlobalOnly, bool &SingleChoice) { + if (CurrentIdent == NextIdent) + return CurrentIdent; + + // TODO: Figure out how to actually combine multiple debug locations. For + // now we just keep an existing one if there is a single choice. + if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { + SingleChoice = !CurrentIdent; + return NextIdent; + } + return nullptr; + } + + /// Return an `struct ident_t*` value that represents the ones used in the + /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not + /// return a local `struct ident_t*`. For now, if we cannot find a suitable + /// return value we create one from scratch. We also do not yet combine + /// information, e.g., the source locations, see combinedIdentStruct. + Value * + getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, + Function &F, bool GlobalOnly) { + bool SingleChoice = true; + Value *Ident = nullptr; + auto CombineIdentStruct = [&](Use &U, Function &Caller) { + CallInst *CI = getCallIfRegularCall(U, &RFI); + if (!CI || &F != &Caller) + return false; + Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), + /* GlobalOnly */ true, SingleChoice); + return false; + }; + RFI.foreachUse(SCC, CombineIdentStruct); + + if (!Ident || !SingleChoice) { + // The IRBuilder uses the insertion block to get to the module, this is + // unfortunate but we work around it for now. + if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) + OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( + &F.getEntryBlock(), F.getEntryBlock().begin())); + // Create a fallback location if non was found. + // TODO: Use the debug locations of the calls instead. + Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); + Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); + } + return Ident; + } + + /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or + /// \p ReplVal if given. + bool deduplicateRuntimeCalls(Function &F, + OMPInformationCache::RuntimeFunctionInfo &RFI, + Value *ReplVal = nullptr) { + auto *UV = RFI.getUseVector(F); + if (!UV || UV->size() + (ReplVal != nullptr) < 2) + return false; + + LLVM_DEBUG( + dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name + << (ReplVal ? " with an existing value\n" : "\n") << "\n"); + + assert((!ReplVal || (isa<Argument>(ReplVal) && + cast<Argument>(ReplVal)->getParent() == &F)) && + "Unexpected replacement value!"); + + // TODO: Use dominance to find a good position instead. 
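// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. The TODO above
// suggests using dominance instead of unconditionally hoisting the surviving
// call to the entry block; a minimal way to do that is to take the nearest
// common dominator of all duplicate call sites and insert the survivor there.
// The helper is made up for the example and ignores the ident-operand
// constraints handled by CanBeMoved below.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"

static llvm::BasicBlock *
commonHoistBlock(llvm::ArrayRef<llvm::CallInst *> Calls,
                 llvm::DominatorTree &DT) {
  llvm::BasicBlock *BB = nullptr;
  for (llvm::CallInst *CI : Calls)
    BB = BB ? DT.findNearestCommonDominator(BB, CI->getParent())
            : CI->getParent();
  return BB;   // a block that dominates every duplicate call site
}
// ---------------------------------------------------------------------------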
+ auto CanBeMoved = [this](CallBase &CB) { + unsigned NumArgs = CB.getNumArgOperands(); + if (NumArgs == 0) + return true; + if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) + return false; + for (unsigned u = 1; u < NumArgs; ++u) + if (isa<Instruction>(CB.getArgOperand(u))) + return false; + return true; + }; + + if (!ReplVal) { + for (Use *U : *UV) + if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { + if (!CanBeMoved(*CI)) + continue; + + auto Remark = [&](OptimizationRemark OR) { + auto newLoc = &*F.getEntryBlock().getFirstInsertionPt(); + return OR << "OpenMP runtime call " + << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to " + << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc()); + }; + emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark); + + CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); + ReplVal = CI; + break; + } + if (!ReplVal) + return false; + } + + // If we use a call as a replacement value we need to make sure the ident is + // valid at the new location. For now we just pick a global one, either + // existing and used by one of the calls, or created from scratch. + if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { + if (CI->getNumArgOperands() > 0 && + CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { + Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, + /* GlobalOnly */ true); + CI->setArgOperand(0, Ident); + } + } + + bool Changed = false; + auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { + CallInst *CI = getCallIfRegularCall(U, &RFI); + if (!CI || CI == ReplVal || &F != &Caller) + return false; + assert(CI->getCaller() == &F && "Unexpected call!"); + + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP runtime call " + << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; + }; + emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark); + + CGUpdater.removeCallSite(*CI); + CI->replaceAllUsesWith(ReplVal); + CI->eraseFromParent(); + ++NumOpenMPRuntimeCallsDeduplicated; + Changed = true; + return true; + }; + RFI.foreachUse(SCC, ReplaceAndDeleteCB); + + return Changed; + } + + /// Collect arguments that represent the global thread id in \p GTIdArgs. + void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { + // TODO: Below we basically perform a fixpoint iteration with a pessimistic + // initialization. We could define an AbstractAttribute instead and + // run the Attributor here once it can be run as an SCC pass. + + // Helper to check the argument \p ArgNo at all call sites of \p F for + // a GTId. + auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { + if (!F.hasLocalLinkage()) + return false; + for (Use &U : F.uses()) { + if (CallInst *CI = getCallIfRegularCall(U)) { + Value *ArgOp = CI->getArgOperand(ArgNo); + if (CI == &RefCI || GTIdArgs.count(ArgOp) || + getCallIfRegularCall( + *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) + continue; + } + return false; + } + return true; + }; + + // Helper to identify uses of a GTId as GTId arguments. + auto AddUserArgs = [&](Value >Id) { + for (Use &U : GTId.uses()) + if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) + if (CI->isArgOperand(&U)) + if (Function *Callee = CI->getCalledFunction()) + if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) + GTIdArgs.insert(Callee->getArg(U.getOperandNo())); + }; + + // The argument users of __kmpc_global_thread_num calls are GTIds. 
+ OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; + + GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { + if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) + AddUserArgs(*CI); + return false; + }); + + // Transitively search for more arguments by looking at the users of the + // ones we know already. During the search the GTIdArgs vector is extended + // so we cannot cache the size nor can we use a range based for. + for (unsigned u = 0; u < GTIdArgs.size(); ++u) + AddUserArgs(*GTIdArgs[u]); + } + + /// Kernel (=GPU) optimizations and utility functions + /// + ///{{ + + /// Check if \p F is a kernel, hence entry point for target offloading. + bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } + + /// Cache to remember the unique kernel for a function. + DenseMap<Function *, Optional<Kernel>> UniqueKernelMap; + + /// Find the unique kernel that will execute \p F, if any. + Kernel getUniqueKernelFor(Function &F); + + /// Find the unique kernel that will execute \p I, if any. + Kernel getUniqueKernelFor(Instruction &I) { + return getUniqueKernelFor(*I.getFunction()); + } + + /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in + /// the cases we can avoid taking the address of a function. + bool rewriteDeviceCodeStateMachine(); + + /// + ///}} + + /// Emit a remark generically + /// + /// This template function can be used to generically emit a remark. The + /// RemarkKind should be one of the following: + /// - OptimizationRemark to indicate a successful optimization attempt + /// - OptimizationRemarkMissed to report a failed optimization attempt + /// - OptimizationRemarkAnalysis to provide additional information about an + /// optimization attempt + /// + /// The remark is built using a callback function provided by the caller that + /// takes a RemarkKind as input and returns a RemarkKind. + template <typename RemarkKind, + typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>> + void emitRemark(Instruction *Inst, StringRef RemarkName, + RemarkCallBack &&RemarkCB) const { + Function *F = Inst->getParent()->getParent(); + auto &ORE = OREGetter(F); + + ORE.emit( + [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); + } + + /// Emit a remark on a function. Since only OptimizationRemark is supporting + /// this, it can't be made generic. + void + emitRemarkOnFunction(Function *F, StringRef RemarkName, + function_ref<OptimizationRemark(OptimizationRemark &&)> + &&RemarkCB) const { + auto &ORE = OREGetter(F); + + ORE.emit([&]() { + return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F)); + }); + } + + /// The underlying module. + Module &M; + + /// The SCC we are operating on. + SmallVectorImpl<Function *> &SCC; + + /// Callback to update the call graph, the first argument is a removed call, + /// the second an optional replacement call. + CallGraphUpdater &CGUpdater; + + /// Callback to get an OptimizationRemarkEmitter from a Function * + OptimizationRemarkGetter OREGetter; + + /// OpenMP-specific information cache. Also Used for Attributor runs. + OMPInformationCache &OMPInfoCache; + + /// Attributor instance. + Attributor &A; + + /// Helper function to run Attributor on SCC. 
+ bool runAttributor() { + if (SCC.empty()) + return false; + + registerAAs(); + + ChangeStatus Changed = A.run(); + + LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() + << " functions, result: " << Changed << ".\n"); + + return Changed == ChangeStatus::CHANGED; + } + + /// Populate the Attributor with abstract attribute opportunities in the + /// function. + void registerAAs() { + for (Function *F : SCC) { + if (F->isDeclaration()) + continue; + + A.getOrCreateAAFor<AAICVTracker>(IRPosition::function(*F)); + } + } +}; + +Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { + if (!OMPInfoCache.ModuleSlice.count(&F)) + return nullptr; + + // Use a scope to keep the lifetime of the CachedKernel short. + { + Optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; + if (CachedKernel) + return *CachedKernel; + + // TODO: We should use an AA to create an (optimistic and callback + // call-aware) call graph. For now we stick to simple patterns that + // are less powerful, basically the worst fixpoint. + if (isKernel(F)) { + CachedKernel = Kernel(&F); + return *CachedKernel; + } + + CachedKernel = nullptr; + if (!F.hasLocalLinkage()) + return nullptr; + } + + auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { + if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { + // Allow use in equality comparisons. + if (Cmp->isEquality()) + return getUniqueKernelFor(*Cmp); + return nullptr; + } + if (auto *CB = dyn_cast<CallBase>(U.getUser())) { + // Allow direct calls. + if (CB->isCallee(&U)) + return getUniqueKernelFor(*CB); + // Allow the use in __kmpc_kernel_prepare_parallel calls. + if (Function *Callee = CB->getCalledFunction()) + if (Callee->getName() == "__kmpc_kernel_prepare_parallel") + return getUniqueKernelFor(*CB); + return nullptr; + } + // Disallow every other use. + return nullptr; + }; + + // TODO: In the future we want to track more than just a unique kernel. + SmallPtrSet<Kernel, 2> PotentialKernels; + foreachUse(F, [&](const Use &U) { + PotentialKernels.insert(GetUniqueKernelForUse(U)); + }); + + Kernel K = nullptr; + if (PotentialKernels.size() == 1) + K = *PotentialKernels.begin(); + + // Cache the result. + UniqueKernelMap[&F] = K; + + return K; +} + +bool OpenMPOpt::rewriteDeviceCodeStateMachine() { + OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel]; + + bool Changed = false; + if (!KernelPrepareParallelRFI) + return Changed; + + for (Function *F : SCC) { + + // Check if the function is uses in a __kmpc_kernel_prepare_parallel call at + // all. + bool UnknownUse = false; + bool KernelPrepareUse = false; + unsigned NumDirectCalls = 0; + + SmallVector<Use *, 2> ToBeReplacedStateMachineUses; + foreachUse(*F, [&](Use &U) { + if (auto *CB = dyn_cast<CallBase>(U.getUser())) + if (CB->isCallee(&U)) { + ++NumDirectCalls; + return; + } + + if (isa<ICmpInst>(U.getUser())) { + ToBeReplacedStateMachineUses.push_back(&U); + return; + } + if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall( + *U.getUser(), &KernelPrepareParallelRFI)) { + KernelPrepareUse = true; + ToBeReplacedStateMachineUses.push_back(&U); + return; + } + UnknownUse = true; + }); + + // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel + // use. 
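// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It isolates the
// memoization pattern getUniqueKernelFor() above relies on: the cached entry
// is an Optional so that "not computed yet" (None) stays distinguishable from
// "computed, and the answer is no unique kernel" (a cached nullptr). The
// helper and the ExampleKernel alias are made up for the example.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"

using ExampleKernel = llvm::Function *;   // mirrors the pass's Kernel alias

static ExampleKernel lookupOrCompute(
    llvm::DenseMap<llvm::Function *, llvm::Optional<ExampleKernel>> &Cache,
    llvm::Function &F,
    llvm::function_ref<ExampleKernel(llvm::Function &)> Compute) {
  llvm::Optional<ExampleKernel> &Slot = Cache[&F];
  if (Slot)
    return *Slot;             // includes a cached "no unique kernel" (nullptr)
  ExampleKernel K = Compute(F);
  Cache[&F] = K;              // fresh lookup: Compute may have grown the map
  return K;
}
// ---------------------------------------------------------------------------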
+ if (!KernelPrepareUse)
+ continue;
+
+ {
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Found a parallel region that is called in a target "
+ "region but not part of a combined target construct nor "
+ "nested inside a target construct without intermediate "
+ "code. This can lead to excessive register usage for "
+ "unrelated target regions in the same translation unit "
+ "due to spurious call edges assumed by ptxas.";
+ };
+ emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
+ }
+
+ // If this ever hits, we should investigate.
+ // TODO: Checking the number of uses is not a necessary restriction and
+ // should be lifted.
+ if (UnknownUse || NumDirectCalls != 1 ||
+ ToBeReplacedStateMachineUses.size() != 2) {
+ {
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Parallel region is used in "
+ << (UnknownUse ? "unknown" : "unexpected")
+ << " ways; will not attempt to rewrite the state machine.";
+ };
+ emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
+ }
+ continue;
+ }
+
+ // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give
+ // up if the function is not called from a unique kernel.
+ Kernel K = getUniqueKernelFor(*F);
+ if (!K) {
+ {
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Parallel region is not known to be called from a "
+ "unique single target region; maybe the surrounding "
+ "function has external linkage? Will not attempt to "
+ "rewrite the state machine use.";
+ };
+ emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernels",
+ Remark);
+ }
+ continue;
+ }
+
+ // We now know F is a parallel body function called only from the kernel K.
+ // We also identified the state machine uses in which we replace the
+ // function pointer by a new global symbol for identification purposes. This
+ // ensures only direct calls to the function are left.
+
+ {
+ auto RemarkParallelRegion = [&](OptimizationRemark OR) {
+ return OR << "Specialize parallel region that is only reached from a "
+ "single target region to avoid spurious call edges and "
+ "excessive register usage in other target regions. "
+ "(parallel region ID: "
+ << ore::NV("OpenMPParallelRegion", F->getName())
+ << ", kernel ID: "
+ << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
+ };
+ emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD",
+ RemarkParallelRegion);
+ auto RemarkKernel = [&](OptimizationRemark OR) {
+ return OR << "Target region containing the parallel region that is "
+ "specialized. (parallel region ID: "
+ << ore::NV("OpenMPParallelRegion", F->getName())
+ << ", kernel ID: "
+ << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
+ };
+ emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
+ }
+
+ Module &M = *F->getParent();
+ Type *Int8Ty = Type::getInt8Ty(M.getContext());
+
+ auto *ID = new GlobalVariable(
+ M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
+ UndefValue::get(Int8Ty), F->getName() + ".ID");
+
+ for (Use *U : ToBeReplacedStateMachineUses)
+ U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
+
+ ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// Abstract Attribute for tracking ICV values.
+struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
+ using Base = StateWrapper<BooleanState, AbstractAttribute>;
+ AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+ /// Returns true if value is assumed to be tracked.
+ bool isAssumedTracked() const { return getAssumed(); }
+
+ /// Returns true if value is known to be tracked.
+ bool isKnownTracked() const { return getAssumed(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Return the value with which \p I can be replaced for a specific \p ICV.
+ virtual Value *getReplacementValue(InternalControlVar ICV,
+ const Instruction *I, Attributor &A) = 0;
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAICVTracker"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is AAICVTracker.
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ static const char ID;
+};
+
+struct AAICVTrackerFunction : public AAICVTracker {
+ AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
+ : AAICVTracker(IRP, A) {}
+
+ // FIXME: come up with a better string.
+ const std::string getAsStr() const override { return "ICVTracker"; }
+
+ // FIXME: come up with some stats.
+ void trackStatistics() const override {}
+
+ /// TODO: decide whether to deduplicate here, or use current
+ /// deduplicateRuntimeCalls function.
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ for (InternalControlVar &ICV : TrackableICVs)
+ if (deduplicateICVGetters(ICV, A))
+ Changed = ChangeStatus::CHANGED;
+
+ return Changed;
+ }
+
+ bool deduplicateICVGetters(InternalControlVar &ICV, Attributor &A) {
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ auto &ICVInfo = OMPInfoCache.ICVs[ICV];
+ auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
+
+ bool Changed = false;
+
+ auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
+ CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ Value *ReplVal = getReplacementValue(ICV, UserI, A);
+
+ if (!ReplVal || !CI)
+ return false;
+
+ A.removeCallSite(CI);
+ CI->replaceAllUsesWith(ReplVal);
+ CI->eraseFromParent();
+ Changed = true;
+ return true;
+ };
+
+ GetterRFI.foreachUse(ReplaceAndDeleteCB, getAnchorScope());
+ return Changed;
+ }
+
+ // Map of ICVs to their values at specific program points.
+ EnumeratedArray<SmallSetVector<ICVValue, 4>, InternalControlVar,
+ InternalControlVar::ICV___last>
+ ICVValuesMap;
+
+ // Currently only nthreads is being tracked.
+ // This array will only grow with time.
+ InternalControlVar TrackableICVs[1] = {ICV_nthreads};
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+
+ Function *F = getAnchorScope();
+
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+
+ for (InternalControlVar ICV : TrackableICVs) {
+ auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
+
+ auto TrackValues = [&](Use &U, Function &) {
+ CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
+ if (!CI)
+ return false;
+
+ // FIXME: handle setters with more than one argument.
+ // Track new value.
+ if (ICVValuesMap[ICV].insert(ICVValue(CI, CI->getArgOperand(0))))
+ HasChanged = ChangeStatus::CHANGED;
+
+ return false;
+ };
+
+ SetterRFI.foreachUse(TrackValues, F);
+ }
+
+ return HasChanged;
+ }
+
+ /// Return the value with which \p I can be replaced for a specific \p ICV.
+ Value *getReplacementValue(InternalControlVar ICV, const Instruction *I, + Attributor &A) override { + const BasicBlock *CurrBB = I->getParent(); + + auto &ValuesSet = ICVValuesMap[ICV]; + auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); + auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; + + for (const auto &ICVVal : ValuesSet) { + if (CurrBB == ICVVal.Inst->getParent()) { + if (!ICVVal.Inst->comesBefore(I)) + continue; + + // both instructions are in the same BB and at \p I we know the ICV + // value. + while (I != ICVVal.Inst) { + // we don't yet know if a call might update an ICV. + // TODO: check callsite AA for value. + if (const auto *CB = dyn_cast<CallBase>(I)) + if (CB->getCalledFunction() != GetterRFI.Declaration) + return nullptr; + + I = I->getPrevNode(); + } + + // No call in between, return the value. + return ICVVal.TrackedValue; + } + } + + // No value was tracked. + return nullptr; + } +}; +} // namespace + +const char AAICVTracker::ID = 0; + +AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, + Attributor &A) { + AAICVTracker *AA = nullptr; + switch (IRP.getPositionKind()) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_RETURNED: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + case IRPosition::IRP_CALL_SITE: + llvm_unreachable("ICVTracker can only be created for function position!"); + case IRPosition::IRP_FUNCTION: + AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); + break; + } + + return *AA; +} + +PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR) { + if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) + return PreservedAnalyses::all(); + + if (DisableOpenMPOptimizations) + return PreservedAnalyses::all(); + + SmallVector<Function *, 16> SCC; + for (LazyCallGraph::Node &N : C) + SCC.push_back(&N.getFunction()); + + if (SCC.empty()) + return PreservedAnalyses::all(); + + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); + + AnalysisGetter AG(FAM); + + auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { + return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); + }; + + CallGraphUpdater CGUpdater; + CGUpdater.initialize(CG, C, AM, UR); + + SetVector<Function *> Functions(SCC.begin(), SCC.end()); + BumpPtrAllocator Allocator; + OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, + /*CGSCC*/ Functions, OMPInModule.getKernels()); + + Attributor A(Functions, InfoCache, CGUpdater); + + // TODO: Compute the module slice we are allowed to look at. + OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); + bool Changed = OMPOpt.run(); + if (Changed) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); +} + +namespace { + +struct OpenMPOptLegacyPass : public CallGraphSCCPass { + CallGraphUpdater CGUpdater; + OpenMPInModule OMPInModule; + static char ID; + + OpenMPOptLegacyPass() : CallGraphSCCPass(ID) { + initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + CallGraphSCCPass::getAnalysisUsage(AU); + } + + bool doInitialization(CallGraph &CG) override { + // Disable the pass if there is no OpenMP (runtime call) in the module. 
+ containsOpenMP(CG.getModule(), OMPInModule); + return false; + } + + bool runOnSCC(CallGraphSCC &CGSCC) override { + if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) + return false; + if (DisableOpenMPOptimizations || skipSCC(CGSCC)) + return false; + + SmallVector<Function *, 16> SCC; + for (CallGraphNode *CGN : CGSCC) + if (Function *Fn = CGN->getFunction()) + if (!Fn->isDeclaration()) + SCC.push_back(Fn); + + if (SCC.empty()) + return false; + + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + CGUpdater.initialize(CG, CGSCC); + + // Maintain a map of functions to avoid rebuilding the ORE + DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap; + auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { + std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F]; + if (!ORE) + ORE = std::make_unique<OptimizationRemarkEmitter>(F); + return *ORE; + }; + + AnalysisGetter AG; + SetVector<Function *> Functions(SCC.begin(), SCC.end()); + BumpPtrAllocator Allocator; + OMPInformationCache InfoCache( + *(Functions.back()->getParent()), AG, Allocator, + /*CGSCC*/ Functions, OMPInModule.getKernels()); + + Attributor A(Functions, InfoCache, CGUpdater); + + // TODO: Compute the module slice we are allowed to look at. + OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); + return OMPOpt.run(); + } + + bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } +}; + +} // end anonymous namespace + +void OpenMPInModule::identifyKernels(Module &M) { + + NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + if (!MD) + return; + + for (auto *Op : MD->operands()) { + if (Op->getNumOperands() < 2) + continue; + MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); + if (!KindID || KindID->getString() != "kernel") + continue; + + Function *KernelFn = + mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); + if (!KernelFn) + continue; + + ++NumOpenMPTargetRegionKernels; + + Kernels.insert(KernelFn); + } +} + +bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { + if (OMPInModule.isKnown()) + return OMPInModule; + + // MSVC doesn't like long if-else chains for some reason and instead just + // issues an error. Work around it.. + do { +#define OMP_RTL(_Enum, _Name, ...) \ + if (M.getFunction(_Name)) { \ + OMPInModule = true; \ + break; \ + } +#include "llvm/Frontend/OpenMP/OMPKinds.def" + } while (false); + + // Identify kernels once. TODO: We should split the OMPInformationCache into a + // module and an SCC part. The kernel information, among other things, could + // go into the module part. 
+ if (OMPInModule.isKnown() && OMPInModule) { + OMPInModule.identifyKernels(M); + return true; + } + + return OMPInModule = false; +} + +char OpenMPOptLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt", + "OpenMP specific optimizations", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt", + "OpenMP specific optimizations", false, false) + +Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); } diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index cd3701e903080..5d863f1330a44 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -30,7 +30,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -199,13 +198,14 @@ struct FunctionOutliningMultiRegionInfo { struct PartialInlinerImpl { PartialInlinerImpl( - std::function<AssumptionCache &(Function &)> *GetAC, + function_ref<AssumptionCache &(Function &)> GetAC, function_ref<AssumptionCache *(Function &)> LookupAC, - std::function<TargetTransformInfo &(Function &)> *GTTI, - Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI, - ProfileSummaryInfo *ProfSI) + function_ref<TargetTransformInfo &(Function &)> GTTI, + function_ref<const TargetLibraryInfo &(Function &)> GTLI, + ProfileSummaryInfo &ProfSI, + function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr) : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC), - GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} + GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {} bool run(Module &M); // Main part of the transformation that calls helper functions to find @@ -270,11 +270,12 @@ struct PartialInlinerImpl { private: int NumPartialInlining = 0; - std::function<AssumptionCache &(Function &)> *GetAssumptionCache; + function_ref<AssumptionCache &(Function &)> GetAssumptionCache; function_ref<AssumptionCache *(Function &)> LookupAssumptionCache; - std::function<TargetTransformInfo &(Function &)> *GetTTI; - Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI; - ProfileSummaryInfo *PSI; + function_ref<TargetTransformInfo &(Function &)> GetTTI; + function_ref<BlockFrequencyInfo &(Function &)> GetBFI; + function_ref<const TargetLibraryInfo &(Function &)> GetTLI; + ProfileSummaryInfo &PSI; // Return the frequency of the OutlininingBB relative to F's entry point. // The result is no larger than 1 and is represented using BP. @@ -282,9 +283,9 @@ private: // edges from the guarding entry blocks). BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner); - // Return true if the callee of CS should be partially inlined with + // Return true if the callee of CB should be partially inlined with // profit. 
- bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, + bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE); @@ -303,26 +304,22 @@ private: NumPartialInlining >= MaxNumPartialInlining); } - static CallSite getCallSite(User *U) { - CallSite CS; - if (CallInst *CI = dyn_cast<CallInst>(U)) - CS = CallSite(CI); - else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) - CS = CallSite(II); - else - llvm_unreachable("All uses must be calls"); - return CS; + static CallBase *getSupportedCallBase(User *U) { + if (isa<CallInst>(U) || isa<InvokeInst>(U)) + return cast<CallBase>(U); + llvm_unreachable("All uses must be calls"); + return nullptr; } - static CallSite getOneCallSiteTo(Function *F) { + static CallBase *getOneCallSiteTo(Function *F) { User *User = *F->user_begin(); - return getCallSite(User); + return getSupportedCallBase(User); } std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) { - CallSite CS = getOneCallSiteTo(F); - DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); - BasicBlock *Block = CS.getParent(); + CallBase *CB = getOneCallSiteTo(F); + DebugLoc DLoc = CB->getDebugLoc(); + BasicBlock *Block = CB->getParent(); return std::make_tuple(DLoc, Block); } @@ -355,6 +352,7 @@ struct PartialInlinerLegacyPass : public ModulePass { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } bool runOnModule(Module &M) override { @@ -364,11 +362,10 @@ struct PartialInlinerLegacyPass : public ModulePass { AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); TargetTransformInfoWrapperPass *TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); - ProfileSummaryInfo *PSI = - &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + ProfileSummaryInfo &PSI = + getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - std::function<AssumptionCache &(Function &)> GetAssumptionCache = - [&ACT](Function &F) -> AssumptionCache & { + auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; @@ -376,13 +373,16 @@ struct PartialInlinerLegacyPass : public ModulePass { return ACT->lookupAssumptionCache(F); }; - std::function<TargetTransformInfo &(Function &)> GetTTI = - [&TTIWP](Function &F) -> TargetTransformInfo & { + auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & { return TTIWP->getTTI(F); }; - return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, - &GetTTI, NoneType::None, PSI) + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + + return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI, + GetTLI, PSI) .run(M); } }; @@ -403,10 +403,10 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F, ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI)); BFI = ScopedBFI.get(); } else - BFI = &(*GetBFI)(*F); + BFI = &(GetBFI(*F)); // Return if we don't have profiling information. 
- if (!PSI->hasInstrumentationProfile()) + if (!PSI.hasInstrumentationProfile()) return std::unique_ptr<FunctionOutliningMultiRegionInfo>(); std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo = @@ -479,7 +479,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F, // Only consider regions with predecessor blocks that are considered // not-cold (default: part of the top 99.99% of all block counters) // AND greater than our minimum block execution count (default: 100). - if (PSI->isColdBlock(thisBB, BFI) || + if (PSI.isColdBlock(thisBB, BFI) || BBProfileCount(thisBB) < MinBlockCounterExecution) continue; for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) { @@ -759,31 +759,28 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { } bool PartialInlinerImpl::shouldPartialInline( - CallSite CS, FunctionCloner &Cloner, - BlockFrequency WeightedOutliningRcost, + CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE) { using namespace ore; - Instruction *Call = CS.getInstruction(); - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB.getCalledFunction(); assert(Callee == Cloner.ClonedFunc); if (SkipCostAnalysis) - return isInlineViable(*Callee); + return isInlineViable(*Callee).isSuccess(); - Function *Caller = CS.getCaller(); - auto &CalleeTTI = (*GetTTI)(*Callee); + Function *Caller = CB.getCaller(); + auto &CalleeTTI = GetTTI(*Callee); bool RemarksEnabled = Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( DEBUG_TYPE); - assert(Call && "invalid callsite for partial inline"); - InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(), - CalleeTTI, *GetAssumptionCache, GetBFI, PSI, - RemarksEnabled ? &ORE : nullptr); + InlineCost IC = + getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache, + GetTLI, GetBFI, &PSI, RemarksEnabled ? 
&ORE : nullptr); if (IC.isAlways()) { ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB) << NV("Callee", Cloner.OrigFunc) << " should always be fully inlined, not partially"; }); @@ -792,7 +789,7 @@ bool PartialInlinerImpl::shouldPartialInline( if (IC.isNever()) { ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because it should never be inlined (cost=never)"; @@ -802,7 +799,7 @@ bool PartialInlinerImpl::shouldPartialInline( if (!IC) { ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " because too costly to inline (cost=" << NV("Cost", IC.getCost()) << ", threshold=" @@ -813,14 +810,14 @@ bool PartialInlinerImpl::shouldPartialInline( const DataLayout &DL = Caller->getParent()->getDataLayout(); // The savings of eliminating the call: - int NonWeightedSavings = getCallsiteCost(cast<CallBase>(*Call), DL); + int NonWeightedSavings = getCallsiteCost(CB, DL); BlockFrequency NormWeightedSavings(NonWeightedSavings); // Weighted saving is smaller than weighted cost, return false if (NormWeightedSavings < WeightedOutliningRcost) { ORE.emit([&]() { return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", - Call) + &CB) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " runtime overhead (overhead=" << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) @@ -834,7 +831,7 @@ bool PartialInlinerImpl::shouldPartialInline( } ORE.emit([&]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB) << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) << " (threshold=" @@ -941,20 +938,20 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap( CurrentCallerBFI = TempBFI.get(); } else { // New pass manager: - CurrentCallerBFI = &(*GetBFI)(*Caller); + CurrentCallerBFI = &(GetBFI(*Caller)); } }; for (User *User : Users) { - CallSite CS = getCallSite(User); - Function *Caller = CS.getCaller(); + CallBase *CB = getSupportedCallBase(User); + Function *Caller = CB->getCaller(); if (CurrentCaller != Caller) { CurrentCaller = Caller; ComputeCurrBFI(Caller); } else { assert(CurrentCallerBFI && "CallerBFI is not set"); } - BasicBlock *CallBB = CS.getInstruction()->getParent(); + BasicBlock *CallBB = CB->getParent(); auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB); if (Count) CallSiteToProfCountMap[User] = *Count; @@ -1155,8 +1152,8 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { Function *OutlinedFunc = CE.extractCodeRegion(CEAC); if (OutlinedFunc) { - CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); - BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent(); + CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); + BasicBlock *OutliningCallBB = OCS->getParent(); assert(OutliningCallBB->getParent() == ClonedFunc); OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB)); 
NumColdRegionsOutlined++; @@ -1164,7 +1161,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { if (MarkOutlinedColdCC) { OutlinedFunc->setCallingConv(CallingConv::Cold); - OCS.setCallingConv(CallingConv::Cold); + OCS->setCallingConv(CallingConv::Cold); } } else ORE.emit([&]() { @@ -1224,7 +1221,6 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() { if (OutlinedFunc) { BasicBlock *OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) - .getInstruction() ->getParent(); assert(OutliningCallBB->getParent() == ClonedFunc); OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB)); @@ -1266,7 +1262,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) { if (F->hasFnAttribute(Attribute::NoInline)) return {false, nullptr}; - if (PSI->isFunctionEntryCold(F)) + if (PSI.isFunctionEntryCold(F)) return {false, nullptr}; if (F->users().empty()) @@ -1276,7 +1272,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) { // Only try to outline cold regions if we have a profile summary, which // implies we have profiling information. - if (PSI->hasProfileSummary() && F->hasProfileData() && + if (PSI.hasProfileSummary() && F->hasProfileData() && !DisableMultiRegionPartialInline) { std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI = computeOutliningColdRegionsInfo(F, ORE); @@ -1285,8 +1281,8 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) { #ifndef NDEBUG if (TracePartialInlining) { - dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n"; - dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold() + dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n"; + dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold() << "\n"; } #endif @@ -1391,27 +1387,28 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { bool AnyInline = false; for (User *User : Users) { - CallSite CS = getCallSite(User); + CallBase *CB = getSupportedCallBase(User); if (IsLimitReached()) continue; - OptimizationRemarkEmitter CallerORE(CS.getCaller()); - if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE)) + OptimizationRemarkEmitter CallerORE(CB->getCaller()); + if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE)) continue; // Construct remark before doing the inlining, as after successful inlining // the callsite is removed. - OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()); + OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB); OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " - << ore::NV("Caller", CS.getCaller()); + << ore::NV("Caller", CB->getCaller()); - InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); + InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI); // We can only forward varargs when we outlined a single region, else we // bail on vararg functions. - if (!InlineFunction(CS, IFI, nullptr, true, + if (!InlineFunction(*CB, IFI, nullptr, true, (Cloner.ClonedOI ? 
Cloner.OutlinedFunctions.back().first - : nullptr))) + : nullptr)) + .isSuccess()) continue; CallerORE.emit(OR); @@ -1492,6 +1489,7 @@ INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) @@ -1503,8 +1501,7 @@ PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); - std::function<AssumptionCache &(Function &)> GetAssumptionCache = - [&FAM](Function &F) -> AssumptionCache & { + auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); }; @@ -1512,20 +1509,22 @@ PreservedAnalyses PartialInlinerPass::run(Module &M, return FAM.getCachedResult<AssumptionAnalysis>(F); }; - std::function<BlockFrequencyInfo &(Function &)> GetBFI = - [&FAM](Function &F) -> BlockFrequencyInfo & { + auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { return FAM.getResult<BlockFrequencyAnalysis>(F); }; - std::function<TargetTransformInfo &(Function &)> GetTTI = - [&FAM](Function &F) -> TargetTransformInfo & { + auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { return FAM.getResult<TargetIRAnalysis>(F); }; - ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + + ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M); - if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI, - {GetBFI}, PSI) + if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI, + GetTLI, PSI, GetBFI) .run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 9c992830879ae..d73d42c52074b 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" @@ -46,6 +47,7 @@ #include "llvm/Transforms/Vectorize.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" +#include "llvm/Transforms/Vectorize/VectorCombine.h" using namespace llvm; @@ -98,8 +100,8 @@ static cl::opt<bool> EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable performing ThinLTO.")); -cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden, - cl::desc("Enable hot-cold splitting pass")); +cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), + cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass")); static cl::opt<bool> UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, @@ -115,7 +117,7 @@ static cl::opt<int> PreInlineThreshold( "(default = 75)")); static cl::opt<bool> EnableGVNHoist( - "enable-gvn-hoist", cl::init(false), cl::Hidden, + "enable-gvn-hoist", cl::init(false), cl::ZeroOrMore, 
cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt<bool> @@ -129,7 +131,7 @@ static cl::opt<bool> EnableSimpleLoopUnswitch( "cleanup passes integrated into the loop pass manager pipeline.")); static cl::opt<bool> EnableGVNSink( - "enable-gvn-sink", cl::init(false), cl::Hidden, + "enable-gvn-sink", cl::init(false), cl::ZeroOrMore, cl::desc("Enable the GVN sinking pass (default = off)")); // This option is used in simplifying testing SampleFDO optimizations for @@ -151,15 +153,29 @@ static cl::opt<bool> EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics")); +cl::opt<AttributorRunOption> AttributorRun( + "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), + cl::desc("Enable the attributor inter-procedural deduction pass."), + cl::values(clEnumValN(AttributorRunOption::ALL, "all", + "enable all attributor runs"), + clEnumValN(AttributorRunOption::MODULE, "module", + "enable module-wide attributor runs"), + clEnumValN(AttributorRunOption::CGSCC, "cgscc", + "enable call graph SCC attributor runs"), + clEnumValN(AttributorRunOption::NONE, "none", + "disable attributor runs"))); + +extern cl::opt<bool> EnableKnowledgeRetention; + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; DisableUnrollLoops = false; - SLPVectorize = RunSLPVectorization; - LoopVectorize = EnableLoopVectorization; - LoopsInterleaved = EnableLoopInterleaving; + SLPVectorize = false; + LoopVectorize = true; + LoopsInterleaved = true; RerollLoops = RunLoopRerolling; NewGVN = RunNewGVN; LicmMssaOptCap = SetLicmMssaOptCap; @@ -179,6 +195,7 @@ PassManagerBuilder::PassManagerBuilder() { PrepareForThinLTO = EnablePrepareForThinLTO; PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; + CallGraphProfile = true; } PassManagerBuilder::~PassManagerBuilder() { @@ -187,8 +204,13 @@ PassManagerBuilder::~PassManagerBuilder() { } /// Set of global extensions, automatically added as part of the standard set. -static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, - PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions; +static ManagedStatic< + SmallVector<std::tuple<PassManagerBuilder::ExtensionPointTy, + PassManagerBuilder::ExtensionFn, + PassManagerBuilder::GlobalExtensionID>, + 8>> + GlobalExtensions; +static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter; /// Check if GlobalExtensions is constructed and not empty. /// Since GlobalExtensions is a managed static, calling 'empty()' will trigger @@ -197,10 +219,29 @@ static bool GlobalExtensionsNotEmpty() { return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); } -void PassManagerBuilder::addGlobalExtension( - PassManagerBuilder::ExtensionPointTy Ty, - PassManagerBuilder::ExtensionFn Fn) { - GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn))); +PassManagerBuilder::GlobalExtensionID +PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + auto ExtensionID = GlobalExtensionsCounter++; + GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID)); + return ExtensionID; +} + +void PassManagerBuilder::removeGlobalExtension( + PassManagerBuilder::GlobalExtensionID ExtensionID) { + // RegisterStandardPasses may try to call this function after GlobalExtensions + // has already been destroyed; doing so should not generate an error. 
+ if (!GlobalExtensions.isConstructed()) + return; + + auto GlobalExtension = + llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) { + return std::get<2>(elem) == ExtensionID; + }); + assert(GlobalExtension != GlobalExtensions->end() && + "The extension ID to be removed should always be valid."); + + GlobalExtensions->erase(GlobalExtension); } void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { @@ -211,8 +252,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, legacy::PassManagerBase &PM) const { if (GlobalExtensionsNotEmpty()) { for (auto &Ext : *GlobalExtensions) { - if (Ext.first == ETy) - Ext.second(*this, PM); + if (std::get<0>(Ext) == ETy) + std::get<1>(Ext)(*this, PM); } } for (unsigned i = 0, e = Extensions.size(); i != e; ++i) @@ -244,12 +285,6 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses( PM.add(createScopedNoAliasAAWrapperPass()); } -void PassManagerBuilder::addInstructionCombiningPass( - legacy::PassManagerBase &PM) const { - bool ExpensiveCombines = OptLevel > 2; - PM.add(createInstructionCombiningPass(ExpensiveCombines)); -} - void PassManagerBuilder::populateFunctionPassManager( legacy::FunctionPassManager &FPM) { addExtensionsToPM(EP_EarlyAsPossible, FPM); @@ -327,6 +362,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + if (EnableKnowledgeRetention) + MPM.add(createAssumeSimplifyPass()); if (OptLevel > 1) { if (EnableGVNHoist) @@ -348,7 +385,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Combine silly seq's if (OptLevel > 2) MPM.add(createAggressiveInstCombinerPass()); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) MPM.add(createLibCallsShrinkWrapPass()); addExtensionsToPM(EP_Peephole, MPM); @@ -383,7 +420,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the // need for this. MPM.add(createCFGSimplificationPass()); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); // We resume loop passes creating a second loop pipeline here. MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. @@ -414,7 +451,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1) { MPM.add(createJumpThreadingPass()); // Thread jumps @@ -432,7 +469,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); if (EnableCHR && OptLevel >= 3 && @@ -478,6 +515,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createBarrierNoopPass()); if (PerformThinLTO) { + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); // Drop available_externally and unreferenced globals. 
This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. @@ -511,9 +549,11 @@ void PassManagerBuilder::populateModulePassManager( // inter-module indirect calls. For that we perform indirect call promotion // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. - if (PerformThinLTO) + if (PerformThinLTO) { MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, !PGOSampleUse.empty())); + MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); + } // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops // as it will change the CFG too much to make the 2nd profile annotation @@ -526,6 +566,10 @@ void PassManagerBuilder::populateModulePassManager( // Infer attributes about declarations if possible. MPM.add(createInferFunctionAttrsLegacyPass()); + // Infer attributes on declarations, call sites, arguments, etc. + if (AttributorRun & AttributorRunOption::MODULE) + MPM.add(createAttributorLegacyPass()); + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); if (OptLevel > 2) @@ -534,16 +578,13 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); - // Infer attributes on declarations, call sites, arguments, etc. - MPM.add(createAttributorLegacyPass()); - MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE @@ -574,6 +615,15 @@ void PassManagerBuilder::populateModulePassManager( RunInliner = true; } + // Infer attributes on declarations, call sites, arguments, etc. for an SCC. + if (AttributorRun & AttributorRunOption::CGSCC) + MPM.add(createAttributorCGSCCLegacyPass()); + + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (OptLevel > 1) + MPM.add(createOpenMPOptLegacyPass()); + MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -705,7 +755,7 @@ void PassManagerBuilder::populateModulePassManager( // on -O1 and no #pragma is found). Would be good to have these two passes // as function calls, so that we can only pass them when the vectorizer // changed the code. - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); if (OptLevel > 1 && ExtraVectorizerPasses) { // At higher optimization levels, try to clean up any runtime overlap and // alignment checks inserted by the vectorizer. We want to track correllated @@ -715,11 +765,11 @@ void PassManagerBuilder::populateModulePassManager( // dead (or speculatable) control flows or more combining opportunities. 
MPM.add(createEarlyCSEPass()); MPM.add(createCorrelatedValuePropagationPass()); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); } // Cleanup after loop vectorization, etc. Simplification passes like CVP and @@ -736,8 +786,11 @@ void PassManagerBuilder::populateModulePassManager( } } + // Enhance/cleanup vector code. + MPM.add(createVectorCombinePass()); + addExtensionsToPM(EP_Peephole, MPM); - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); if (EnableUnrollAndJam && !DisableUnrollLoops) { // Unroll and Jam. We do this before unroll but need to be in a separate @@ -752,7 +805,7 @@ void PassManagerBuilder::populateModulePassManager( if (!DisableUnrollLoops) { // LoopUnroll may generate some redundency to cleanup. - addInstructionCombiningPass(MPM); + MPM.add(createInstructionCombiningPass()); // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the @@ -785,6 +838,10 @@ void PassManagerBuilder::populateModulePassManager( if (MergeFunctions) MPM.add(createMergeFunctionsPass()); + // Add Module flag "CG Profile" based on Branch Frequency Information. + if (CallGraphProfile) + MPM.add(createCGProfileLegacyPass()); + // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM @@ -852,7 +909,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createCalledValuePropagationPass()); // Infer attributes on declarations, call sites, arguments, etc. - PM.add(createAttributorLegacyPass()); + if (AttributorRun & AttributorRunOption::MODULE) + PM.add(createAttributorLegacyPass()); } // Infer attributes about definitions. The readnone attribute in particular is @@ -890,7 +948,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // calls, etc, so let instcombine do this. if (OptLevel > 2) PM.add(createAggressiveInstCombinerPass()); - addInstructionCombiningPass(PM); + PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); // Inline small functions @@ -905,6 +963,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // CSFDO instrumentation and use pass. addPGOInstrPasses(PM, /* IsCS */ true); + // Infer attributes on declarations, call sites, arguments, etc. for an SCC. + if (AttributorRun & AttributorRunOption::CGSCC) + PM.add(createAttributorCGSCCLegacyPass()); + + // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if + // there are no OpenMP runtime calls present in the module. + if (OptLevel > 1) + PM.add(createOpenMPOptLegacyPass()); + // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); @@ -915,7 +982,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createArgumentPromotionPass()); // The IPO passes may leave cruft around. Clean up after them. 
- addInstructionCombiningPass(PM); + PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); @@ -960,22 +1027,24 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Now that we've optimized loops (in particular loop induction variables), // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. - addInstructionCombiningPass(PM); // Initial cleanup + PM.add(createInstructionCombiningPass()); // Initial cleanup PM.add(createCFGSimplificationPass()); // if-convert PM.add(createSCCPPass()); // Propagate exposed constants - addInstructionCombiningPass(PM); // Clean up again + PM.add(createInstructionCombiningPass()); // Clean up again PM.add(createBitTrackingDCEPass()); // More scalar chains could be vectorized due to more alias information if (SLPVectorize) PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + PM.add(createVectorCombinePass()); // Clean up partial vectorization. + // After vectorization, assume intrinsics may tell us more about pointer // alignments. PM.add(createAlignmentFromAssumptionsPass()); // Cleanup and simplify the code after the scalar optimizations. - addInstructionCombiningPass(PM); + PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); @@ -1013,8 +1082,8 @@ void PassManagerBuilder::populateThinLTOPassManager( PM.add(createVerifierPass()); if (ImportSummary) { - // These passes import type identifier resolutions for whole-program - // devirtualization and CFI. They must run early because other passes may + // This pass imports type identifier resolutions for whole-program + // devirtualization and CFI. It must run early because other passes may // disturb the specific instruction patterns that these passes look for, // creating dependencies on resolutions that may not appear in the summary. // @@ -1062,6 +1131,9 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); + // Run a second time to clean up any type tests left behind by WPD for use + // in ICP (which is performed earlier than this in the regular LTO pipeline). 
+ PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); @@ -1072,14 +1144,6 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { PM.add(createVerifierPass()); } -inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { - return reinterpret_cast<PassManagerBuilder*>(P); -} - -inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { - return reinterpret_cast<LLVMPassManagerBuilderRef>(P); -} - LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); diff --git a/llvm/lib/Transforms/IPO/PruneEH.cpp b/llvm/lib/Transforms/IPO/PruneEH.cpp index 45a0ce20eb175..a16dc664db64d 100644 --- a/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -135,8 +135,8 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { SCCMightUnwind |= InstMightUnwind; } if (CheckReturnViaAsm && !SCCMightReturn) - if (auto ICS = ImmutableCallSite(&I)) - if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue())) + if (const auto *CB = dyn_cast<CallBase>(&I)) + if (const auto *IA = dyn_cast<InlineAsm>(CB->getCalledOperand())) if (IA->hasSideEffects()) SCCMightReturn = true; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index a1fbb1adc412c..b6871e260532d 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -37,15 +37,16 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" @@ -148,14 +149,17 @@ static cl::opt<bool> ProfileAccurateForSymsInList( "be accurate. It may be overriden by profile-sample-accurate. ")); static cl::opt<bool> ProfileMergeInlinee( - "sample-profile-merge-inlinee", cl::Hidden, cl::init(false), + "sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " - "profile loader decided not to inline a call site.")); + "profile loader decided not to inline a call site. It will " + "only be enabled when top-down order of profile loading is " + "enabled. ")); static cl::opt<bool> ProfileTopDownLoad( - "sample-profile-top-down-load", cl::Hidden, cl::init(false), + "sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " - "order of call graph during sample profile loading.")); + "order of call graph during sample profile loading. It only " + "works for new pass manager. 
")); static cl::opt<bool> ProfileSizeInline( "sample-profile-inline-size", cl::Hidden, cl::init(false), @@ -235,7 +239,7 @@ public: DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap) : CurrentReader(Reader), CurrentModule(M), CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { - if (CurrentReader.getFormat() != SPF_Compact_Binary) + if (!CurrentReader.useMD5()) return; for (const auto &F : CurrentModule) { @@ -261,7 +265,7 @@ public: } ~GUIDToFuncNameMapper() { - if (CurrentReader.getFormat() != SPF_Compact_Binary) + if (!CurrentReader.useMD5()) return; CurrentGUIDToFuncNameMap.clear(); @@ -307,10 +311,12 @@ public: SampleProfileLoader( StringRef Name, StringRef RemapName, bool IsThinLTOPreLink, std::function<AssumptionCache &(Function &)> GetAssumptionCache, - std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) + std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo, + std::function<const TargetLibraryInfo &(Function &)> GetTLI) : GetAC(std::move(GetAssumptionCache)), - GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), - Filename(Name), RemappingFilename(RemapName), + GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), + CoverageTracker(*this), Filename(std::string(Name)), + RemappingFilename(std::string(RemapName)), IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); @@ -327,18 +333,19 @@ protected: bool emitAnnotations(Function &F); ErrorOr<uint64_t> getInstWeight(const Instruction &I); ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB); - const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; + const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const; std::vector<const FunctionSamples *> findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineCallInstruction(Instruction *I); + bool inlineCallInstruction(CallBase &CB); bool inlineHotFunctions(Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs); // Inline cold/small functions in addition to hot ones - bool shouldInlineColdCallee(Instruction &CallInst); + bool shouldInlineColdCallee(CallBase &CallInst); void emitOptimizationRemarksForInlineCandidates( - const SmallVector<Instruction *, 10> &Candidates, const Function &F, bool Hot); + const SmallVectorImpl<CallBase *> &Candidates, const Function &F, + bool Hot); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -397,6 +404,7 @@ protected: std::function<AssumptionCache &(Function &)> GetAC; std::function<TargetTransformInfo &(Function &)> GetTTI; + std::function<const TargetLibraryInfo &(Function &)> GetTLI; /// Predecessors for each basic block in the CFG. 
BlockEdgeMap Predecessors; @@ -474,14 +482,17 @@ public: SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, bool IsThinLTOPreLink = false) - : ModulePass(ID), - SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink, - [&](Function &F) -> AssumptionCache & { - return ACT->getAssumptionCache(F); - }, - [&](Function &F) -> TargetTransformInfo & { - return TTIWP->getTTI(F); - }) { + : ModulePass(ID), SampleLoader( + Name, SampleProfileRemappingFile, IsThinLTOPreLink, + [&](Function &F) -> AssumptionCache & { + return ACT->getAssumptionCache(F); + }, + [&](Function &F) -> TargetTransformInfo & { + return TTIWP->getTTI(F); + }, + [&](Function &F) -> TargetLibraryInfo & { + return TLIWP->getTLI(F); + }) { initializeSampleProfileLoaderLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -498,6 +509,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); } @@ -505,6 +517,7 @@ private: SampleProfileLoader SampleLoader; AssumptionCacheTracker *ACT = nullptr; TargetTransformInfoWrapperPass *TTIWP = nullptr; + TargetLibraryInfoWrapperPass *TLIWP = nullptr; }; } // end anonymous namespace @@ -710,10 +723,9 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { // (findCalleeFunctionSamples returns non-empty result), but not inlined here, // it means that the inlined callsite has no sample, thus the call // instruction should have 0 count. - if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && - !ImmutableCallSite(&Inst).isIndirectCall() && - findCalleeFunctionSamples(Inst)) - return 0; + if (auto *CB = dyn_cast<CallBase>(&Inst)) + if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) + return 0; const DILocation *DIL = DLoc; uint32_t LineOffset = FunctionSamples::getOffset(DIL); @@ -801,7 +813,7 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) { /// /// \returns The FunctionSamples pointer to the inlined instance. const FunctionSamples * -SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { +SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { const DILocation *DIL = Inst.getDebugLoc(); if (!DIL) { return nullptr; @@ -885,13 +897,11 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return it.first->second; } -bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { - assert(isa<CallInst>(I) || isa<InvokeInst>(I)); - CallSite CS(I); - Function *CalledFunction = CS.getCalledFunction(); +bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { + Function *CalledFunction = CB.getCalledFunction(); assert(CalledFunction); - DebugLoc DLoc = I->getDebugLoc(); - BasicBlock *BB = I->getParent(); + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *BB = CB.getParent(); InlineParams Params = getInlineParams(); Params.ComputeFullInlineCost = true; // Checks if there is anything in the reachable portion of the callee at @@ -901,46 +911,43 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { // The acutal cost does not matter because we only checks isNever() to // see if it is legal to inline the callsite. 
InlineCost Cost = - getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC, - None, nullptr, nullptr); + getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI); if (Cost.isNever()) { ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) << "incompatible inlining"); return false; } - InlineFunctionInfo IFI(nullptr, &GetAC); - if (InlineFunction(CS, IFI)) { + InlineFunctionInfo IFI(nullptr, GetAC); + if (InlineFunction(CB, IFI).isSuccess()) { // The call to InlineFunction erases I, so we can't pass it here. - ORE->emit(OptimizationRemark(CSINLINE_DEBUG, "InlineSuccess", DLoc, BB) - << "inlined callee '" << ore::NV("Callee", CalledFunction) - << "' into '" << ore::NV("Caller", BB->getParent()) << "'"); + emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, + true, CSINLINE_DEBUG); return true; } return false; } -bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) { +bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) { if (!ProfileSizeInline) return false; - Function *Callee = CallSite(&CallInst).getCalledFunction(); + Function *Callee = CallInst.getCalledFunction(); if (Callee == nullptr) return false; - InlineCost Cost = - getInlineCost(cast<CallBase>(CallInst), getInlineParams(), - GetTTI(*Callee), GetAC, None, nullptr, nullptr); + InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee), + GetAC, GetTLI); return Cost.getCost() <= SampleColdCallSiteThreshold; } void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates( - const SmallVector<Instruction *, 10> &Candidates, const Function &F, + const SmallVectorImpl<CallBase *> &Candidates, const Function &F, bool Hot) { for (auto I : Candidates) { - Function *CalledFunction = CallSite(I).getCalledFunction(); + Function *CalledFunction = I->getCalledFunction(); if (CalledFunction) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt", + ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt", I->getDebugLoc(), I->getParent()) << "previous inlining reattempted for " << (Hot ? 
"hotness: '" : "size: '") @@ -975,43 +982,43 @@ bool SampleProfileLoader::inlineHotFunctions( "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled"); - DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites; + DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites; bool Changed = false; while (true) { bool LocalChanged = false; - SmallVector<Instruction *, 10> CIS; + SmallVector<CallBase *, 10> CIS; for (auto &BB : F) { bool Hot = false; - SmallVector<Instruction *, 10> AllCandidates; - SmallVector<Instruction *, 10> ColdCandidates; + SmallVector<CallBase *, 10> AllCandidates; + SmallVector<CallBase *, 10> ColdCandidates; for (auto &I : BB.getInstList()) { const FunctionSamples *FS = nullptr; - if ((isa<CallInst>(I) || isa<InvokeInst>(I)) && - !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) { - AllCandidates.push_back(&I); - if (FS->getEntrySamples() > 0) - localNotInlinedCallSites.try_emplace(&I, FS); - if (callsiteIsHot(FS, PSI)) - Hot = true; - else if (shouldInlineColdCallee(I)) - ColdCandidates.push_back(&I); + if (auto *CB = dyn_cast<CallBase>(&I)) { + if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) { + AllCandidates.push_back(CB); + if (FS->getEntrySamples() > 0) + localNotInlinedCallSites.try_emplace(CB, FS); + if (callsiteIsHot(FS, PSI)) + Hot = true; + else if (shouldInlineColdCallee(*CB)) + ColdCandidates.push_back(CB); + } } } if (Hot) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); - } - else { + } else { CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end()); emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false); } } - for (auto I : CIS) { - Function *CalledFunction = CallSite(I).getCalledFunction(); + for (CallBase *I : CIS) { + Function *CalledFunction = I->getCalledFunction(); // Do not inline recursive calls. if (CalledFunction == &F) continue; - if (CallSite(I).isIndirectCall()) { + if (I->isIndirectCall()) { if (PromotedInsns.count(I)) continue; uint64_t Sum; @@ -1021,7 +1028,7 @@ bool SampleProfileLoader::inlineHotFunctions( PSI->getOrCompHotCountThreshold()); continue; } - auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent()); + auto CalleeFunctionName = FS->getFuncName(); // If it is a recursive call, we do not inline it as it could bloat // the code exponentially. There is way to better handle this, e.g. // clone the caller first, and inline the cloned caller if it is @@ -1038,15 +1045,16 @@ bool SampleProfileLoader::inlineHotFunctions( if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && - isLegalToPromote(CallSite(I), R->getValue(), &Reason)) { + R->getValue()->hasFnAttribute("use-sample-profile") && + isLegalToPromote(*I, R->getValue(), &Reason)) { uint64_t C = FS->getEntrySamples(); - Instruction *DI = - pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE); + auto &DI = + pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE); Sum -= C; PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. 
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) && - inlineCallInstruction(DI)) { + inlineCallInstruction(cast<CallBase>(DI))) { localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; @@ -1059,7 +1067,7 @@ bool SampleProfileLoader::inlineHotFunctions( } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(I)) { + if (inlineCallInstruction(*I)) { localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; @@ -1078,8 +1086,8 @@ bool SampleProfileLoader::inlineHotFunctions( // Accumulate not inlined callsite information into notInlinedSamples for (const auto &Pair : localNotInlinedCallSites) { - Instruction *I = Pair.getFirst(); - Function *Callee = CallSite(I).getCalledFunction(); + CallBase *I = Pair.getFirst(); + Function *Callee = I->getCalledFunction(); if (!Callee || Callee->isDeclaration()) continue; @@ -1525,8 +1533,7 @@ void SampleProfileLoader::propagateWeights(Function &F) { for (auto &I : BB->getInstList()) { if (!isa<CallInst>(I) && !isa<InvokeInst>(I)) continue; - CallSite CS(&I); - if (!CS.getCalledFunction()) { + if (!cast<CallBase>(I).getCalledFunction()) { const DebugLoc &DLoc = I.getDebugLoc(); if (!DLoc) continue; @@ -1770,6 +1777,7 @@ INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) @@ -1780,8 +1788,17 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { FunctionOrderList.reserve(M.size()); if (!ProfileTopDownLoad || CG == nullptr) { + if (ProfileMergeInlinee) { + // Disable ProfileMergeInlinee if profile is not loaded in top down order, + // because the profile for a function may be used for the profile + // annotation of its outline copy before the profile merging of its + // non-inlined inline instances, and that is not the way how + // ProfileMergeInlinee is supposed to work. + ProfileMergeInlinee = false; + } + for (Function &F : M) - if (!F.isDeclaration()) + if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(&F); return FunctionOrderList; } @@ -1791,7 +1808,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { while (!CGI.isAtEnd()) { for (CallGraphNode *node : *CGI) { auto F = node->getFunction(); - if (F && !F->isDeclaration()) + if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(F); } ++CGI; @@ -1839,15 +1856,16 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { - GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); if (!ProfileIsValid) return false; + GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); PSI = _PSI; - if (M.getProfileSummary(/* IsCS */ false) == nullptr) + if (M.getProfileSummary(/* IsCS */ false) == nullptr) { M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), ProfileSummary::PSK_Sample); - + PSI->refresh(); + } // Compute the total number of samples collected in this profile. 
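The buildFunctionOrder hunk above now restricts sample-profile annotation to functions carrying the "use-sample-profile" attribute. A minimal sketch of that filter, assuming the same LLVM headers (collectSampleProfiledFunctions is a hypothetical name, not an API of the pass):

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include <vector>

using namespace llvm;

// Collect definitions that opted in to sample-profile use via the string
// attribute the loader checks; declarations and unattributed functions are
// skipped, matching the filter added to buildFunctionOrder.
static std::vector<Function *> collectSampleProfiledFunctions(Module &M) {
  std::vector<Function *> Order;
  Order.reserve(M.size());
  for (Function &F : M)
    if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
      Order.push_back(&F);
  return Order;
}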
for (const auto &I : Reader->getProfiles()) TotalCollectedSamples += I.second.getTotalSamples(); @@ -1890,6 +1908,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { ACT = &getAnalysis<AssumptionCacheTracker>(); TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); + TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>(); ProfileSummaryInfo *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); @@ -1966,12 +1985,15 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, auto GetTTI = [&](Function &F) -> TargetTransformInfo & { return FAM.getResult<TargetIRAnalysis>(F); }; + auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; SampleProfileLoader SampleLoader( ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, ProfileRemappingFileName.empty() ? SampleProfileRemappingFile : ProfileRemappingFileName, - IsThinLTOPreLink, GetAssumptionCache, GetTTI); + IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); if (!SampleLoader.doInitialization(M)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/IPO/StripSymbols.cpp b/llvm/lib/Transforms/IPO/StripSymbols.cpp index 6ce00714523b3..088091df770f9 100644 --- a/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -147,10 +147,12 @@ static void RemoveDeadConstant(Constant *C) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals. GV->eraseFromParent(); - } - else if (!isa<Function>(C)) - if (isa<CompositeType>(C->getType())) + } else if (!isa<Function>(C)) { + // FIXME: Why does the type of the constant matter here? + if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType()) || + isa<VectorType>(C->getType())) C->destroyConstant(); + } // If the constant referenced anything, see if we can delete it as well. 
for (Constant *O : Operands) diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp index 45fd432fd721e..1b1e91cafa651 100644 --- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp +++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp @@ -31,7 +31,6 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/SyntheticCountsUtils.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -110,14 +109,13 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M, Optional<Scaled64> Res = None; if (!Edge.first) return Res; - assert(isa<Instruction>(Edge.first)); - CallSite CS(cast<Instruction>(Edge.first)); - Function *Caller = CS.getCaller(); + CallBase &CB = *cast<CallBase>(*Edge.first); + Function *Caller = CB.getCaller(); auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller); // Now compute the callsite count from relative frequency and // entry count: - BasicBlock *CSBB = CS.getInstruction()->getParent(); + BasicBlock *CSBB = CB.getParent(); Scaled64 EntryFreq(BFI.getEntryFreq(), 0); Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0); BBCount /= EntryFreq; diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 5ccfb29b01a13..5a25f9857665c 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -57,12 +57,14 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TypeMetadataUtils.h" -#include "llvm/IR/CallSite.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" @@ -83,11 +85,12 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/MathExtras.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -115,12 +118,15 @@ static cl::opt<PassSummaryAction> ClSummaryAction( static cl::opt<std::string> ClReadSummary( "wholeprogramdevirt-read-summary", - cl::desc("Read summary from given YAML file before running pass"), + cl::desc( + "Read summary from given bitcode or YAML file before running pass"), cl::Hidden); static cl::opt<std::string> ClWriteSummary( "wholeprogramdevirt-write-summary", - cl::desc("Write summary to given YAML file after running pass"), + cl::desc("Write summary to given bitcode or YAML file after running pass. " + "Output file format is deduced from extension: *.bc means writing " + "bitcode, otherwise YAML"), cl::Hidden); static cl::opt<unsigned> @@ -134,6 +140,45 @@ static cl::opt<bool> cl::init(false), cl::ZeroOrMore, cl::desc("Print index-based devirtualization messages")); +/// Provide a way to force enable whole program visibility in tests. 
+/// This is needed to support legacy tests that don't contain +/// !vcall_visibility metadata (the mere presense of type tests +/// previously implied hidden visibility). +cl::opt<bool> + WholeProgramVisibility("whole-program-visibility", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable whole program visibility")); + +/// Provide a way to force disable whole program for debugging or workarounds, +/// when enabled via the linker. +cl::opt<bool> DisableWholeProgramVisibility( + "disable-whole-program-visibility", cl::init(false), cl::Hidden, + cl::ZeroOrMore, + cl::desc("Disable whole program visibility (overrides enabling options)")); + +/// Provide way to prevent certain function from being devirtualized +cl::list<std::string> + SkipFunctionNames("wholeprogramdevirt-skip", + cl::desc("Prevent function(s) from being devirtualized"), + cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated); + +namespace { +struct PatternList { + std::vector<GlobPattern> Patterns; + template <class T> void init(const T &StringList) { + for (const auto &S : StringList) + if (Expected<GlobPattern> Pat = GlobPattern::create(S)) + Patterns.push_back(std::move(*Pat)); + } + bool match(StringRef S) { + for (const GlobPattern &P : Patterns) + if (P.match(S)) + return true; + return false; + } +}; +} // namespace + // Find the minimum offset that we may store a value of size Size bits at. If // IsAfter is set, look for an offset before the object, otherwise look for an // offset after the object. @@ -308,20 +353,20 @@ namespace { // A virtual call site. VTable is the loaded virtual table pointer, and CS is // the indirect virtual call. struct VirtualCallSite { - Value *VTable; - CallSite CS; + Value *VTable = nullptr; + CallBase &CB; // If non-null, this field points to the associated unsafe use count stored in // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description // of that field for details. - unsigned *NumUnsafeUses; + unsigned *NumUnsafeUses = nullptr; void emitRemark(const StringRef OptName, const StringRef TargetName, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) { - Function *F = CS.getCaller(); - DebugLoc DLoc = CS->getDebugLoc(); - BasicBlock *Block = CS.getParent(); + Function *F = CB.getCaller(); + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *Block = CB.getParent(); using namespace ore; OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block) @@ -336,12 +381,12 @@ struct VirtualCallSite { Value *New) { if (RemarksEnabled) emitRemark(OptName, TargetName, OREGetter); - CS->replaceAllUsesWith(New); - if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) { - BranchInst::Create(II->getNormalDest(), CS.getInstruction()); + CB.replaceAllUsesWith(New); + if (auto *II = dyn_cast<InvokeInst>(&CB)) { + BranchInst::Create(II->getNormalDest(), &CB); II->getUnwindDest()->removePredecessor(II->getParent()); } - CS->eraseFromParent(); + CB.eraseFromParent(); // This use is no longer unsafe. if (NumUnsafeUses) --*NumUnsafeUses; @@ -414,18 +459,18 @@ struct VTableSlotInfo { // "this"), grouped by argument list. 
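The -wholeprogramdevirt-skip option introduced above is matched with LLVM's GlobPattern. A minimal sketch of that matching, again assuming LLVM 11-era headers (matchesAnyGlob is a hypothetical helper; the inputs are illustrative only):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/GlobPattern.h"
#include <vector>

using namespace llvm;

// Compile each glob once, then test a symbol name against every compiled
// pattern, as PatternList::init/match do above. Unlike the patch, this sketch
// explicitly consumes the error from a malformed glob so the Expected is
// always checked.
static bool matchesAnyGlob(ArrayRef<StringRef> Globs, StringRef SymbolName) {
  std::vector<GlobPattern> Patterns;
  for (StringRef G : Globs) {
    if (Expected<GlobPattern> Pat = GlobPattern::create(G))
      Patterns.push_back(std::move(*Pat));
    else
      consumeError(Pat.takeError());
  }
  for (const GlobPattern &P : Patterns)
    if (P.match(SymbolName))
      return true;
  return false;
}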
std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo; - void addCallSite(Value *VTable, CallSite CS, unsigned *NumUnsafeUses); + void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses); private: - CallSiteInfo &findCallSiteInfo(CallSite CS); + CallSiteInfo &findCallSiteInfo(CallBase &CB); }; -CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) { +CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) { std::vector<uint64_t> Args; - auto *CI = dyn_cast<IntegerType>(CS.getType()); - if (!CI || CI->getBitWidth() > 64 || CS.arg_empty()) + auto *CBType = dyn_cast<IntegerType>(CB.getType()); + if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty()) return CSInfo; - for (auto &&Arg : make_range(CS.arg_begin() + 1, CS.arg_end())) { + for (auto &&Arg : make_range(CB.arg_begin() + 1, CB.arg_end())) { auto *CI = dyn_cast<ConstantInt>(Arg); if (!CI || CI->getBitWidth() > 64) return CSInfo; @@ -434,11 +479,11 @@ CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) { return ConstCSInfo[Args]; } -void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS, +void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses) { - auto &CSI = findCallSiteInfo(CS); + auto &CSI = findCallSiteInfo(CB); CSI.AllCallSitesDevirted = false; - CSI.CallSites.push_back({VTable, CS, NumUnsafeUses}); + CSI.CallSites.push_back({VTable, CB, NumUnsafeUses}); } struct DevirtModule { @@ -454,6 +499,10 @@ struct DevirtModule { IntegerType *Int32Ty; IntegerType *Int64Ty; IntegerType *IntPtrTy; + /// Sizeless array type, used for imported vtables. This provides a signal + /// to analyzers that these imports may alias, as they do for example + /// when multiple unique return values occur in the same vtable. + ArrayType *Int8Arr0Ty; bool RemarksEnabled; function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter; @@ -469,6 +518,7 @@ struct DevirtModule { // eliminate the type check by RAUWing the associated llvm.type.test call with // true. 
std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest; + PatternList FunctionsToSkip; DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter, @@ -482,13 +532,17 @@ struct DevirtModule { Int32Ty(Type::getInt32Ty(M.getContext())), Int64Ty(Type::getInt64Ty(M.getContext())), IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)), + Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)), RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) { assert(!(ExportSummary && ImportSummary)); + FunctionsToSkip.init(SkipFunctionNames); } bool areRemarksEnabled(); - void scanTypeTestUsers(Function *TypeTestFunc); + void + scanTypeTestUsers(Function *TypeTestFunc, + DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap); void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc); void buildTypeIdentifierMap( @@ -592,12 +646,16 @@ struct DevirtIndex { MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots; + PatternList FunctionsToSkip; + DevirtIndex( ModuleSummaryIndex &ExportSummary, std::set<GlobalValue::GUID> &ExportedGUIDs, std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs), - LocalWPDTargetsMap(LocalWPDTargetsMap) {} + LocalWPDTargetsMap(LocalWPDTargetsMap) { + FunctionsToSkip.init(SkipFunctionNames); + } bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo, @@ -702,7 +760,49 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, return PreservedAnalyses::none(); } +// Enable whole program visibility if enabled by client (e.g. linker) or +// internal option, and not force disabled. +static bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) { + return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) && + !DisableWholeProgramVisibility; +} + namespace llvm { + +/// If whole program visibility asserted, then upgrade all public vcall +/// visibility metadata on vtable definitions to linkage unit visibility in +/// Module IR (for regular or hybrid LTO). +void updateVCallVisibilityInModule(Module &M, + bool WholeProgramVisibilityEnabledInLTO) { + if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) + return; + for (GlobalVariable &GV : M.globals()) + // Add linkage unit visibility to any variable with type metadata, which are + // the vtable definitions. We won't have an existing vcall_visibility + // metadata on vtable definitions with public visibility. + if (GV.hasMetadata(LLVMContext::MD_type) && + GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic) + GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit); +} + +/// If whole program visibility asserted, then upgrade all public vcall +/// visibility metadata on vtable definition summaries to linkage unit +/// visibility in Module summary index (for ThinLTO). 
+void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, + bool WholeProgramVisibilityEnabledInLTO) { + if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) + return; + for (auto &P : Index) { + for (auto &S : P.second.SummaryList) { + auto *GVar = dyn_cast<GlobalVarSummary>(S.get()); + if (!GVar || GVar->vTableFuncs().empty() || + GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic) + continue; + GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit); + } + } +} + void runWholeProgramDevirtOnIndex( ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs, std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) { @@ -737,11 +837,27 @@ void updateIndexWPDForExports( } // end namespace llvm +static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) { + // Check that summary index contains regular LTO module when performing + // export to prevent occasional use of index from pure ThinLTO compilation + // (-fno-split-lto-module). This kind of summary index is passed to + // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting. + const auto &ModPaths = Summary->modulePaths(); + if (ClSummaryAction != PassSummaryAction::Import && + ModPaths.find(ModuleSummaryIndex::getRegularLTOModuleName()) == + ModPaths.end()) + return createStringError( + errc::invalid_argument, + "combined summary should contain Regular LTO module"); + return ErrorSuccess(); +} + bool DevirtModule::runForTesting( Module &M, function_ref<AAResults &(Function &)> AARGetter, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter, function_ref<DominatorTree &(Function &)> LookupDomTree) { - ModuleSummaryIndex Summary(/*HaveGVs=*/false); + std::unique_ptr<ModuleSummaryIndex> Summary = + std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false); // Handle the command-line summary arguments. This code is for testing // purposes only, so we handle errors directly. @@ -750,28 +866,41 @@ bool DevirtModule::runForTesting( ": "); auto ReadSummaryFile = ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary))); - - yaml::Input In(ReadSummaryFile->getBuffer()); - In >> Summary; - ExitOnErr(errorCodeToError(In.error())); + if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr = + getModuleSummaryIndex(*ReadSummaryFile)) { + Summary = std::move(*SummaryOrErr); + ExitOnErr(checkCombinedSummaryForTesting(Summary.get())); + } else { + // Try YAML if we've failed with bitcode. + consumeError(SummaryOrErr.takeError()); + yaml::Input In(ReadSummaryFile->getBuffer()); + In >> *Summary; + ExitOnErr(errorCodeToError(In.error())); + } } bool Changed = - DevirtModule( - M, AARGetter, OREGetter, LookupDomTree, - ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr, - ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr) + DevirtModule(M, AARGetter, OREGetter, LookupDomTree, + ClSummaryAction == PassSummaryAction::Export ? Summary.get() + : nullptr, + ClSummaryAction == PassSummaryAction::Import ? 
Summary.get() + : nullptr) .run(); if (!ClWriteSummary.empty()) { ExitOnError ExitOnErr( "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": "); std::error_code EC; - raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text); - ExitOnErr(errorCodeToError(EC)); - - yaml::Output Out(OS); - Out << Summary; + if (StringRef(ClWriteSummary).endswith(".bc")) { + raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None); + ExitOnErr(errorCodeToError(EC)); + WriteIndexToFile(*Summary, OS); + } else { + raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text); + ExitOnErr(errorCodeToError(EC)); + yaml::Output Out(OS); + Out << *Summary; + } } return Changed; @@ -818,6 +947,12 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; + // We cannot perform whole program devirtualization analysis on a vtable + // with public LTO visibility. + if (TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) + return false; + Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), TM.Offset + ByteOffset, M); if (!Ptr) @@ -827,6 +962,9 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!Fn) return false; + if (FunctionsToSkip.match(Fn->getName())) + return false; + // We can disregard __cxa_pure_virtual as a possible call target, as // calls to pure virtuals are UB. if (Fn->getName() == "__cxa_pure_virtual") @@ -863,8 +1001,13 @@ bool DevirtIndex::tryFindVirtualCallTargets( return false; LocalFound = true; } - if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) + if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { VS = cast<GlobalVarSummary>(S->getBaseObject()); + // We cannot perform whole program devirtualization analysis on a vtable + // with public LTO visibility. + if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) + return false; + } } if (!VS->isLive()) continue; @@ -887,8 +1030,8 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, if (RemarksEnabled) VCallSite.emitRemark("single-impl", TheFn->stripPointerCasts()->getName(), OREGetter); - VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast( - TheFn, VCallSite.CS.getCalledValue()->getType())); + VCallSite.CB.setCalledOperand(ConstantExpr::getBitCast( + TheFn, VCallSite.CB.getCalledOperand()->getType())); // This use is no longer unsafe. if (VCallSite.NumUnsafeUses) --*VCallSite.NumUnsafeUses; @@ -979,7 +1122,7 @@ bool DevirtModule::trySingleImplDevirt( AddCalls(SlotInfo, TheFnVI); Res->TheKind = WholeProgramDevirtResolution::SingleImpl; - Res->SingleImplName = TheFn->getName(); + Res->SingleImplName = std::string(TheFn->getName()); return true; } @@ -1001,6 +1144,11 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot, if (!Size) return false; + // Don't devirtualize function if we're told to skip it + // in -wholeprogramdevirt-skip. + if (FunctionsToSkip.match(TheFn.name())) + return false; + // If the summary list contains multiple summaries where at least one is // a local, give up, as we won't know which (possibly promoted) name to use. 
for (auto &S : TheFn.getSummaryList()) @@ -1028,10 +1176,10 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot, TheFn.name(), ExportSummary.getModuleHash(S->modulePath())); else { LocalWPDTargetsMap[TheFn].push_back(SlotSummary); - Res->SingleImplName = TheFn.name(); + Res->SingleImplName = std::string(TheFn.name()); } } else - Res->SingleImplName = TheFn.name(); + Res->SingleImplName = std::string(TheFn.name()); // Name will be empty if this thin link driven off of serialized combined // index (e.g. llvm-lto). However, WPD is not supported/invoked for the @@ -1106,10 +1254,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, if (CSInfo.AllCallSitesDevirted) return; for (auto &&VCallSite : CSInfo.CallSites) { - CallSite CS = VCallSite.CS; + CallBase &CB = VCallSite.CB; // Jump tables are only profitable if the retpoline mitigation is enabled. - Attribute FSAttr = CS.getCaller()->getFnAttribute("target-features"); + Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features"); if (FSAttr.hasAttribute(Attribute::None) || !FSAttr.getValueAsString().contains("+retpoline")) continue; @@ -1122,42 +1270,40 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, // x86_64. std::vector<Type *> NewArgs; NewArgs.push_back(Int8PtrTy); - for (Type *T : CS.getFunctionType()->params()) + for (Type *T : CB.getFunctionType()->params()) NewArgs.push_back(T); FunctionType *NewFT = - FunctionType::get(CS.getFunctionType()->getReturnType(), NewArgs, - CS.getFunctionType()->isVarArg()); + FunctionType::get(CB.getFunctionType()->getReturnType(), NewArgs, + CB.getFunctionType()->isVarArg()); PointerType *NewFTPtr = PointerType::getUnqual(NewFT); - IRBuilder<> IRB(CS.getInstruction()); + IRBuilder<> IRB(&CB); std::vector<Value *> Args; Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy)); - for (unsigned I = 0; I != CS.getNumArgOperands(); ++I) - Args.push_back(CS.getArgOperand(I)); + Args.insert(Args.end(), CB.arg_begin(), CB.arg_end()); - CallSite NewCS; - if (CS.isCall()) + CallBase *NewCS = nullptr; + if (isa<CallInst>(CB)) NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args); else - NewCS = IRB.CreateInvoke( - NewFT, IRB.CreateBitCast(JT, NewFTPtr), - cast<InvokeInst>(CS.getInstruction())->getNormalDest(), - cast<InvokeInst>(CS.getInstruction())->getUnwindDest(), Args); - NewCS.setCallingConv(CS.getCallingConv()); + NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr), + cast<InvokeInst>(CB).getNormalDest(), + cast<InvokeInst>(CB).getUnwindDest(), Args); + NewCS->setCallingConv(CB.getCallingConv()); - AttributeList Attrs = CS.getAttributes(); + AttributeList Attrs = CB.getAttributes(); std::vector<AttributeSet> NewArgAttrs; NewArgAttrs.push_back(AttributeSet::get( M.getContext(), ArrayRef<Attribute>{Attribute::get( M.getContext(), Attribute::Nest)})); for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I) NewArgAttrs.push_back(Attrs.getParamAttributes(I)); - NewCS.setAttributes( + NewCS->setAttributes( AttributeList::get(M.getContext(), Attrs.getFnAttributes(), Attrs.getRetAttributes(), NewArgAttrs)); - CS->replaceAllUsesWith(NewCS.getInstruction()); - CS->eraseFromParent(); + CB.replaceAllUsesWith(NewCS); + CB.eraseFromParent(); // This use is no longer unsafe. 
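The branch-funnel rewrite in the hunk above only fires when the caller is built with retpolines. A minimal sketch of that gating test, under the same LLVM header assumptions (callerWantsRetpoline is a hypothetical name):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// applyICallBranchFunnel skips call sites whose caller does not list
// "+retpoline" in its "target-features" attribute; this mirrors that check.
static bool callerWantsRetpoline(CallBase &CB) {
  Attribute FS = CB.getCaller()->getFnAttribute("target-features");
  return FS.isStringAttribute() && FS.getValueAsString().contains("+retpoline");
}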
if (VCallSite.NumUnsafeUses) @@ -1208,7 +1354,7 @@ void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, for (auto Call : CSInfo.CallSites) Call.replaceAndErase( "uniform-ret-val", FnName, RemarksEnabled, OREGetter, - ConstantInt::get(cast<IntegerType>(Call.CS.getType()), TheRetVal)); + ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal)); CSInfo.markDevirt(); } @@ -1273,7 +1419,8 @@ void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name) { - Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty); + Constant *C = + M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty); auto *GV = dyn_cast<GlobalVariable>(C); if (GV) GV->setVisibility(GlobalValue::HiddenVisibility); @@ -1313,11 +1460,11 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne, Constant *UniqueMemberAddr) { for (auto &&Call : CSInfo.CallSites) { - IRBuilder<> B(Call.CS.getInstruction()); + IRBuilder<> B(&Call.CB); Value *Cmp = - B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, - B.CreateBitCast(Call.VTable, Int8PtrTy), UniqueMemberAddr); - Cmp = B.CreateZExt(Cmp, Call.CS->getType()); + B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable, + B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType())); + Cmp = B.CreateZExt(Cmp, Call.CB.getType()); Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter, Cmp); } @@ -1381,8 +1528,8 @@ bool DevirtModule::tryUniqueRetValOpt( void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName, Constant *Byte, Constant *Bit) { for (auto Call : CSInfo.CallSites) { - auto *RetType = cast<IntegerType>(Call.CS.getType()); - IRBuilder<> B(Call.CS.getInstruction()); + auto *RetType = cast<IntegerType>(Call.CB.getType()); + IRBuilder<> B(&Call.CB); Value *Addr = B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte); if (RetType->getBitWidth() == 1) { @@ -1507,10 +1654,8 @@ void DevirtModule::rebuildGlobal(VTableBits &B) { // Align the before byte array to the global's minimum alignment so that we // don't break any alignment requirements on the global. - MaybeAlign Alignment(B.GV->getAlignment()); - if (!Alignment) - Alignment = - Align(M.getDataLayout().getABITypeAlignment(B.GV->getValueType())); + Align Alignment = M.getDataLayout().getValueOrABITypeAlignment( + B.GV->getAlign(), B.GV->getValueType()); B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment)); // Before was stored in reverse order; flip it now. @@ -1562,13 +1707,14 @@ bool DevirtModule::areRemarksEnabled() { return false; } -void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) { +void DevirtModule::scanTypeTestUsers( + Function *TypeTestFunc, + DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) { // Find all virtual calls via a virtual table pointer %p under an assumption // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p // points to a member of the type identifier %md. Group calls by (type ID, // offset) pair (effectively the identity of the virtual function) and store // to CallSlots. 
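To make the return-value optimizations in the hunks above concrete, here is an illustrative C++ input (not taken from this patch) that, roughly under -flto -fwhole-program-vtables, produces the kind of guarded virtual calls these routines rewrite:

// Calls through Base's vtable are guarded by llvm.type.test and become
// devirtualization candidates when whole-program visibility holds.
struct Base {
  virtual bool isFast() const { return false; }
  virtual const char *name() const { return "base"; }
  virtual ~Base() = default;
};

// Derived overrides isFast() only; name() keeps its single implementation.
struct Derived : Base {
  bool isFast() const override { return true; }
};

// Roughly: name() has one implementation across the hierarchy, so it is a
// single-impl devirtualization candidate; isFast() has exactly one override
// returning true, so unique-ret-val can fold the call into a vtable-pointer
// comparison like the one built in applyUniqueRetValOpt above.
bool queryFast(const Base *B) { return B->isFast(); }
const char *queryName(const Base *B) { return B->name(); }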
- DenseSet<CallSite> SeenCallSites; for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end(); I != E;) { auto CI = dyn_cast<CallInst>(I->getUser()); @@ -1582,29 +1728,59 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) { auto &DT = LookupDomTree(*CI->getFunction()); findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); + Metadata *TypeId = + cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata(); // If we found any, add them to CallSlots. if (!Assumes.empty()) { - Metadata *TypeId = - cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata(); Value *Ptr = CI->getArgOperand(0)->stripPointerCasts(); - for (DevirtCallSite Call : DevirtCalls) { - // Only add this CallSite if we haven't seen it before. The vtable - // pointer may have been CSE'd with pointers from other call sites, - // and we don't want to process call sites multiple times. We can't - // just skip the vtable Ptr if it has been seen before, however, since - // it may be shared by type tests that dominate different calls. - if (SeenCallSites.insert(Call.CS).second) - CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS, nullptr); - } + for (DevirtCallSite Call : DevirtCalls) + CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr); } - // We no longer need the assumes or the type test. - for (auto Assume : Assumes) - Assume->eraseFromParent(); - // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we - // may use the vtable argument later. - if (CI->use_empty()) - CI->eraseFromParent(); + auto RemoveTypeTestAssumes = [&]() { + // We no longer need the assumes or the type test. + for (auto Assume : Assumes) + Assume->eraseFromParent(); + // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we + // may use the vtable argument later. + if (CI->use_empty()) + CI->eraseFromParent(); + }; + + // At this point we could remove all type test assume sequences, as they + // were originally inserted for WPD. However, we can keep these in the + // code stream for later analysis (e.g. to help drive more efficient ICP + // sequences). They will eventually be removed by a second LowerTypeTests + // invocation that cleans them up. In order to do this correctly, the first + // LowerTypeTests invocation needs to know that they have "Unknown" type + // test resolution, so that they aren't treated as Unsat and lowered to + // False, which will break any uses on assumes. Below we remove any type + // test assumes that will not be treated as Unknown by LTT. + + // The type test assumes will be treated by LTT as Unsat if the type id is + // not used on a global (in which case it has no entry in the TypeIdMap). + if (!TypeIdMap.count(TypeId)) + RemoveTypeTestAssumes(); + + // For ThinLTO importing, we need to remove the type test assumes if this is + // an MDString type id without a corresponding TypeIdSummary. Any + // non-MDString type ids are ignored and treated as Unknown by LTT, so their + // type test assumes can be kept. If the MDString type id is missing a + // TypeIdSummary (e.g. because there was no use on a vcall, preventing the + // exporting phase of WPD from analyzing it), then it would be treated as + // Unsat by LTT and we need to remove its type test assumes here. If not + // used on a vcall we don't need them for later optimization use in any + // case. 
+ else if (ImportSummary && isa<MDString>(TypeId)) { + const TypeIdSummary *TidSummary = + ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString()); + if (!TidSummary) + RemoveTypeTestAssumes(); + else + // If one was created it should not be Unsat, because if we reached here + // the type id was used on a global. + assert(TidSummary->TTRes.TheKind != TypeTestResolution::Unsat); + } } } @@ -1680,7 +1856,7 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) { if (HasNonCallUses) ++NumUnsafeUses; for (DevirtCallSite Call : DevirtCalls) { - CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS, + CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, &NumUnsafeUses); } @@ -1796,8 +1972,13 @@ bool DevirtModule::run() { (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty())) return false; + // Rebuild type metadata into a map for easy lookup. + std::vector<VTableBits> Bits; + DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap; + buildTypeIdentifierMap(Bits, TypeIdMap); + if (TypeTestFunc && AssumeFunc) - scanTypeTestUsers(TypeTestFunc); + scanTypeTestUsers(TypeTestFunc, TypeIdMap); if (TypeCheckedLoadFunc) scanTypeCheckedLoadUsers(TypeCheckedLoadFunc); @@ -1808,15 +1989,17 @@ bool DevirtModule::run() { removeRedundantTypeTests(); + // We have lowered or deleted the type instrinsics, so we will no + // longer have enough information to reason about the liveness of virtual + // function pointers in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + // The rest of the code is only necessary when exporting or during regular // LTO, so we are done. return true; } - // Rebuild type metadata into a map for easy lookup. - std::vector<VTableBits> Bits; - DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap; - buildTypeIdentifierMap(Bits, TypeIdMap); if (TypeIdMap.empty()) return true; @@ -1873,14 +2056,22 @@ bool DevirtModule::run() { // function implementation at offset S.first.ByteOffset, and add to // TargetsForSlot. std::vector<VirtualCallTarget> TargetsForSlot; - if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID], + WholeProgramDevirtResolution *Res = nullptr; + const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID]; + if (ExportSummary && isa<MDString>(S.first.TypeID) && + TypeMemberInfos.size()) + // For any type id used on a global's type metadata, create the type id + // summary resolution regardless of whether we can devirtualize, so that + // lower type tests knows the type id is not Unsat. If it was not used on + // a global's type metadata, the TypeIdMap entry set will be empty, and + // we don't want to create an entry (with the default Unknown type + // resolution), which can prevent detection of the Unsat. 
+ Res = &ExportSummary + ->getOrInsertTypeIdSummary( + cast<MDString>(S.first.TypeID)->getString()) + .WPDRes[S.first.ByteOffset]; + if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, S.first.ByteOffset)) { - WholeProgramDevirtResolution *Res = nullptr; - if (ExportSummary && isa<MDString>(S.first.TypeID)) - Res = &ExportSummary - ->getOrInsertTypeIdSummary( - cast<MDString>(S.first.TypeID)->getString()) - .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { DidVirtualConstProp |= @@ -1893,7 +2084,7 @@ bool DevirtModule::run() { if (RemarksEnabled) for (const auto &T : TargetsForSlot) if (T.WasDevirt) - DevirtTargets[T.Fn->getName()] = T.Fn; + DevirtTargets[std::string(T.Fn->getName())] = T.Fn; } // CFI-specific: if we are exporting and any llvm.type.checked.load @@ -1931,7 +2122,7 @@ bool DevirtModule::run() { for (VTableBits &B : Bits) rebuildGlobal(B); - // We have lowered or deleted the type checked load intrinsics, so we no + // We have lowered or deleted the type instrinsics, so we will no // longer have enough information to reason about the liveness of virtual // function pointers in GlobalDCE. for (GlobalVariable &GV : M.globals()) @@ -1994,11 +2185,14 @@ void DevirtIndex::run() { std::vector<ValueInfo> TargetsForSlot; auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID); assert(TidSummary); + // Create the type id summary resolution regardlness of whether we can + // devirtualize, so that lower type tests knows the type id is used on + // a global and not Unsat. + WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, S.first.ByteOffset)) { - WholeProgramDevirtResolution *Res = - &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) - .WPDRes[S.first.ByteOffset]; if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, DevirtTargets)) |