Diffstat (limited to 'llvm/lib/Transforms/IPO')
-rw-r--r--  llvm/lib/Transforms/IPO/AlwaysInliner.cpp              |   33
-rw-r--r--  llvm/lib/Transforms/IPO/ArgumentPromotion.cpp          |  136
-rw-r--r--  llvm/lib/Transforms/IPO/Attributor.cpp                 | 6832
-rw-r--r--  llvm/lib/Transforms/IPO/AttributorAttributes.cpp       | 7225
-rw-r--r--  llvm/lib/Transforms/IPO/BlockExtractor.cpp             |    3
-rw-r--r--  llvm/lib/Transforms/IPO/CalledValuePropagation.cpp     |   31
-rw-r--r--  llvm/lib/Transforms/IPO/ConstantMerge.cpp              |   12
-rw-r--r--  llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp    |  262
-rw-r--r--  llvm/lib/Transforms/IPO/ExtractGV.cpp                  |   13
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionAttrs.cpp              |   87
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionImport.cpp             |  133
-rw-r--r--  llvm/lib/Transforms/IPO/GlobalDCE.cpp                  |    9
-rw-r--r--  llvm/lib/Transforms/IPO/GlobalOpt.cpp                  |  271
-rw-r--r--  llvm/lib/Transforms/IPO/GlobalSplit.cpp                |    3
-rw-r--r--  llvm/lib/Transforms/IPO/HotColdSplitting.cpp           |   12
-rw-r--r--  llvm/lib/Transforms/IPO/IPConstantPropagation.cpp      |   37
-rw-r--r--  llvm/lib/Transforms/IPO/IPO.cpp                        |    2
-rw-r--r--  llvm/lib/Transforms/IPO/InlineSimple.cpp               |   15
-rw-r--r--  llvm/lib/Transforms/IPO/Inliner.cpp                    |  617
-rw-r--r--  llvm/lib/Transforms/IPO/LoopExtractor.cpp              |  192
-rw-r--r--  llvm/lib/Transforms/IPO/LowerTypeTests.cpp             |  107
-rw-r--r--  llvm/lib/Transforms/IPO/MergeFunctions.cpp             |    7
-rw-r--r--  llvm/lib/Transforms/IPO/OpenMPOpt.cpp                  | 1501
-rw-r--r--  llvm/lib/Transforms/IPO/PartialInlining.cpp            |  165
-rw-r--r--  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp         |  166
-rw-r--r--  llvm/lib/Transforms/IPO/PruneEH.cpp                    |    4
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfile.cpp              |  186
-rw-r--r--  llvm/lib/Transforms/IPO/StripSymbols.cpp               |    8
-rw-r--r--  llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp |    8
-rw-r--r--  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp         |  422
30 files changed, 11429 insertions, 7070 deletions
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 06d1763353f46..53f9512f86f30 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -16,7 +16,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
@@ -37,30 +36,30 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
// Add inline assumptions during code generation.
FunctionAnalysisManager &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&](Function &F) -> AssumptionCache & {
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+ InlineFunctionInfo IFI(/*cg=*/nullptr, GetAssumptionCache);
- SmallSetVector<CallSite, 16> Calls;
+ SmallSetVector<CallBase *, 16> Calls;
bool Changed = false;
SmallVector<Function *, 16> InlinedFunctions;
for (Function &F : M)
if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) &&
- isInlineViable(F)) {
+ isInlineViable(F).isSuccess()) {
Calls.clear();
for (User *U : F.users())
- if (auto CS = CallSite(U))
- if (CS.getCalledFunction() == &F)
- Calls.insert(CS);
+ if (auto *CB = dyn_cast<CallBase>(U))
+ if (CB->getCalledFunction() == &F)
+ Calls.insert(CB);
- for (CallSite CS : Calls)
+ for (CallBase *CB : Calls)
// FIXME: We really shouldn't be able to fail to inline at this point!
// We should do something to log or check the inline failures here.
Changed |=
- InlineFunction(CS, IFI, /*CalleeAAR=*/nullptr, InsertLifetime);
+ InlineFunction(*CB, IFI, /*CalleeAAR=*/nullptr, InsertLifetime)
+ .isSuccess();
// Remember to try and delete this function afterward. This both avoids
// re-walking the rest of the module and avoids dealing with any iterator
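The hunk above is the heart of the CallSite-to-CallBase migration: call sites are now plain instructions, so collecting direct calls reduces to a dyn_cast over the function's users. A minimal sketch of that idiom, using only APIs the hunk itself switches to (the helper name is illustrative, not part of the patch):

    #include "llvm/ADT/SetVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h"

    // Gather the direct call sites of F the way the rewritten loop does:
    // record every user that is a CallBase and actually calls F (rather
    // than merely passing F as an operand).
    static void collectDirectCalls(
        llvm::Function &F,
        llvm::SmallSetVector<llvm::CallBase *, 16> &Calls) {
      for (llvm::User *U : F.users())
        if (auto *CB = llvm::dyn_cast<llvm::CallBase>(U))
          if (CB->getCalledFunction() == &F)
            Calls.insert(CB);
    }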
@@ -116,7 +115,7 @@ public:
static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallSite CS) override;
+ InlineCost getInlineCost(CallBase &CB) override;
using llvm::Pass::doFinalization;
bool doFinalization(CallGraph &CG) override {
@@ -151,8 +150,8 @@ Pass *llvm::createAlwaysInlinerLegacyPass(bool InsertLifetime) {
/// computed here, but as we only expect to do this for relatively few and
/// small functions which have the explicit attribute to force inlining, it is
/// likely not worth it in practice.
-InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) {
- Function *Callee = CS.getCalledFunction();
+InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) {
+ Function *Callee = CB.getCalledFunction();
// Only inline direct calls to functions with always-inline attributes
// that are viable for inlining.
@@ -163,12 +162,12 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) {
if (Callee->isDeclaration())
return InlineCost::getNever("no definition");
- if (!CS.hasFnAttr(Attribute::AlwaysInline))
+ if (!CB.hasFnAttr(Attribute::AlwaysInline))
return InlineCost::getNever("no alwaysinline attribute");
auto IsViable = isInlineViable(*Callee);
- if (!IsViable)
- return InlineCost::getNever(IsViable.message);
+ if (!IsViable.isSuccess())
+ return InlineCost::getNever(IsViable.getFailureReason());
return InlineCost::getAlways("always inliner");
}
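Both hunks in this file also reflect the InlineResult API change: success is now queried with isSuccess() and the diagnostic text with getFailureReason(), instead of the old boolean conversion and public message field. A condensed, hedged sketch of the updated decision logic with the pass plumbing stripped away (the attribute and declaration checks from the full function are omitted here):

    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/IR/Function.h"

    // Map an InlineResult onto an InlineCost, preserving the failure reason
    // so it can surface in remarks; mirrors the tail of getInlineCost above.
    static llvm::InlineCost decideAlwaysInline(llvm::Function &Callee) {
      llvm::InlineResult Viable = llvm::isInlineViable(Callee);
      if (!Viable.isSuccess())
        return llvm::InlineCost::getNever(Viable.getFailureReason());
      return llvm::InlineCost::getAlways("always inliner");
    }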
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index cdf8a2eb598ee..ad0d7eb51507a 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -36,7 +36,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -53,7 +52,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -74,6 +72,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include <algorithm>
@@ -105,7 +104,7 @@ using IndicesVector = std::vector<uint64_t>;
static Function *
doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
- Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
+ Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
ReplaceCallSite) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
@@ -197,7 +196,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
for (const auto &ArgIndex : ArgIndices) {
// not allowed to dereference ->begin() if size() is 0
Params.push_back(GetElementPtrInst::getIndexedType(
- cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ cast<PointerType>(I->getType())->getElementType(),
ArgIndex.second));
ArgAttrVec.push_back(AttributeSet());
assert(Params.back());
@@ -241,15 +240,14 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
//
SmallVector<Value *, 16> Args;
while (!F->use_empty()) {
- CallSite CS(F->user_back());
- assert(CS.getCalledFunction() == F);
- Instruction *Call = CS.getInstruction();
- const AttributeList &CallPAL = CS.getAttributes();
- IRBuilder<NoFolder> IRB(Call);
+ CallBase &CB = cast<CallBase>(*F->user_back());
+ assert(CB.getCalledFunction() == F);
+ const AttributeList &CallPAL = CB.getAttributes();
+ IRBuilder<NoFolder> IRB(&CB);
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
- CallSite::arg_iterator AI = CS.arg_begin();
+ auto AI = CB.arg_begin();
ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++AI, ++ArgNo)
@@ -295,7 +293,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
ElTy = ElPTy->getElementType();
else
- ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
+ ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II);
}
// And create a GEP to extract those indices.
V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx");
@@ -305,7 +303,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// of the previous load.
LoadInst *newLoad =
IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
- newLoad->setAlignment(MaybeAlign(OrigLoad->getAlignment()));
+ newLoad->setAlignment(OrigLoad->getAlign());
// Transfer the AA info too.
AAMDNodes AAInfo;
OrigLoad->getAAMetadata(AAInfo);
@@ -317,46 +315,43 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
}
// Push any varargs arguments on the list.
- for (; AI != CS.arg_end(); ++AI, ++ArgNo) {
+ for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
Args.push_back(*AI);
ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
}
SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ CB.getOperandBundlesAsDefs(OpBundles);
- CallSite NewCS;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ CallBase *NewCS = nullptr;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call);
+ Args, OpBundles, "", &CB);
} else {
- auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
- NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
+ auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
+ NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
NewCS = NewCall;
}
- NewCS.setCallingConv(CS.getCallingConv());
- NewCS.setAttributes(
+ NewCS->setCallingConv(CB.getCallingConv());
+ NewCS->setAttributes(
AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
CallPAL.getRetAttributes(), ArgAttrVec));
- NewCS->setDebugLoc(Call->getDebugLoc());
- uint64_t W;
- if (Call->extractProfTotalWeight(W))
- NewCS->setProfWeight(W);
+ NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
Args.clear();
ArgAttrVec.clear();
// Update the callgraph to know that the callsite has been transformed.
if (ReplaceCallSite)
- (*ReplaceCallSite)(CS, NewCS);
+ (*ReplaceCallSite)(CB, *NewCS);
- if (!Call->use_empty()) {
- Call->replaceAllUsesWith(NewCS.getInstruction());
- NewCS->takeName(Call);
+ if (!CB.use_empty()) {
+ CB.replaceAllUsesWith(NewCS);
+ NewCS->takeName(&CB);
}
// Finally, remove the old call from the program, reducing the use-count of
// F.
- Call->eraseFromParent();
+ CB.eraseFromParent();
}
const DataLayout &DL = F->getParent()->getDataLayout();
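Note the metadata handling in this hunk: the old three-step dance (setDebugLoc, extractProfTotalWeight, setProfWeight) collapses into a single whitelist-based copy. A small sketch of that pattern in isolation (helper name hypothetical):

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/LLVMContext.h"

    // Copy only the !prof and !dbg metadata kinds from a replaced call onto
    // its replacement; every other metadata kind is deliberately dropped.
    static void transferCallMetadata(llvm::CallBase &OldCB,
                                     llvm::CallBase &NewCB) {
      NewCB.copyMetadata(
          OldCB, {llvm::LLVMContext::MD_prof, llvm::LLVMContext::MD_dbg});
    }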
@@ -387,9 +382,10 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- Value *TheAlloca =
- new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
- MaybeAlign(I->getParamAlignment()), "", InsertPt);
+ Value *TheAlloca = new AllocaInst(
+ AgTy, DL.getAllocaAddrSpace(), nullptr,
+ I->getParamAlign().getValueOr(DL.getPrefTypeAlign(AgTy)), "",
+ InsertPt);
StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
nullptr};
@@ -453,12 +449,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
assert(It != ArgIndices.end() && "GEP not handled??");
}
- std::string NewName = I->getName();
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- NewName += "." + utostr(Operands[i]);
- }
- NewName += ".val";
- TheArg->setName(NewName);
+ TheArg->setName(formatv("{0}.{1:$[.]}.val", I->getName(),
+ make_range(Operands.begin(), Operands.end())));
LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
<< "' of function '" << NF->getName() << "'\n");
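The renaming above also trades a manual utostr loop for formatv's range formatting: the $[.] style inside {1:...} joins the range elements with a dot. A hedged standalone example of the same feature (function name hypothetical):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/FormatVariadic.h"
    #include <cstdint>
    #include <string>
    #include <vector>

    // Base "arg" with operands {1, 2} yields "arg.1.2.val", exactly what
    // the old utostr concatenation produced.
    std::string promotedArgName(llvm::StringRef Base,
                                const std::vector<uint64_t> &Operands) {
      return llvm::formatv("{0}.{1:$[.]}.val", Base,
                           llvm::make_range(Operands.begin(), Operands.end()));
    }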
@@ -492,10 +484,9 @@ static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) {
// Look at all call sites of the function. At this point we know we only have
// direct callees.
for (User *U : Callee->users()) {
- CallSite CS(U);
- assert(CS && "Should only have direct calls!");
+ CallBase &CB = cast<CallBase>(*U);
- if (!isDereferenceablePointer(CS.getArgument(ArgNo), Ty, DL))
+ if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL))
return false;
}
return true;
@@ -774,8 +765,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
return true;
}
-/// Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
// There is no size information, so be conservative.
if (!type->isSized())
return false;
@@ -785,13 +775,18 @@ static bool isDenselyPacked(Type *type, const DataLayout &DL) {
if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
return false;
- if (!isa<CompositeType>(type))
- return true;
+ // FIXME: This isn't the right way to check for padding in vectors with
+ // non-byte-size elements.
+ if (VectorType *seqTy = dyn_cast<VectorType>(type))
+ return isDenselyPacked(seqTy->getElementType(), DL);
- // For homogenous sequential types, check for padding within members.
- if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+ // For array types, check for padding within members.
+ if (ArrayType *seqTy = dyn_cast<ArrayType>(type))
return isDenselyPacked(seqTy->getElementType(), DL);
+ if (!isa<StructType>(type))
+ return true;
+
// Check for padding within and between elements of a struct.
StructType *StructTy = cast<StructType>(type);
const StructLayout *Layout = DL.getStructLayout(StructTy);
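With SequentialType removed from the type hierarchy, the check above must dispatch on VectorType, ArrayType, and StructType explicitly; the entry test it builds on compares stored size against allocated size. A short illustration of that base test, assuming standard DataLayout semantics:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"

    // getTypeSizeInBits counts the bits a value of Ty occupies, while
    // getTypeAllocSizeInBits rounds up to the ABI alignment; the two differ
    // exactly when tail padding exists (e.g. i1: 1 bit vs. 8 bits).
    static bool hasNoTailPadding(llvm::Type *Ty, const llvm::DataLayout &DL) {
      return DL.getTypeSizeInBits(Ty) == DL.getTypeAllocSizeInBits(Ty);
    }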
@@ -844,14 +839,16 @@ static bool canPaddingBeAccessed(Argument *arg) {
return false;
}
-static bool areFunctionArgsABICompatible(
+bool ArgumentPromotionPass::areFunctionArgsABICompatible(
const Function &F, const TargetTransformInfo &TTI,
SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
for (const Use &U : F.uses()) {
- CallSite CS(U.getUser());
- const Function *Caller = CS.getCaller();
- const Function *Callee = CS.getCalledFunction();
+ CallBase *CB = dyn_cast<CallBase>(U.getUser());
+ if (!CB)
+ return false;
+ const Function *Caller = CB->getCaller();
+ const Function *Callee = CB->getCalledFunction();
if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
!TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform))
return false;
@@ -866,7 +863,7 @@ static bool areFunctionArgsABICompatible(
static Function *
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
unsigned MaxElements,
- Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
+ Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
ReplaceCallSite,
const TargetTransformInfo &TTI) {
// Don't perform argument promotion for naked functions; otherwise we can end
@@ -905,16 +902,16 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// is self-recursive and check that target features are compatible.
bool isSelfRecursive = false;
for (Use &U : F->uses()) {
- CallSite CS(U.getUser());
+ CallBase *CB = dyn_cast<CallBase>(U.getUser());
// Must be a direct call.
- if (CS.getInstruction() == nullptr || !CS.isCallee(&U))
+ if (CB == nullptr || !CB->isCallee(&U))
return nullptr;
// Can't change signature of musttail callee
- if (CS.isMustTailCall())
+ if (CB->isMustTailCall())
return nullptr;
- if (CS.getInstruction()->getParent()->getParent() == F)
+ if (CB->getParent()->getParent() == F)
isSelfRecursive = true;
}
@@ -942,18 +939,18 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
F->removeParamAttr(ArgNo, Attribute::StructRet);
F->addParamAttr(ArgNo, Attribute::NoAlias);
for (Use &U : F->uses()) {
- CallSite CS(U.getUser());
- CS.removeParamAttr(ArgNo, Attribute::StructRet);
- CS.addParamAttr(ArgNo, Attribute::NoAlias);
+ CallBase &CB = cast<CallBase>(*U.getUser());
+ CB.removeParamAttr(ArgNo, Attribute::StructRet);
+ CB.addParamAttr(ArgNo, Attribute::NoAlias);
}
}
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe, if the passed value is densely
// packed or if we can prove the padding bytes are never accessed.
- bool isSafeToPromote =
- PtrArg->hasByValAttr() &&
- (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+ bool isSafeToPromote = PtrArg->hasByValAttr() &&
+ (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
+ !canPaddingBeAccessed(PtrArg));
if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
@@ -1011,8 +1008,8 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
return nullptr;
- if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote,
- ByValArgsToTransform))
+ if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
+ *F, TTI, ArgsToPromote, ByValArgsToTransform))
return nullptr;
return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
@@ -1135,14 +1132,13 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
if (!OldF)
continue;
- auto ReplaceCallSite = [&](CallSite OldCS, CallSite NewCS) {
- Function *Caller = OldCS.getInstruction()->getParent()->getParent();
+ auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) {
+ Function *Caller = OldCS.getParent()->getParent();
CallGraphNode *NewCalleeNode =
CG.getOrInsertFunction(NewCS.getCalledFunction());
CallGraphNode *CallerNode = CG[Caller];
- CallerNode->replaceCallEdge(*cast<CallBase>(OldCS.getInstruction()),
- *cast<CallBase>(NewCS.getInstruction()),
- NewCalleeNode);
+ CallerNode->replaceCallEdge(cast<CallBase>(OldCS),
+ cast<CallBase>(NewCS), NewCalleeNode);
};
const TargetTransformInfo &TTI =
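This final hunk keeps the legacy pass manager's CallGraph consistent when doPromotion rewrites a call. A condensed sketch of that callback under the same assumptions (CG is the pass's CallGraph, as in runOnSCC above):

    // When OldCS is replaced by NewCS, redirect the caller's call graph
    // edge to the node of the (possibly freshly created) promoted callee.
    auto ReplaceCallSite = [&](llvm::CallBase &OldCS, llvm::CallBase &NewCS) {
      llvm::Function *Caller = OldCS.getFunction();
      llvm::CallGraphNode *NewCalleeNode =
          CG.getOrInsertFunction(NewCS.getCalledFunction());
      CG[Caller]->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
    };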
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index f2995817eaf89..f96dac5f3515c 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements an inter procedural pass that deduces and/or propagating
+// This file implements an interprocedural pass that deduces and/or propagates
// attributes. This is done in an abstract interpretation style fixpoint
// iteration. See the Attributor.h file comment and the class descriptions in
// that file for more information.
@@ -15,29 +15,16 @@
#include "llvm/Transforms/IPO/Attributor.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -47,10 +34,12 @@ using namespace llvm;
#define DEBUG_TYPE "attributor"
+STATISTIC(NumFnDeleted, "Number of functions deleted");
STATISTIC(NumFnWithExactDefinition,
- "Number of function with exact definitions");
+ "Number of functions with exact definitions");
STATISTIC(NumFnWithoutExactDefinition,
- "Number of function without exact definitions");
+ "Number of functions without exact definitions");
+STATISTIC(NumFnShallowWrapperCreated, "Number of shallow wrappers created");
STATISTIC(NumAttributesTimedOut,
"Number of abstract attributes timed out before fixpoint");
STATISTIC(NumAttributesValidFixpoint,
@@ -60,80 +49,6 @@ STATISTIC(NumAttributesManifested,
STATISTIC(NumAttributesFixedDueToRequiredDependences,
"Number of abstract attributes fixed due to required dependences");
-// Some helper macros to deal with statistics tracking.
-//
-// Usage:
-// For simple IR attribute tracking overload trackStatistics in the abstract
-// attribute and choose the right STATS_DECLTRACK_********* macro,
-// e.g.,:
-// void trackStatistics() const override {
-// STATS_DECLTRACK_ARG_ATTR(returned)
-// }
-// If there is a single "increment" side one can use the macro
-// STATS_DECLTRACK with a custom message. If there are multiple increment
-// sides, STATS_DECL and STATS_TRACK can also be used separatly.
-//
-#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \
- ("Number of " #TYPE " marked '" #NAME "'")
-#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME
-#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG);
-#define STATS_DECL(NAME, TYPE, MSG) \
- STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG);
-#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE));
-#define STATS_DECLTRACK(NAME, TYPE, MSG) \
- { \
- STATS_DECL(NAME, TYPE, MSG) \
- STATS_TRACK(NAME, TYPE) \
- }
-#define STATS_DECLTRACK_ARG_ATTR(NAME) \
- STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME))
-#define STATS_DECLTRACK_CSARG_ATTR(NAME) \
- STATS_DECLTRACK(NAME, CSArguments, \
- BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME))
-#define STATS_DECLTRACK_FN_ATTR(NAME) \
- STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME))
-#define STATS_DECLTRACK_CS_ATTR(NAME) \
- STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME))
-#define STATS_DECLTRACK_FNRET_ATTR(NAME) \
- STATS_DECLTRACK(NAME, FunctionReturn, \
- BUILD_STAT_MSG_IR_ATTR(function returns, NAME))
-#define STATS_DECLTRACK_CSRET_ATTR(NAME) \
- STATS_DECLTRACK(NAME, CSReturn, \
- BUILD_STAT_MSG_IR_ATTR(call site returns, NAME))
-#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \
- STATS_DECLTRACK(NAME, Floating, \
- ("Number of floating values known to be '" #NAME "'"))
-
-// Specialization of the operator<< for abstract attributes subclasses. This
-// disambiguates situations where multiple operators are applicable.
-namespace llvm {
-#define PIPE_OPERATOR(CLASS) \
- raw_ostream &operator<<(raw_ostream &OS, const CLASS &AA) { \
- return OS << static_cast<const AbstractAttribute &>(AA); \
- }
-
-PIPE_OPERATOR(AAIsDead)
-PIPE_OPERATOR(AANoUnwind)
-PIPE_OPERATOR(AANoSync)
-PIPE_OPERATOR(AANoRecurse)
-PIPE_OPERATOR(AAWillReturn)
-PIPE_OPERATOR(AANoReturn)
-PIPE_OPERATOR(AAReturnedValues)
-PIPE_OPERATOR(AANonNull)
-PIPE_OPERATOR(AANoAlias)
-PIPE_OPERATOR(AADereferenceable)
-PIPE_OPERATOR(AAAlign)
-PIPE_OPERATOR(AANoCapture)
-PIPE_OPERATOR(AAValueSimplify)
-PIPE_OPERATOR(AANoFree)
-PIPE_OPERATOR(AAHeapToStack)
-PIPE_OPERATOR(AAReachability)
-PIPE_OPERATOR(AAMemoryBehavior)
-PIPE_OPERATOR(AAValueConstantRange)
-
-#undef PIPE_OPERATOR
-} // namespace llvm
-
// TODO: Determine a good default value.
//
// In the LLVM-TS and SPEC2006, 32 seems to not induce compile time overheads
@@ -151,30 +66,24 @@ static cl::opt<bool> VerifyMaxFixpointIterations(
cl::desc("Verify that max-iterations is a tight bound for a fixpoint"),
cl::init(false));
-static cl::opt<bool> DisableAttributor(
- "attributor-disable", cl::Hidden,
- cl::desc("Disable the attributor inter-procedural deduction pass."),
- cl::init(true));
-
static cl::opt<bool> AnnotateDeclarationCallSites(
"attributor-annotate-decl-cs", cl::Hidden,
cl::desc("Annotate call sites of function declarations."), cl::init(false));
-static cl::opt<bool> ManifestInternal(
- "attributor-manifest-internal", cl::Hidden,
- cl::desc("Manifest Attributor internal string attributes."),
- cl::init(false));
-
-static cl::opt<unsigned> DepRecInterval(
- "attributor-dependence-recompute-interval", cl::Hidden,
- cl::desc("Number of iterations until dependences are recomputed."),
- cl::init(4));
-
static cl::opt<bool> EnableHeapToStack("enable-heap-to-stack-conversion",
cl::init(true), cl::Hidden);
-static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
- cl::Hidden);
+static cl::opt<bool>
+ AllowShallowWrappers("attributor-allow-shallow-wrappers", cl::Hidden,
+ cl::desc("Allow the Attributor to create shallow "
+ "wrappers for non-exact definitions."),
+ cl::init(false));
+
+static cl::list<std::string>
+ SeedAllowList("attributor-seed-allow-list", cl::Hidden,
+                   cl::desc("Comma separated list of attribute names that are "
+ "allowed to be seeded."),
+ cl::ZeroOrMore, cl::CommaSeparated);
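Because the option is declared cl::CommaSeparated, the allow list can be passed as one comma-joined value. A hypothetical invocation (the flag name comes from this hunk; the attribute names are merely examples):

    // opt -passes=attributor -attributor-seed-allow-list=nonnull,align in.ll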
/// Logic operators for the change status enum class.
///
@@ -187,6 +96,49 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
}
///}
+/// Return true if \p New is equal or worse than \p Old.
+static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
+ if (!Old.isIntAttribute())
+ return true;
+
+ return Old.getValueAsInt() >= New.getValueAsInt();
+}
+
+/// Return true if the information provided by \p Attr was added to the
+/// attribute list \p Attrs. This is only the case if it was not already present
+/// in \p Attrs at the position described by \p PK and \p AttrIdx.
+static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
+ AttributeList &Attrs, int AttrIdx) {
+
+ if (Attr.isEnumAttribute()) {
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
+ if (Attr.isStringAttribute()) {
+ StringRef Kind = Attr.getKindAsString();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
+ if (Attr.isIntAttribute()) {
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
+
+ llvm_unreachable("Expected enum or string attribute!");
+}
+
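These helpers encode a small lattice: enum and string attributes never improve by re-adding, while integer attributes (dereferenceable, align, ...) improve only with a strictly larger value. A worked example under those rules (values hypothetical):

    // Attrs already holds dereferenceable(8) at AttrIdx.
    // addIfNotExistent(Ctx, dereferenceable(4), Attrs, AttrIdx) -> false,
    //   because isEqualOrWorse(4, 8) holds and nothing changes.
    // addIfNotExistent(Ctx, dereferenceable(16), Attrs, AttrIdx) -> true:
    //   the old attribute is removed and dereferenceable(16) is added.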
Argument *IRPosition::getAssociatedArgument() const {
if (getPositionKind() == IRP_ARGUMENT)
return cast<Argument>(&getAnchorValue());
@@ -202,10 +154,10 @@ Argument *IRPosition::getAssociatedArgument() const {
// of the underlying call site operand, we want the corresponding callback
// callee argument and not the direct callee argument.
Optional<Argument *> CBCandidateArg;
- SmallVector<const Use *, 4> CBUses;
- ImmutableCallSite ICS(&getAnchorValue());
- AbstractCallSite::getCallbackUses(ICS, CBUses);
- for (const Use *U : CBUses) {
+ SmallVector<const Use *, 4> CallbackUses;
+ const auto &CB = cast<CallBase>(getAnchorValue());
+ AbstractCallSite::getCallbackUses(CB, CallbackUses);
+ for (const Use *U : CallbackUses) {
AbstractCallSite ACS(U);
assert(ACS && ACS.isCallbackCall());
if (!ACS.getCalledFunction())
@@ -234,176 +186,13 @@ Argument *IRPosition::getAssociatedArgument() const {
// If no callbacks were found, or none used the underlying call site operand
// exclusively, use the direct callee argument if available.
- const Function *Callee = ICS.getCalledFunction();
+ const Function *Callee = CB.getCalledFunction();
if (Callee && Callee->arg_size() > unsigned(ArgNo))
return Callee->getArg(ArgNo);
return nullptr;
}
-/// For calls (and invokes) we will only replace instruction uses to not disturb
-/// the old style call graph.
-/// TODO: Remove this once we get rid of the old PM.
-static void replaceAllInstructionUsesWith(Value &Old, Value &New) {
- if (!isa<CallBase>(Old))
- return Old.replaceAllUsesWith(&New);
- SmallVector<Use *, 8> Uses;
- for (Use &U : Old.uses())
- if (isa<Instruction>(U.getUser()))
- Uses.push_back(&U);
- for (Use *U : Uses)
- U->set(&New);
-}
-
-/// Recursively visit all values that might become \p IRP at some point. This
-/// will be done by looking through cast instructions, selects, phis, and calls
-/// with the "returned" attribute. Once we cannot look through the value any
-/// further, the callback \p VisitValueCB is invoked and passed the current
-/// value, the \p State, and a flag to indicate if we stripped anything. To
-/// limit how much effort is invested, we will never visit more values than
-/// specified by \p MaxValues.
-template <typename AAType, typename StateTy>
-static bool genericValueTraversal(
- Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State,
- const function_ref<bool(Value &, StateTy &, bool)> &VisitValueCB,
- int MaxValues = 8) {
-
- const AAIsDead *LivenessAA = nullptr;
- if (IRP.getAnchorScope())
- LivenessAA = &A.getAAFor<AAIsDead>(
- QueryingAA, IRPosition::function(*IRP.getAnchorScope()),
- /* TrackDependence */ false);
- bool AnyDead = false;
-
- // TODO: Use Positions here to allow context sensitivity in VisitValueCB
- SmallPtrSet<Value *, 16> Visited;
- SmallVector<Value *, 16> Worklist;
- Worklist.push_back(&IRP.getAssociatedValue());
-
- int Iteration = 0;
- do {
- Value *V = Worklist.pop_back_val();
-
- // Check if we should process the current value. To prevent endless
- // recursion keep a record of the values we followed!
- if (!Visited.insert(V).second)
- continue;
-
- // Make sure we limit the compile time for complex expressions.
- if (Iteration++ >= MaxValues)
- return false;
-
- // Explicitly look through calls with a "returned" attribute if we do
- // not have a pointer as stripPointerCasts only works on them.
- Value *NewV = nullptr;
- if (V->getType()->isPointerTy()) {
- NewV = V->stripPointerCasts();
- } else {
- CallSite CS(V);
- if (CS && CS.getCalledFunction()) {
- for (Argument &Arg : CS.getCalledFunction()->args())
- if (Arg.hasReturnedAttr()) {
- NewV = CS.getArgOperand(Arg.getArgNo());
- break;
- }
- }
- }
- if (NewV && NewV != V) {
- Worklist.push_back(NewV);
- continue;
- }
-
- // Look through select instructions, visit both potential values.
- if (auto *SI = dyn_cast<SelectInst>(V)) {
- Worklist.push_back(SI->getTrueValue());
- Worklist.push_back(SI->getFalseValue());
- continue;
- }
-
- // Look through phi nodes, visit all live operands.
- if (auto *PHI = dyn_cast<PHINode>(V)) {
- assert(LivenessAA &&
- "Expected liveness in the presence of instructions!");
- for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
- const BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
- if (LivenessAA->isAssumedDead(IncomingBB->getTerminator())) {
- AnyDead = true;
- continue;
- }
- Worklist.push_back(PHI->getIncomingValue(u));
- }
- continue;
- }
-
- // Once a leaf is reached we inform the user through the callback.
- if (!VisitValueCB(*V, State, Iteration > 1))
- return false;
- } while (!Worklist.empty());
-
- // If we actually used liveness information so we have to record a dependence.
- if (AnyDead)
- A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
-
- // All values have been visited.
- return true;
-}
-
-/// Return true if \p New is equal or worse than \p Old.
-static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
- if (!Old.isIntAttribute())
- return true;
-
- return Old.getValueAsInt() >= New.getValueAsInt();
-}
-
-/// Return true if the information provided by \p Attr was added to the
-/// attribute list \p Attrs. This is only the case if it was not already present
-/// in \p Attrs at the position describe by \p PK and \p AttrIdx.
-static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
- AttributeList &Attrs, int AttrIdx) {
-
- if (Attr.isEnumAttribute()) {
- Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttribute(AttrIdx, Kind))
- if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
- return false;
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
- return true;
- }
- if (Attr.isStringAttribute()) {
- StringRef Kind = Attr.getKindAsString();
- if (Attrs.hasAttribute(AttrIdx, Kind))
- if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
- return false;
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
- return true;
- }
- if (Attr.isIntAttribute()) {
- Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttribute(AttrIdx, Kind))
- if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
- return false;
- Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
- return true;
- }
-
- llvm_unreachable("Expected enum or string attribute!");
-}
-
-static const Value *
-getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
- const DataLayout &DL,
- bool AllowNonInbounds = false) {
- const Value *Ptr =
- Attributor::getPointerOperand(I, /* AllowVolatile */ false);
- if (!Ptr)
- return nullptr;
-
- return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL,
- AllowNonInbounds);
-}
-
ChangeStatus AbstractAttribute::update(Attributor &A) {
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
if (getState().isAtFixpoint())
@@ -422,7 +211,7 @@ ChangeStatus AbstractAttribute::update(Attributor &A) {
ChangeStatus
IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
const ArrayRef<Attribute> &DeducedAttrs) {
- Function *ScopeFn = IRP.getAssociatedFunction();
+ Function *ScopeFn = IRP.getAnchorScope();
IRPosition::Kind PK = IRP.getPositionKind();
// In the following some generic code that will manifest attributes in
@@ -442,7 +231,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
case IRPosition::IRP_CALL_SITE:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
- Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes();
+ Attrs = cast<CallBase>(IRP.getAnchorValue()).getAttributes();
break;
}
@@ -467,7 +256,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
case IRPosition::IRP_CALL_SITE:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
- CallSite(&IRP.getAnchorValue()).setAttributes(Attrs);
+ cast<CallBase>(IRP.getAnchorValue()).setAttributes(Attrs);
break;
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
@@ -477,13 +266,14 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
return HasChanged;
}
-const IRPosition IRPosition::EmptyKey(255);
-const IRPosition IRPosition::TombstoneKey(256);
+const IRPosition IRPosition::EmptyKey(DenseMapInfo<void *>::getEmptyKey());
+const IRPosition
+ IRPosition::TombstoneKey(DenseMapInfo<void *>::getTombstoneKey());
SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
IRPositions.emplace_back(IRP);
- ImmutableCallSite ICS(&IRP.getAnchorValue());
+ const auto *CB = dyn_cast<CallBase>(&IRP.getAnchorValue());
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
@@ -491,37 +281,43 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
return;
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_RETURNED:
- IRPositions.emplace_back(
- IRPosition::function(*IRP.getAssociatedFunction()));
+ IRPositions.emplace_back(IRPosition::function(*IRP.getAnchorScope()));
return;
case IRPosition::IRP_CALL_SITE:
- assert(ICS && "Expected call site!");
+ assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!ICS.hasOperandBundles())
- if (const Function *Callee = ICS.getCalledFunction())
+ if (!CB->hasOperandBundles())
+ if (const Function *Callee = CB->getCalledFunction())
IRPositions.emplace_back(IRPosition::function(*Callee));
return;
case IRPosition::IRP_CALL_SITE_RETURNED:
- assert(ICS && "Expected call site!");
+ assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!ICS.hasOperandBundles()) {
- if (const Function *Callee = ICS.getCalledFunction()) {
+ if (!CB->hasOperandBundles()) {
+ if (const Function *Callee = CB->getCalledFunction()) {
IRPositions.emplace_back(IRPosition::returned(*Callee));
IRPositions.emplace_back(IRPosition::function(*Callee));
+ for (const Argument &Arg : Callee->args())
+ if (Arg.hasReturnedAttr()) {
+ IRPositions.emplace_back(
+ IRPosition::callsite_argument(*CB, Arg.getArgNo()));
+ IRPositions.emplace_back(
+ IRPosition::value(*CB->getArgOperand(Arg.getArgNo())));
+ IRPositions.emplace_back(IRPosition::argument(Arg));
+ }
}
}
- IRPositions.emplace_back(
- IRPosition::callsite_function(cast<CallBase>(*ICS.getInstruction())));
+ IRPositions.emplace_back(IRPosition::callsite_function(*CB));
return;
case IRPosition::IRP_CALL_SITE_ARGUMENT: {
int ArgNo = IRP.getArgNo();
- assert(ICS && ArgNo >= 0 && "Expected call site!");
+ assert(CB && ArgNo >= 0 && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
- if (!ICS.hasOperandBundles()) {
- const Function *Callee = ICS.getCalledFunction();
+ if (!CB->hasOperandBundles()) {
+ const Function *Callee = CB->getCalledFunction();
if (Callee && Callee->arg_size() > unsigned(ArgNo))
IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo)));
if (Callee)
@@ -534,10 +330,11 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
}
bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs,
- bool IgnoreSubsumingPositions) const {
+ bool IgnoreSubsumingPositions, Attributor *A) const {
+ SmallVector<Attribute, 4> Attrs;
for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
for (Attribute::AttrKind AK : AKs)
- if (EquivIRP.getAttr(AK).getKindAsEnum() == AK)
+ if (EquivIRP.getAttrsFromIRAttr(AK, Attrs))
return true;
// The first position returned by the SubsumingPositionIterator is
// always the position itself. If we ignore subsuming positions we
@@ -545,5052 +342,300 @@ bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs,
if (IgnoreSubsumingPositions)
break;
}
+ if (A)
+ for (Attribute::AttrKind AK : AKs)
+ if (getAttrsFromAssumes(AK, Attrs, *A))
+ return true;
return false;
}
void IRPosition::getAttrs(ArrayRef<Attribute::AttrKind> AKs,
SmallVectorImpl<Attribute> &Attrs,
- bool IgnoreSubsumingPositions) const {
+ bool IgnoreSubsumingPositions, Attributor *A) const {
for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
- for (Attribute::AttrKind AK : AKs) {
- const Attribute &Attr = EquivIRP.getAttr(AK);
- if (Attr.getKindAsEnum() == AK)
- Attrs.push_back(Attr);
- }
+ for (Attribute::AttrKind AK : AKs)
+ EquivIRP.getAttrsFromIRAttr(AK, Attrs);
// The first position returned by the SubsumingPositionIterator is
// always the position itself. If we ignore subsuming positions we
// are done after the first iteration.
if (IgnoreSubsumingPositions)
break;
}
+ if (A)
+ for (Attribute::AttrKind AK : AKs)
+ getAttrsFromAssumes(AK, Attrs, *A);
+}
+
+bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK,
+ SmallVectorImpl<Attribute> &Attrs) const {
+ if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
+ return false;
+
+ AttributeList AttrList;
+ if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+ AttrList = CB->getAttributes();
+ else
+ AttrList = getAssociatedFunction()->getAttributes();
+
+ bool HasAttr = AttrList.hasAttribute(getAttrIdx(), AK);
+ if (HasAttr)
+ Attrs.push_back(AttrList.getAttribute(getAttrIdx(), AK));
+ return HasAttr;
+}
+
+bool IRPosition::getAttrsFromAssumes(Attribute::AttrKind AK,
+ SmallVectorImpl<Attribute> &Attrs,
+ Attributor &A) const {
+  assert(getPositionKind() != IRP_INVALID && "Expected a valid position!");
+ Value &AssociatedValue = getAssociatedValue();
+
+ const Assume2KnowledgeMap &A2K =
+ A.getInfoCache().getKnowledgeMap().lookup({&AssociatedValue, AK});
+
+ // Check if we found any potential assume use, if not we don't need to create
+ // explorer iterators.
+ if (A2K.empty())
+ return false;
+
+ LLVMContext &Ctx = AssociatedValue.getContext();
+ unsigned AttrsSize = Attrs.size();
+ MustBeExecutedContextExplorer &Explorer =
+ A.getInfoCache().getMustBeExecutedContextExplorer();
+ auto EIt = Explorer.begin(getCtxI()), EEnd = Explorer.end(getCtxI());
+ for (auto &It : A2K)
+ if (Explorer.findInContextOf(It.first, EIt, EEnd))
+ Attrs.push_back(Attribute::get(Ctx, AK, It.second.Max));
+ return AttrsSize != Attrs.size();
}
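With this change, hasAttr and getAttrs optionally consult assume-derived knowledge: passing an Attributor routes the query through getAttrsFromAssumes, which only accepts knowledge whose llvm.assume lies in the must-be-executed context of the position. A caller-side sketch of the difference, assuming an IRPosition IRP and Attributor A in scope and the defaulted parameters from the declaration:

    // IR attributes only:
    bool HasNonNull = IRP.hasAttr({llvm::Attribute::NonNull});
    // IR attributes plus knowledge retained from llvm.assume calls:
    bool HasNonNullWithAssumes =
        IRP.hasAttr({llvm::Attribute::NonNull},
                    /* IgnoreSubsumingPositions */ false, &A);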
void IRPosition::verify() {
- switch (KindOrArgNo) {
- default:
- assert(KindOrArgNo >= 0 && "Expected argument or call site argument!");
- assert((isa<CallBase>(AnchorVal) || isa<Argument>(AnchorVal)) &&
- "Expected call base or argument for positive attribute index!");
- if (isa<Argument>(AnchorVal)) {
- assert(cast<Argument>(AnchorVal)->getArgNo() == unsigned(getArgNo()) &&
- "Argument number mismatch!");
- assert(cast<Argument>(AnchorVal) == &getAssociatedValue() &&
- "Associated value mismatch!");
- } else {
- assert(cast<CallBase>(*AnchorVal).arg_size() > unsigned(getArgNo()) &&
- "Call site argument number mismatch!");
- assert(cast<CallBase>(*AnchorVal).getArgOperand(getArgNo()) ==
- &getAssociatedValue() &&
- "Associated value mismatch!");
- }
- break;
+#ifdef EXPENSIVE_CHECKS
+ switch (getPositionKind()) {
case IRP_INVALID:
- assert(!AnchorVal && "Expected no value for an invalid position!");
- break;
+ assert(!Enc.getOpaqueValue() &&
+ "Expected a nullptr for an invalid position!");
+ return;
case IRP_FLOAT:
assert((!isa<CallBase>(&getAssociatedValue()) &&
!isa<Argument>(&getAssociatedValue())) &&
"Expected specialized kind for call base and argument values!");
- break;
+ return;
case IRP_RETURNED:
- assert(isa<Function>(AnchorVal) &&
+ assert(isa<Function>(getAsValuePtr()) &&
"Expected function for a 'returned' position!");
- assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
- break;
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
case IRP_CALL_SITE_RETURNED:
- assert((isa<CallBase>(AnchorVal)) &&
+ assert((isa<CallBase>(getAsValuePtr())) &&
"Expected call base for 'call site returned' position!");
- assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
- break;
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
case IRP_CALL_SITE:
- assert((isa<CallBase>(AnchorVal)) &&
+ assert((isa<CallBase>(getAsValuePtr())) &&
"Expected call base for 'call site function' position!");
- assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
- break;
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
case IRP_FUNCTION:
- assert(isa<Function>(AnchorVal) &&
+ assert(isa<Function>(getAsValuePtr()) &&
"Expected function for a 'function' position!");
- assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!");
- break;
- }
-}
-
-namespace {
-/// Helper function to clamp a state \p S of type \p StateType with the
-/// information in \p R and indicate/return if \p S did change (as-in update is
-/// required to be run again).
-template <typename StateType>
-ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
- auto Assumed = S.getAssumed();
- S ^= R;
- return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
-}
-
-/// Clamp the information known for all returned values of a function
-/// (identified by \p QueryingAA) into \p S.
-template <typename AAType, typename StateType = typename AAType::StateType>
-static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
- StateType &S) {
- LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for "
- << QueryingAA << " into " << S << "\n");
-
- assert((QueryingAA.getIRPosition().getPositionKind() ==
- IRPosition::IRP_RETURNED ||
- QueryingAA.getIRPosition().getPositionKind() ==
- IRPosition::IRP_CALL_SITE_RETURNED) &&
- "Can only clamp returned value states for a function returned or call "
- "site returned position!");
-
- // Use an optional state as there might not be any return values and we want
- // to join (IntegerState::operator&) the state of all there are.
- Optional<StateType> T;
-
- // Callback for each possibly returned value.
- auto CheckReturnValue = [&](Value &RV) -> bool {
- const IRPosition &RVPos = IRPosition::value(RV);
- const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
- LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
- << " @ " << RVPos << "\n");
- const StateType &AAS = static_cast<const StateType &>(AA.getState());
- if (T.hasValue())
- *T &= AAS;
- else
- T = AAS;
- LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T
- << "\n");
- return T->isValidState();
- };
-
- if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
- S.indicatePessimisticFixpoint();
- else if (T.hasValue())
- S ^= *T;
-}
-
-/// Helper class to compose two generic deduction
-template <typename AAType, typename Base, typename StateType,
- template <typename...> class F, template <typename...> class G>
-struct AAComposeTwoGenericDeduction
- : public F<AAType, G<AAType, Base, StateType>, StateType> {
- AAComposeTwoGenericDeduction(const IRPosition &IRP)
- : F<AAType, G<AAType, Base, StateType>, StateType>(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus ChangedF =
- F<AAType, G<AAType, Base, StateType>, StateType>::updateImpl(A);
- ChangeStatus ChangedG = G<AAType, Base, StateType>::updateImpl(A);
- return ChangedF | ChangedG;
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
+ case IRP_ARGUMENT:
+ assert(isa<Argument>(getAsValuePtr()) &&
+ "Expected argument for a 'argument' position!");
+ assert(getAsValuePtr() == &getAssociatedValue() &&
+ "Associated value mismatch!");
+ return;
+ case IRP_CALL_SITE_ARGUMENT: {
+ Use *U = getAsUsePtr();
+ assert(U && "Expected use for a 'call site argument' position!");
+ assert(isa<CallBase>(U->getUser()) &&
+ "Expected call base user for a 'call site argument' position!");
+ assert(cast<CallBase>(U->getUser())->isArgOperand(U) &&
+ "Expected call base argument operand for a 'call site argument' "
+ "position");
+ assert(cast<CallBase>(U->getUser())->getArgOperandNo(U) ==
+ unsigned(getArgNo()) &&
+ "Argument number mismatch!");
+ assert(U->get() == &getAssociatedValue() && "Associated value mismatch!");
+ return;
}
-};
-
-/// Helper class for generic deduction: return value -> returned position.
-template <typename AAType, typename Base,
- typename StateType = typename AAType::StateType>
-struct AAReturnedFromReturnedValues : public Base {
- AAReturnedFromReturnedValues(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- StateType S;
- clampReturnedValueStates<AAType, StateType>(A, *this, S);
- // TODO: If we know we visited all returned values, thus no are assumed
- // dead, we can take the known information from the state T.
- return clampStateAndIndicateChange<StateType>(this->getState(), S);
}
-};
-
-/// Clamp the information known at all call sites for a given argument
-/// (identified by \p QueryingAA) into \p S.
-template <typename AAType, typename StateType = typename AAType::StateType>
-static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
- StateType &S) {
- LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
- << QueryingAA << " into " << S << "\n");
-
- assert(QueryingAA.getIRPosition().getPositionKind() ==
- IRPosition::IRP_ARGUMENT &&
- "Can only clamp call site argument states for an argument position!");
-
- // Use an optional state as there might not be any return values and we want
- // to join (IntegerState::operator&) the state of all there are.
- Optional<StateType> T;
-
- // The argument number which is also the call site argument number.
- unsigned ArgNo = QueryingAA.getIRPosition().getArgNo();
-
- auto CallSiteCheck = [&](AbstractCallSite ACS) {
- const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
- // Check if a coresponding argument was found or if it is on not associated
- // (which can happen for callback calls).
- if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
- return false;
-
- const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
- LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
- << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
- const StateType &AAS = static_cast<const StateType &>(AA.getState());
- if (T.hasValue())
- *T &= AAS;
- else
- T = AAS;
- LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T
- << "\n");
- return T->isValidState();
- };
-
- if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true))
- S.indicatePessimisticFixpoint();
- else if (T.hasValue())
- S ^= *T;
+#endif
}
-/// Helper class for generic deduction: call site argument -> argument position.
-template <typename AAType, typename Base,
- typename StateType = typename AAType::StateType>
-struct AAArgumentFromCallSiteArguments : public Base {
- AAArgumentFromCallSiteArguments(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- StateType S;
- clampCallSiteArgumentStates<AAType, StateType>(A, *this, S);
- // TODO: If we know we visited all incoming values, thus no are assumed
- // dead, we can take the known information from the state T.
- return clampStateAndIndicateChange<StateType>(this->getState(), S);
- }
-};
-
-/// Helper class for generic replication: function returned -> cs returned.
-template <typename AAType, typename Base,
- typename StateType = typename AAType::StateType>
-struct AACallSiteReturnedFromReturned : public Base {
- AACallSiteReturnedFromReturned(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- assert(this->getIRPosition().getPositionKind() ==
- IRPosition::IRP_CALL_SITE_RETURNED &&
- "Can only wrap function returned positions for call site returned "
- "positions!");
- auto &S = this->getState();
-
- const Function *AssociatedFunction =
- this->getIRPosition().getAssociatedFunction();
- if (!AssociatedFunction)
- return S.indicatePessimisticFixpoint();
-
- IRPosition FnPos = IRPosition::returned(*AssociatedFunction);
- const AAType &AA = A.getAAFor<AAType>(*this, FnPos);
- return clampStateAndIndicateChange(
- S, static_cast<const typename AAType::StateType &>(AA.getState()));
- }
-};
-
-/// Helper class for generic deduction using must-be-executed-context
-/// Base class is required to have `followUse` method.
-
-/// bool followUse(Attributor &A, const Use *U, const Instruction *I)
-/// U - Underlying use.
-/// I - The user of the \p U.
-/// `followUse` returns true if the value should be tracked transitively.
-
-template <typename AAType, typename Base,
- typename StateType = typename AAType::StateType>
-struct AAFromMustBeExecutedContext : public Base {
- AAFromMustBeExecutedContext(const IRPosition &IRP) : Base(IRP) {}
-
- void initialize(Attributor &A) override {
- Base::initialize(A);
- const IRPosition &IRP = this->getIRPosition();
- Instruction *CtxI = IRP.getCtxI();
-
- if (!CtxI)
- return;
-
- for (const Use &U : IRP.getAssociatedValue().uses())
- Uses.insert(&U);
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- auto BeforeState = this->getState();
- auto &S = this->getState();
- Instruction *CtxI = this->getIRPosition().getCtxI();
- if (!CtxI)
- return ChangeStatus::UNCHANGED;
-
- MustBeExecutedContextExplorer &Explorer =
- A.getInfoCache().getMustBeExecutedContextExplorer();
-
- auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
- for (unsigned u = 0; u < Uses.size(); ++u) {
- const Use *U = Uses[u];
- if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
- bool Found = Explorer.findInContextOf(UserI, EIt, EEnd);
- if (Found && Base::followUse(A, U, UserI))
- for (const Use &Us : UserI->uses())
- Uses.insert(&Us);
- }
- }
-
- return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
- }
-
-private:
- /// Container for (transitive) uses of the associated value.
- SetVector<const Use *> Uses;
-};
-
-template <typename AAType, typename Base,
- typename StateType = typename AAType::StateType>
-using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext =
- AAComposeTwoGenericDeduction<AAType, Base, StateType,
- AAFromMustBeExecutedContext,
- AAArgumentFromCallSiteArguments>;
-
-template <typename AAType, typename Base,
- typename StateType = typename AAType::StateType>
-using AACallSiteReturnedFromReturnedAndMustBeExecutedContext =
- AAComposeTwoGenericDeduction<AAType, Base, StateType,
- AAFromMustBeExecutedContext,
- AACallSiteReturnedFromReturned>;
-
-/// -----------------------NoUnwind Function Attribute--------------------------
-
-struct AANoUnwindImpl : AANoUnwind {
- AANoUnwindImpl(const IRPosition &IRP) : AANoUnwind(IRP) {}
-
- const std::string getAsStr() const override {
- return getAssumed() ? "nounwind" : "may-unwind";
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- auto Opcodes = {
- (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
- (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
- (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
-
- auto CheckForNoUnwind = [&](Instruction &I) {
- if (!I.mayThrow())
- return true;
-
- if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
- const auto &NoUnwindAA =
- A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(ICS));
- return NoUnwindAA.isAssumedNoUnwind();
- }
- return false;
- };
-
- if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes))
- return indicatePessimisticFixpoint();
-
- return ChangeStatus::UNCHANGED;
- }
-};
-
-struct AANoUnwindFunction final : public AANoUnwindImpl {
- AANoUnwindFunction(const IRPosition &IRP) : AANoUnwindImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) }
-};
-
-/// NoUnwind attribute deduction for a call sites.
-struct AANoUnwindCallSite final : AANoUnwindImpl {
- AANoUnwindCallSite(const IRPosition &IRP) : AANoUnwindImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoUnwindImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AANoUnwind::StateType &>(FnAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
-};
-
-/// --------------------- Function Return Values -------------------------------
-
-/// "Attribute" that collects all potential returned values and the return
-/// instructions that they arise from.
-///
-/// If there is a unique returned value R, the manifest method will:
-/// - mark R with the "returned" attribute, if R is an argument.
-class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
-
- /// Mapping of values potentially returned by the associated function to the
- /// return instructions that might return them.
- MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues;
-
- /// Mapping to remember the number of returned values for a call site such
- /// that we can avoid updates if nothing changed.
- DenseMap<const CallBase *, unsigned> NumReturnedValuesPerKnownAA;
-
- /// Set of unresolved calls returned by the associated function.
- SmallSetVector<CallBase *, 4> UnresolvedCalls;
-
- /// State flags
- ///
- ///{
- bool IsFixed = false;
- bool IsValidState = true;
- ///}
-
-public:
- AAReturnedValuesImpl(const IRPosition &IRP) : AAReturnedValues(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- // Reset the state.
- IsFixed = false;
- IsValidState = true;
- ReturnedValues.clear();
-
- Function *F = getAssociatedFunction();
- if (!F) {
- indicatePessimisticFixpoint();
- return;
- }
-
- // The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F);
-
- // Look through all arguments, if one is marked as returned we are done.
- for (Argument &Arg : F->args()) {
- if (Arg.hasReturnedAttr()) {
- auto &ReturnInstSet = ReturnedValues[&Arg];
- for (Instruction *RI : OpcodeInstMap[Instruction::Ret])
- ReturnInstSet.insert(cast<ReturnInst>(RI));
-
- indicateOptimisticFixpoint();
- return;
- }
- }
-
- if (!F->hasExactDefinition())
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override;
-
- /// See AbstractAttribute::getState(...).
- AbstractState &getState() override { return *this; }
-
- /// See AbstractAttribute::getState(...).
- const AbstractState &getState() const override { return *this; }
-
- /// See AbstractAttribute::updateImpl(Attributor &A).
- ChangeStatus updateImpl(Attributor &A) override;
-
- llvm::iterator_range<iterator> returned_values() override {
- return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
- }
-
- llvm::iterator_range<const_iterator> returned_values() const override {
- return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
- }
-
- const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const override {
- return UnresolvedCalls;
- }
-
- /// Return the number of potential return values, -1 if unknown.
- size_t getNumReturnValues() const override {
- return isValidState() ? ReturnedValues.size() : -1;
- }
-
-  /// Return an assumed unique return value if a single candidate is found. If
-  /// there cannot be one, return a nullptr. If it is not clear yet, return
-  /// llvm::None.
- Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
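// The tri-state result above reads as follows (illustrative):
//   llvm::None -> not decided yet, query again in a later iteration;
//   nullptr    -> there provably is no unique return value;
//   V          -> V is the assumed unique return value (undefs are ignored).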
-
- /// See AbstractState::checkForAllReturnedValues(...).
- bool checkForAllReturnedValuesAndReturnInsts(
- const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
- &Pred) const override;
-
- /// Pretty print the attribute similar to the IR representation.
- const std::string getAsStr() const override;
-
- /// See AbstractState::isAtFixpoint().
- bool isAtFixpoint() const override { return IsFixed; }
-
- /// See AbstractState::isValidState().
- bool isValidState() const override { return IsValidState; }
-
- /// See AbstractState::indicateOptimisticFixpoint(...).
- ChangeStatus indicateOptimisticFixpoint() override {
- IsFixed = true;
- return ChangeStatus::UNCHANGED;
+Optional<Constant *>
+Attributor::getAssumedConstant(const Value &V, const AbstractAttribute &AA,
+ bool &UsedAssumedInformation) {
+ const auto &ValueSimplifyAA = getAAFor<AAValueSimplify>(
+ AA, IRPosition::value(V), /* TrackDependence */ false);
+ Optional<Value *> SimplifiedV =
+ ValueSimplifyAA.getAssumedSimplifiedValue(*this);
+ bool IsKnown = ValueSimplifyAA.isKnown();
+ UsedAssumedInformation |= !IsKnown;
+ if (!SimplifiedV.hasValue()) {
+ recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
+ return llvm::None;
}
-
- ChangeStatus indicatePessimisticFixpoint() override {
- IsFixed = true;
- IsValidState = false;
- return ChangeStatus::CHANGED;
+ if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue())) {
+ recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
+ return llvm::None;
}
-};
-
-ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
-
- // Bookkeeping.
- assert(isValidState());
- STATS_DECLTRACK(KnownReturnValues, FunctionReturn,
- "Number of function with known return values");
-
- // Check if we have an assumed unique return value that we could manifest.
- Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
-
- if (!UniqueRV.hasValue() || !UniqueRV.getValue())
- return Changed;
-
- // Bookkeeping.
- STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
- "Number of function with unique return");
-
- // Callback to replace the uses of CB with the constant C.
- auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) {
- if (CB.getNumUses() == 0 || CB.isMustTailCall())
- return ChangeStatus::UNCHANGED;
- replaceAllInstructionUsesWith(CB, C);
- return ChangeStatus::CHANGED;
- };
-
- // If the assumed unique return value is an argument, annotate it.
- if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
- // TODO: This should be handled differently!
- this->AnchorVal = UniqueRVArg;
- this->KindOrArgNo = UniqueRVArg->getArgNo();
- Changed = IRAttribute::manifest(A);
- } else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) {
- // We can replace the returned value with the unique returned constant.
- Value &AnchorValue = getAnchorValue();
- if (Function *F = dyn_cast<Function>(&AnchorValue)) {
- for (const Use &U : F->uses())
- if (CallBase *CB = dyn_cast<CallBase>(U.getUser()))
- if (CB->isCallee(&U)) {
- Constant *RVCCast =
- CB->getType() == RVC->getType()
- ? RVC
- : ConstantExpr::getTruncOrBitCast(RVC, CB->getType());
- Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed;
- }
- } else {
-    assert(isa<CallBase>(AnchorValue) &&
-           "Expected a function or call base anchor!");
- Constant *RVCCast =
- AnchorValue.getType() == RVC->getType()
- ? RVC
- : ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType());
- Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast);
- }
- if (Changed == ChangeStatus::CHANGED)
- STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn,
- "Number of function returns replaced by constant return");
+ Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.getValue());
+ if (CI && CI->getType() != V.getType()) {
+    // TODO: Check for a safe conversion.
+ return nullptr;
}
-
- return Changed;
-}
-
-const std::string AAReturnedValuesImpl::getAsStr() const {
- return (isAtFixpoint() ? "returns(#" : "may-return(#") +
- (isValidState() ? std::to_string(getNumReturnValues()) : "?") +
- ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]";
+ if (CI)
+ recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
+ return CI;
}
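// A minimal usage sketch for the new helper (hypothetical caller code):
//
//   bool UsedAssumedInformation = false;
//   Optional<Constant *> C =
//       A.getAssumedConstant(V, *this, UsedAssumedInformation);
//   if (!C.hasValue())
//     return ChangeStatus::UNCHANGED; // Simplification is still pending.
//   if (Constant *Folded = C.getValue())
//     ; // V simplifies to Folded, possibly based on assumed information.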
-Optional<Value *>
-AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const {
- // If checkForAllReturnedValues provides a unique value, ignoring potential
- // undef values that can also be present, it is assumed to be the actual
- // return value and forwarded to the caller of this method. If there are
- // multiple, a nullptr is returned indicating there cannot be a unique
- // returned value.
- Optional<Value *> UniqueRV;
-
- auto Pred = [&](Value &RV) -> bool {
- // If we found a second returned value and neither the current nor the saved
- // one is an undef, there is no unique returned value. Undefs are special
- // since we can pretend they have any value.
- if (UniqueRV.hasValue() && UniqueRV != &RV &&
- !(isa<UndefValue>(RV) || isa<UndefValue>(UniqueRV.getValue()))) {
- UniqueRV = nullptr;
- return false;
- }
-
- // Do not overwrite a value with an undef.
- if (!UniqueRV.hasValue() || !isa<UndefValue>(RV))
- UniqueRV = &RV;
-
- return true;
- };
-
- if (!A.checkForAllReturnedValues(Pred, *this))
- UniqueRV = nullptr;
-
- return UniqueRV;
+Attributor::~Attributor() {
+ // The abstract attributes are allocated via the BumpPtrAllocator Allocator,
+ // thus we cannot delete them. We can, and want to, destruct them though.
+ for (AbstractAttribute *AA : AllAbstractAttributes)
+ AA->~AbstractAttribute();
}
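// For context, the matching allocation side of this idiom, assuming the
// usual placement-new overload for llvm::BumpPtrAllocator:
//
//   AAType &AA = *new (Allocator) AAType(IRP); // Construct in bump memory.
//   // ... use AA ...; never `delete &AA`, only run the destructor as above.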
-bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
- const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
- &Pred) const {
- if (!isValidState())
+bool Attributor::isAssumedDead(const AbstractAttribute &AA,
+ const AAIsDead *FnLivenessAA,
+ bool CheckBBLivenessOnly, DepClassTy DepClass) {
+ const IRPosition &IRP = AA.getIRPosition();
+ if (!Functions.count(IRP.getAnchorScope()))
return false;
-
- // Check all returned values but ignore call sites as long as we have not
- // encountered an overdefined one during an update.
- for (auto &It : ReturnedValues) {
- Value *RV = It.first;
-
- CallBase *CB = dyn_cast<CallBase>(RV);
- if (CB && !UnresolvedCalls.count(CB))
- continue;
-
- if (!Pred(*RV, It.second))
- return false;
- }
-
- return true;
-}
-
-ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
- size_t NumUnresolvedCalls = UnresolvedCalls.size();
- bool Changed = false;
-
- // State used in the value traversals starting in returned values.
- struct RVState {
- // The map in which we collect return values -> return instrs.
- decltype(ReturnedValues) &RetValsMap;
- // The flag to indicate a change.
- bool &Changed;
- // The return instrs we come from.
- SmallSetVector<ReturnInst *, 4> RetInsts;
- };
-
- // Callback for a leaf value returned by the associated function.
- auto VisitValueCB = [](Value &Val, RVState &RVS, bool) -> bool {
- auto Size = RVS.RetValsMap[&Val].size();
- RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end());
- bool Inserted = RVS.RetValsMap[&Val].size() != Size;
- RVS.Changed |= Inserted;
- LLVM_DEBUG({
- if (Inserted)
- dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val
- << " => " << RVS.RetInsts.size() << "\n";
- });
+ return isAssumedDead(IRP, &AA, FnLivenessAA, CheckBBLivenessOnly, DepClass);
+}
+
+bool Attributor::isAssumedDead(const Use &U,
+ const AbstractAttribute *QueryingAA,
+ const AAIsDead *FnLivenessAA,
+ bool CheckBBLivenessOnly, DepClassTy DepClass) {
+ Instruction *UserI = dyn_cast<Instruction>(U.getUser());
+ if (!UserI)
+ return isAssumedDead(IRPosition::value(*U.get()), QueryingAA, FnLivenessAA,
+ CheckBBLivenessOnly, DepClass);
+
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+ // For call site argument uses we can check if the argument is
+ // unused/dead.
+ if (CB->isArgOperand(&U)) {
+ const IRPosition &CSArgPos =
+ IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U));
+ return isAssumedDead(CSArgPos, QueryingAA, FnLivenessAA,
+ CheckBBLivenessOnly, DepClass);
+ }
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(UserI)) {
+ const IRPosition &RetPos = IRPosition::returned(*RI->getFunction());
+ return isAssumedDead(RetPos, QueryingAA, FnLivenessAA, CheckBBLivenessOnly,
+ DepClass);
+ } else if (PHINode *PHI = dyn_cast<PHINode>(UserI)) {
+ BasicBlock *IncomingBB = PHI->getIncomingBlock(U);
+ return isAssumedDead(*IncomingBB->getTerminator(), QueryingAA, FnLivenessAA,
+ CheckBBLivenessOnly, DepClass);
+ }
+
+ return isAssumedDead(IRPosition::value(*UserI), QueryingAA, FnLivenessAA,
+ CheckBBLivenessOnly, DepClass);
+}
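// Dispatch summary for the overload above (hypothetical IR):
//
//   call void @g(i32 %x)    ; use of %x -> callsite argument position of @g
//   ret i32 %x              ; use of %x -> returned position of the caller
//   phi i32 [ %x, %bb ]     ; use of %x -> liveness of %bb's terminator
//
// All remaining users fall back to the value position of the user.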
+
+bool Attributor::isAssumedDead(const Instruction &I,
+ const AbstractAttribute *QueryingAA,
+ const AAIsDead *FnLivenessAA,
+ bool CheckBBLivenessOnly, DepClassTy DepClass) {
+ if (!FnLivenessAA)
+ FnLivenessAA = lookupAAFor<AAIsDead>(IRPosition::function(*I.getFunction()),
+ QueryingAA,
+ /* TrackDependence */ false);
+
+ // If we have a context instruction and a liveness AA we use it.
+ if (FnLivenessAA &&
+ FnLivenessAA->getIRPosition().getAnchorScope() == I.getFunction() &&
+ FnLivenessAA->isAssumedDead(&I)) {
+ if (QueryingAA)
+ recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
return true;
- };
-
- // Helper method to invoke the generic value traversal.
- auto VisitReturnedValue = [&](Value &RV, RVState &RVS) {
- IRPosition RetValPos = IRPosition::value(RV);
- return genericValueTraversal<AAReturnedValues, RVState>(A, RetValPos, *this,
- RVS, VisitValueCB);
- };
-
-  // Callback for all "return instructions" live in the associated function.
- auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) {
- ReturnInst &Ret = cast<ReturnInst>(I);
- RVState RVS({ReturnedValues, Changed, {}});
- RVS.RetInsts.insert(&Ret);
- return VisitReturnedValue(*Ret.getReturnValue(), RVS);
- };
-
-  // Start by discovering returned values from all live return instructions in
- // the associated function.
- if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}))
- return indicatePessimisticFixpoint();
-
- // Once returned values "directly" present in the code are handled we try to
- // resolve returned calls.
- decltype(ReturnedValues) NewRVsMap;
- for (auto &It : ReturnedValues) {
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *It.first
- << " by #" << It.second.size() << " RIs\n");
- CallBase *CB = dyn_cast<CallBase>(It.first);
- if (!CB || UnresolvedCalls.count(CB))
- continue;
-
- if (!CB->getCalledFunction()) {
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
- << "\n");
- UnresolvedCalls.insert(CB);
- continue;
- }
-
- // TODO: use the function scope once we have call site AAReturnedValues.
- const auto &RetValAA = A.getAAFor<AAReturnedValues>(
- *this, IRPosition::function(*CB->getCalledFunction()));
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: "
- << RetValAA << "\n");
-
-    // Skip dead ends, that is, if we do not know anything about the returned
-    // call we mark it as unresolved and it will stay that way.
- if (!RetValAA.getState().isValidState()) {
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
- << "\n");
- UnresolvedCalls.insert(CB);
- continue;
- }
-
- // Do not try to learn partial information. If the callee has unresolved
- // return values we will treat the call as unresolved/opaque.
- auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls();
- if (!RetValAAUnresolvedCalls.empty()) {
- UnresolvedCalls.insert(CB);
- continue;
- }
-
-    // Now check if we can track transitively returned values. If possible,
-    // that is, if all return values can be represented in the current scope,
-    // do so.
- bool Unresolved = false;
- for (auto &RetValAAIt : RetValAA.returned_values()) {
- Value *RetVal = RetValAAIt.first;
- if (isa<Argument>(RetVal) || isa<CallBase>(RetVal) ||
- isa<Constant>(RetVal))
- continue;
- // Anything that did not fit in the above categories cannot be resolved,
- // mark the call as unresolved.
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value "
- "cannot be translated: "
- << *RetVal << "\n");
- UnresolvedCalls.insert(CB);
- Unresolved = true;
- break;
- }
-
- if (Unresolved)
- continue;
-
- // Now track transitively returned values.
- unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB];
- if (NumRetAA == RetValAA.getNumReturnValues()) {
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not "
- "changed since it was seen last\n");
- continue;
- }
- NumRetAA = RetValAA.getNumReturnValues();
-
- for (auto &RetValAAIt : RetValAA.returned_values()) {
- Value *RetVal = RetValAAIt.first;
- if (Argument *Arg = dyn_cast<Argument>(RetVal)) {
- // Arguments are mapped to call site operands and we begin the traversal
- // again.
- bool Unused = false;
- RVState RVS({NewRVsMap, Unused, RetValAAIt.second});
- VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS);
- continue;
- } else if (isa<CallBase>(RetVal)) {
-        // Call sites are resolved by the callee attribute over time; no need
-        // to do anything here.
- continue;
- } else if (isa<Constant>(RetVal)) {
- // Constants are valid everywhere, we can simply take them.
- NewRVsMap[RetVal].insert(It.second.begin(), It.second.end());
- continue;
- }
- }
- }
-
-  // To avoid modifications to the ReturnedValues map while we iterate over it
-  // we keep a record of potential new entries in a copy map, NewRVsMap.
- for (auto &It : NewRVsMap) {
- assert(!It.second.empty() && "Entry does not add anything.");
- auto &ReturnInsts = ReturnedValues[It.first];
- for (ReturnInst *RI : It.second)
- if (ReturnInsts.insert(RI)) {
- LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value "
- << *It.first << " => " << *RI << "\n");
- Changed = true;
- }
- }
-
- Changed |= (NumUnresolvedCalls != UnresolvedCalls.size());
- return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
-}
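// A worked example for the collection phase above (hypothetical IR):
//
//   define i32 @f(i32 %a, i1 %c) {
//     %r = call i32 @g()
//     %s = select i1 %c, i32 %a, i32 %r
//     ret i32 %s
//   }
//
// The value traversal looks through %s and records {%a, %r}. The returned
// call %r is translated via @g's AAReturnedValues if @g returns only
// arguments, calls, or constants; otherwise it ends up in UnresolvedCalls.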
-
-struct AAReturnedValuesFunction final : public AAReturnedValuesImpl {
- AAReturnedValuesFunction(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) }
-};
-
-/// Returned values information for call sites.
-struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
- AAReturnedValuesCallSite(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites instead of
- // redirecting requests to the callee.
- llvm_unreachable("Abstract attributes for returned values are not "
- "supported for call sites yet!");
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- return indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
-};
-
-/// ------------------------ NoSync Function Attribute -------------------------
-
-struct AANoSyncImpl : AANoSync {
- AANoSyncImpl(const IRPosition &IRP) : AANoSync(IRP) {}
-
- const std::string getAsStr() const override {
- return getAssumed() ? "nosync" : "may-sync";
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
-
-  /// Helper function used to determine whether an instruction is a non-relaxed
-  /// atomic. In other words, whether an atomic instruction has an ordering
-  /// stronger than unordered or monotonic.
- static bool isNonRelaxedAtomic(Instruction *I);
-
- /// Helper function used to determine whether an instruction is volatile.
- static bool isVolatile(Instruction *I);
-
-  /// Helper function used to check if an intrinsic is nosync (currently only
-  /// the memcpy, memmove, and memset intrinsics).
- static bool isNoSyncIntrinsic(Instruction *I);
-};
-
-bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
- if (!I->isAtomic())
- return false;
-
- AtomicOrdering Ordering;
- switch (I->getOpcode()) {
- case Instruction::AtomicRMW:
- Ordering = cast<AtomicRMWInst>(I)->getOrdering();
- break;
- case Instruction::Store:
- Ordering = cast<StoreInst>(I)->getOrdering();
- break;
- case Instruction::Load:
- Ordering = cast<LoadInst>(I)->getOrdering();
- break;
- case Instruction::Fence: {
- auto *FI = cast<FenceInst>(I);
- if (FI->getSyncScopeID() == SyncScope::SingleThread)
- return false;
- Ordering = FI->getOrdering();
- break;
- }
- case Instruction::AtomicCmpXchg: {
- AtomicOrdering Success = cast<AtomicCmpXchgInst>(I)->getSuccessOrdering();
- AtomicOrdering Failure = cast<AtomicCmpXchgInst>(I)->getFailureOrdering();
-    // Only if both orderings are relaxed can the instruction be treated as
-    // relaxed; otherwise it is non-relaxed.
- if (Success != AtomicOrdering::Unordered &&
- Success != AtomicOrdering::Monotonic)
- return true;
- if (Failure != AtomicOrdering::Unordered &&
- Failure != AtomicOrdering::Monotonic)
- return true;
- return false;
- }
- default:
- llvm_unreachable(
- "New atomic operations need to be known in the attributor.");
- }
-
- // Relaxed.
- if (Ordering == AtomicOrdering::Unordered ||
- Ordering == AtomicOrdering::Monotonic)
- return false;
- return true;
-}
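// Classification examples for the helper above (illustrative IR):
//
//   store atomic i32 0, i32* %p monotonic, align 4    ; relaxed -> false
//   load atomic i32, i32* %p acquire, align 4         ; non-relaxed -> true
//   fence syncscope("singlethread") seq_cst           ; single thread -> false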
-
-/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
-/// FIXME: We should improve the handling of intrinsics.
-bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
-    /// Element-wise atomic memory intrinsics can only be unordered,
-    /// therefore nosync.
- case Intrinsic::memset_element_unordered_atomic:
- case Intrinsic::memmove_element_unordered_atomic:
- case Intrinsic::memcpy_element_unordered_atomic:
- return true;
- case Intrinsic::memset:
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- if (!cast<MemIntrinsic>(II)->isVolatile())
- return true;
- return false;
- default:
- return false;
- }
}
- return false;
-}
-
-bool AANoSyncImpl::isVolatile(Instruction *I) {
- assert(!ImmutableCallSite(I) && !isa<CallBase>(I) &&
- "Calls should not be checked here");
-
- switch (I->getOpcode()) {
- case Instruction::AtomicRMW:
- return cast<AtomicRMWInst>(I)->isVolatile();
- case Instruction::Store:
- return cast<StoreInst>(I)->isVolatile();
- case Instruction::Load:
- return cast<LoadInst>(I)->isVolatile();
- case Instruction::AtomicCmpXchg:
- return cast<AtomicCmpXchgInst>(I)->isVolatile();
- default:
- return false;
- }
-}
-
-ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
-
- auto CheckRWInstForNoSync = [&](Instruction &I) {
- /// We are looking for volatile instructions or Non-Relaxed atomics.
- /// FIXME: We should improve the handling of intrinsics.
-
- if (isa<IntrinsicInst>(&I) && isNoSyncIntrinsic(&I))
- return true;
-
- if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
- if (ICS.hasFnAttr(Attribute::NoSync))
- return true;
-
- const auto &NoSyncAA =
- A.getAAFor<AANoSync>(*this, IRPosition::callsite_function(ICS));
- if (NoSyncAA.isAssumedNoSync())
- return true;
- return false;
- }
-
- if (!isVolatile(&I) && !isNonRelaxedAtomic(&I))
- return true;
+ if (CheckBBLivenessOnly)
return false;
- };
-
- auto CheckForNoSync = [&](Instruction &I) {
- // At this point we handled all read/write effects and they are all
- // nosync, so they can be skipped.
- if (I.mayReadOrWriteMemory())
- return true;
-
- // non-convergent and readnone imply nosync.
- return !ImmutableCallSite(&I).isConvergent();
- };
-
- if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) ||
- !A.checkForAllCallLikeInstructions(CheckForNoSync, *this))
- return indicatePessimisticFixpoint();
-
- return ChangeStatus::UNCHANGED;
-}
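// The two callbacks above split the work: CheckRWInstForNoSync rejects
// volatile accesses and non-relaxed atomics among read/write instructions,
// so CheckForNoSync only needs to reject convergent call-like instructions,
// e.g. a hypothetical readnone-but-convergent intrinsic whose caller must
// remain "may-sync" even though it does not touch memory.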
-
-struct AANoSyncFunction final : public AANoSyncImpl {
- AANoSyncFunction(const IRPosition &IRP) : AANoSyncImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) }
-};
-
-/// NoSync attribute deduction for call sites.
-struct AANoSyncCallSite final : AANoSyncImpl {
- AANoSyncCallSite(const IRPosition &IRP) : AANoSyncImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoSyncImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AANoSync::StateType &>(FnAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); }
-};
-
-/// ------------------------ No-Free Attributes ----------------------------
-
-struct AANoFreeImpl : public AANoFree {
- AANoFreeImpl(const IRPosition &IRP) : AANoFree(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- auto CheckForNoFree = [&](Instruction &I) {
- ImmutableCallSite ICS(&I);
- if (ICS.hasFnAttr(Attribute::NoFree))
- return true;
-
- const auto &NoFreeAA =
- A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(ICS));
- return NoFreeAA.isAssumedNoFree();
- };
-
- if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this))
- return indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return getAssumed() ? "nofree" : "may-free";
- }
-};
-
-struct AANoFreeFunction final : public AANoFreeImpl {
- AANoFreeFunction(const IRPosition &IRP) : AANoFreeImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) }
-};
-
-/// NoFree attribute deduction for call sites.
-struct AANoFreeCallSite final : AANoFreeImpl {
- AANoFreeCallSite(const IRPosition &IRP) : AANoFreeImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoFreeImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AANoFree::StateType &>(FnAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); }
-};
-
-/// NoFree attribute for floating values.
-struct AANoFreeFloating : AANoFreeImpl {
- AANoFreeFloating(const IRPosition &IRP) : AANoFreeImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
-  void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(nofree) }
-
- /// See Abstract Attribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- const IRPosition &IRP = getIRPosition();
-
- const auto &NoFreeAA =
- A.getAAFor<AANoFree>(*this, IRPosition::function_scope(IRP));
- if (NoFreeAA.isAssumedNoFree())
- return ChangeStatus::UNCHANGED;
-
- Value &AssociatedValue = getIRPosition().getAssociatedValue();
- auto Pred = [&](const Use &U, bool &Follow) -> bool {
- Instruction *UserI = cast<Instruction>(U.getUser());
- if (auto *CB = dyn_cast<CallBase>(UserI)) {
- if (CB->isBundleOperand(&U))
- return false;
- if (!CB->isArgOperand(&U))
- return true;
- unsigned ArgNo = CB->getArgOperandNo(&U);
-
- const auto &NoFreeArg = A.getAAFor<AANoFree>(
- *this, IRPosition::callsite_argument(*CB, ArgNo));
- return NoFreeArg.isAssumedNoFree();
- }
-
- if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
- isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
- Follow = true;
- return true;
- }
-
- // Unknown user.
- return false;
- };
- if (!A.checkForAllUses(Pred, *this, AssociatedValue))
- return indicatePessimisticFixpoint();
-
- return ChangeStatus::UNCHANGED;
- }
-};
-
-/// NoFree attribute for a call site argument.
-struct AANoFreeArgument final : AANoFreeFloating {
- AANoFreeArgument(const IRPosition &IRP) : AANoFreeFloating(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nofree) }
-};
-
-/// NoFree attribute for call site arguments.
-struct AANoFreeCallSiteArgument final : AANoFreeFloating {
- AANoFreeCallSiteArgument(const IRPosition &IRP) : AANoFreeFloating(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Argument *Arg = getAssociatedArgument();
- if (!Arg)
- return indicatePessimisticFixpoint();
- const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AANoFree::StateType &>(ArgAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
-  void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nofree) }
-};
-
-/// NoFree attribute for function return value.
-struct AANoFreeReturned final : AANoFreeFloating {
- AANoFreeReturned(const IRPosition &IRP) : AANoFreeFloating(IRP) {
- llvm_unreachable("NoFree is not applicable to function returns!");
- }
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- llvm_unreachable("NoFree is not applicable to function returns!");
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- llvm_unreachable("NoFree is not applicable to function returns!");
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
-};
-
-/// NoFree attribute deduction for a call site return value.
-struct AANoFreeCallSiteReturned final : AANoFreeFloating {
- AANoFreeCallSiteReturned(const IRPosition &IRP) : AANoFreeFloating(IRP) {}
-
- ChangeStatus manifest(Attributor &A) override {
- return ChangeStatus::UNCHANGED;
- }
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) }
-};
-
-/// ------------------------ NonNull Argument Attribute ------------------------
-static int64_t getKnownNonNullAndDerefBytesForUse(
- Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue,
- const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) {
- TrackUse = false;
-
- const Value *UseV = U->get();
- if (!UseV->getType()->isPointerTy())
- return 0;
-
- Type *PtrTy = UseV->getType();
- const Function *F = I->getFunction();
- bool NullPointerIsDefined =
- F ? llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true;
- const DataLayout &DL = A.getInfoCache().getDL();
- if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
- if (ICS.isBundleOperand(U))
- return 0;
-
- if (ICS.isCallee(U)) {
- IsNonNull |= !NullPointerIsDefined;
- return 0;
- }
-
- unsigned ArgNo = ICS.getArgumentNo(U);
- IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo);
- // As long as we only use known information there is no need to track
- // dependences here.
- auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP,
- /* TrackDependence */ false);
- IsNonNull |= DerefAA.isKnownNonNull();
- return DerefAA.getKnownDereferenceableBytes();
- }
-
- // We need to follow common pointer manipulation uses to the accesses they
- // feed into. We can try to be smart to avoid looking through things we do not
- // like for now, e.g., non-inbounds GEPs.
- if (isa<CastInst>(I)) {
- TrackUse = true;
- return 0;
- }
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
- if (GEP->hasAllConstantIndices()) {
- TrackUse = true;
- return 0;
- }
-
- int64_t Offset;
- if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) {
- if (Base == &AssociatedValue &&
- Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
- int64_t DerefBytes =
- (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()) + Offset;
-
- IsNonNull |= !NullPointerIsDefined;
- return std::max(int64_t(0), DerefBytes);
- }
- }
-
-  // Corner case when the offset is 0.
- if (const Value *Base = getBasePointerOfAccessPointerOperand(
- I, Offset, DL, /*AllowNonInbounds*/ true)) {
- if (Offset == 0 && Base == &AssociatedValue &&
- Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
- int64_t DerefBytes =
- (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType());
- IsNonNull |= !NullPointerIsDefined;
- return std::max(int64_t(0), DerefBytes);
- }
- }
-
- return 0;
-}
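// A worked example for the dereferenceable-bytes arithmetic above
// (hypothetical IR, associated value %p):
//
//   %g = getelementptr inbounds i64, i64* %p, i64 2   ; Offset = 16 bytes
//   %v = load i64, i64* %g                            ; store size = 8 bytes
//
// DerefBytes = 8 + 16 = 24, and IsNonNull is set if null is not a defined
// pointer in %p's address space.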
-
-struct AANonNullImpl : AANonNull {
- AANonNullImpl(const IRPosition &IRP)
- : AANonNull(IRP),
- NullIsDefined(NullPointerIsDefined(
- getAnchorScope(),
- getAssociatedValue().getType()->getPointerAddressSpace())) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (!NullIsDefined &&
- hasAttr({Attribute::NonNull, Attribute::Dereferenceable}))
- indicateOptimisticFixpoint();
- else if (isa<ConstantPointerNull>(getAssociatedValue()))
- indicatePessimisticFixpoint();
- else
- AANonNull::initialize(A);
- }
-
- /// See AAFromMustBeExecutedContext
- bool followUse(Attributor &A, const Use *U, const Instruction *I) {
- bool IsNonNull = false;
- bool TrackUse = false;
- getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I,
- IsNonNull, TrackUse);
- setKnown(IsNonNull);
- return TrackUse;
- }
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return getAssumed() ? "nonnull" : "may-null";
- }
-
- /// Flag to determine if the underlying value can be null and still allow
- /// valid accesses.
- const bool NullIsDefined;
-};
-
-/// NonNull attribute for a floating value.
-struct AANonNullFloating
- : AAFromMustBeExecutedContext<AANonNull, AANonNullImpl> {
- using Base = AAFromMustBeExecutedContext<AANonNull, AANonNullImpl>;
- AANonNullFloating(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus Change = Base::updateImpl(A);
- if (isKnownNonNull())
- return Change;
-
- if (!NullIsDefined) {
- const auto &DerefAA =
- A.getAAFor<AADereferenceable>(*this, getIRPosition());
- if (DerefAA.getAssumedDereferenceableBytes())
- return Change;
- }
-
- const DataLayout &DL = A.getDataLayout();
-
- DominatorTree *DT = nullptr;
- InformationCache &InfoCache = A.getInfoCache();
- if (const Function *Fn = getAnchorScope())
- DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Fn);
-
- auto VisitValueCB = [&](Value &V, AANonNull::StateType &T,
- bool Stripped) -> bool {
- const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V));
- if (!Stripped && this == &AA) {
- if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, getCtxI(), DT))
- T.indicatePessimisticFixpoint();
- } else {
- // Use abstract attribute information.
- const AANonNull::StateType &NS =
- static_cast<const AANonNull::StateType &>(AA.getState());
- T ^= NS;
- }
- return T.isValidState();
- };
-
- StateType T;
- if (!genericValueTraversal<AANonNull, StateType>(A, getIRPosition(), *this,
- T, VisitValueCB))
- return indicatePessimisticFixpoint();
-
- return clampStateAndIndicateChange(getState(), T);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
-};
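// The traversal pattern above in a nutshell (hypothetical IR): for
//
//   %phi = phi i8* [ %arg, %a ], [ %gep, %b ]
//
// the callback sees the leaves %arg and %gep. Unstripped leaves that have no
// better AA than this one are judged with isKnownNonZero; all other leaf
// states are folded into T via `T ^= NS`, so nonnull must hold on every leaf.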
-
-/// NonNull attribute for function return value.
-struct AANonNullReturned final
- : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> {
- AANonNullReturned(const IRPosition &IRP)
- : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
-};
-
-/// NonNull attribute for function argument.
-struct AANonNullArgument final
- : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull,
- AANonNullImpl> {
- AANonNullArgument(const IRPosition &IRP)
- : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull,
- AANonNullImpl>(
- IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) }
-};
-
-struct AANonNullCallSiteArgument final : AANonNullFloating {
- AANonNullCallSiteArgument(const IRPosition &IRP) : AANonNullFloating(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) }
-};
-
-/// NonNull attribute for a call site return position.
-struct AANonNullCallSiteReturned final
- : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull,
- AANonNullImpl> {
- AANonNullCallSiteReturned(const IRPosition &IRP)
- : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull,
- AANonNullImpl>(
- IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
-};
-
-/// ------------------------ No-Recurse Attributes ----------------------------
-
-struct AANoRecurseImpl : public AANoRecurse {
- AANoRecurseImpl(const IRPosition &IRP) : AANoRecurse(IRP) {}
-
- /// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
- return getAssumed() ? "norecurse" : "may-recurse";
- }
-};
-
-struct AANoRecurseFunction final : AANoRecurseImpl {
- AANoRecurseFunction(const IRPosition &IRP) : AANoRecurseImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoRecurseImpl::initialize(A);
- if (const Function *F = getAnchorScope())
- if (A.getInfoCache().getSccSize(*F) == 1)
- return;
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
-
- auto CheckForNoRecurse = [&](Instruction &I) {
- ImmutableCallSite ICS(&I);
- if (ICS.hasFnAttr(Attribute::NoRecurse))
- return true;
-
- const auto &NoRecurseAA =
- A.getAAFor<AANoRecurse>(*this, IRPosition::callsite_function(ICS));
- if (!NoRecurseAA.isAssumedNoRecurse())
- return false;
-
- // Recursion to the same function
- if (ICS.getCalledFunction() == getAnchorScope())
- return false;
-
- return true;
- };
-
- if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this))
- return indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
-
- void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) }
-};
-
-/// NoRecurse attribute deduction for call sites.
-struct AANoRecurseCallSite final : AANoRecurseImpl {
- AANoRecurseCallSite(const IRPosition &IRP) : AANoRecurseImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoRecurseImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AANoRecurse::StateType &>(FnAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
-};
-
-/// -------------------- Undefined-Behavior Attributes ------------------------
-
-struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
- AAUndefinedBehaviorImpl(const IRPosition &IRP) : AAUndefinedBehavior(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- const size_t UBPrevSize = KnownUBInsts.size();
- const size_t NoUBPrevSize = AssumedNoUBInsts.size();
-
- auto InspectMemAccessInstForUB = [&](Instruction &I) {
- // Skip instructions that are already saved.
- if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
- return true;
-
- // If we reach here, we know we have an instruction
- // that accesses memory through a pointer operand,
- // for which getPointerOperand() should give it to us.
- const Value *PtrOp =
- Attributor::getPointerOperand(&I, /* AllowVolatile */ true);
- assert(PtrOp &&
- "Expected pointer operand of memory accessing instruction");
-
- // A memory access through a pointer is considered UB
- // only if the pointer has constant null value.
- // TODO: Expand it to not only check constant values.
- if (!isa<ConstantPointerNull>(PtrOp)) {
- AssumedNoUBInsts.insert(&I);
- return true;
- }
- const Type *PtrTy = PtrOp->getType();
-
- // Because we only consider instructions inside functions,
- // assume that a parent function exists.
- const Function *F = I.getFunction();
-
- // A memory access using constant null pointer is only considered UB
- // if null pointer is _not_ defined for the target platform.
- if (llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()))
- AssumedNoUBInsts.insert(&I);
- else
- KnownUBInsts.insert(&I);
- return true;
- };
-
- auto InspectBrInstForUB = [&](Instruction &I) {
-      // A conditional branch instruction is considered UB if it has an
-      // `undef` condition.
-
- // Skip instructions that are already saved.
- if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
- return true;
-
- // We know we have a branch instruction.
- auto BrInst = cast<BranchInst>(&I);
-
- // Unconditional branches are never considered UB.
- if (BrInst->isUnconditional())
- return true;
-
- // Either we stopped and the appropriate action was taken,
- // or we got back a simplified value to continue.
- Optional<Value *> SimplifiedCond =
- stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst);
- if (!SimplifiedCond.hasValue())
- return true;
- AssumedNoUBInsts.insert(&I);
- return true;
- };
-
- A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
- {Instruction::Load, Instruction::Store,
- Instruction::AtomicCmpXchg,
- Instruction::AtomicRMW});
- A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br});
- if (NoUBPrevSize != AssumedNoUBInsts.size() ||
- UBPrevSize != KnownUBInsts.size())
- return ChangeStatus::CHANGED;
- return ChangeStatus::UNCHANGED;
- }
-
- bool isKnownToCauseUB(Instruction *I) const override {
- return KnownUBInsts.count(I);
- }
-
- bool isAssumedToCauseUB(Instruction *I) const override {
-    // In simple words, if an instruction is not in the set of instructions
-    // assumed to _not_ cause UB, then it is assumed to cause UB (that
-    // includes those in the KnownUBInsts set). The rest of the boilerplate
-    // ensures that it is one of the instructions we test for UB.
-
- switch (I->getOpcode()) {
- case Instruction::Load:
- case Instruction::Store:
- case Instruction::AtomicCmpXchg:
- case Instruction::AtomicRMW:
- return !AssumedNoUBInsts.count(I);
- case Instruction::Br: {
- auto BrInst = cast<BranchInst>(I);
- if (BrInst->isUnconditional())
- return false;
- return !AssumedNoUBInsts.count(I);
- } break;
- default:
- return false;
- }
+ const AAIsDead &IsDeadAA = getOrCreateAAFor<AAIsDead>(
+ IRPosition::value(I), QueryingAA, /* TrackDependence */ false);
+ // Don't check liveness for AAIsDead.
+ if (QueryingAA == &IsDeadAA)
return false;
- }
- ChangeStatus manifest(Attributor &A) override {
- if (KnownUBInsts.empty())
- return ChangeStatus::UNCHANGED;
- for (Instruction *I : KnownUBInsts)
- A.changeToUnreachableAfterManifest(I);
- return ChangeStatus::CHANGED;
- }
-
- /// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
- return getAssumed() ? "undefined-behavior" : "no-ub";
- }
-
- /// Note: The correctness of this analysis depends on the fact that the
- /// following 2 sets will stop changing after some point.
- /// "Change" here means that their size changes.
- /// The size of each set is monotonically increasing
- /// (we only add items to them) and it is upper bounded by the number of
- /// instructions in the processed function (we can never save more
- /// elements in either set than this number). Hence, at some point,
- /// they will stop increasing.
- /// Consequently, at some point, both sets will have stopped
- /// changing, effectively making the analysis reach a fixpoint.
-
- /// Note: These 2 sets are disjoint and an instruction can be considered
- /// one of 3 things:
- /// 1) Known to cause UB (AAUndefinedBehavior could prove it) and put it in
- /// the KnownUBInsts set.
- /// 2) Assumed to cause UB (in every updateImpl, AAUndefinedBehavior
- /// has a reason to assume it).
-  /// 3) Assumed to not cause UB: every other instruction - AAUndefinedBehavior
- /// could not find a reason to assume or prove that it can cause UB,
- /// hence it assumes it doesn't. We have a set for these instructions
- /// so that we don't reprocess them in every update.
- /// Note however that instructions in this set may cause UB.
-
-protected:
- /// A set of all live instructions _known_ to cause UB.
- SmallPtrSet<Instruction *, 8> KnownUBInsts;
-
-private:
- /// A set of all the (live) instructions that are assumed to _not_ cause UB.
- SmallPtrSet<Instruction *, 8> AssumedNoUBInsts;
-
-  // Should be called on updates in which, if we're processing an instruction
-  // \p I that depends on a value \p V, one of the following has to happen:
- // - If the value is assumed, then stop.
- // - If the value is known but undef, then consider it UB.
- // - Otherwise, do specific processing with the simplified value.
- // We return None in the first 2 cases to signify that an appropriate
- // action was taken and the caller should stop.
- // Otherwise, we return the simplified value that the caller should
- // use for specific processing.
- Optional<Value *> stopOnUndefOrAssumed(Attributor &A, const Value *V,
- Instruction *I) {
- const auto &ValueSimplifyAA =
- A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*V));
- Optional<Value *> SimplifiedV =
- ValueSimplifyAA.getAssumedSimplifiedValue(A);
- if (!ValueSimplifyAA.isKnown()) {
- // Don't depend on assumed values.
- return llvm::None;
- }
- if (!SimplifiedV.hasValue()) {
- // If it is known (which we tested above) but it doesn't have a value,
- // then we can assume `undef` and hence the instruction is UB.
- KnownUBInsts.insert(I);
- return llvm::None;
- }
- Value *Val = SimplifiedV.getValue();
- if (isa<UndefValue>(Val)) {
- KnownUBInsts.insert(I);
- return llvm::None;
- }
- return Val;
- }
-};
-
-struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
- AAUndefinedBehaviorFunction(const IRPosition &IRP)
- : AAUndefinedBehaviorImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECL(UndefinedBehaviorInstruction, Instruction,
- "Number of instructions known to have UB");
- BUILD_STAT_NAME(UndefinedBehaviorInstruction, Instruction) +=
- KnownUBInsts.size();
+ if (IsDeadAA.isAssumedDead()) {
+ if (QueryingAA)
+ recordDependence(IsDeadAA, *QueryingAA, DepClass);
+ return true;
}
-};
-/// ------------------------ Will-Return Attributes ----------------------------
-
-// Helper function that checks whether a function has any cycle.
-// TODO: Replace with more efficient code.
-static bool containsCycle(Function &F) {
- SmallPtrSet<BasicBlock *, 32> Visited;
-
-  // Traverse the BBs by DFS and check whether a successor is already visited.
- for (BasicBlock *BB : depth_first(&F)) {
- Visited.insert(BB);
- for (auto *SuccBB : successors(BB)) {
- if (Visited.count(SuccBB))
- return true;
- }
- }
return false;
}
-// Helper function that checks whether the function has a loop which might
-// become an endless loop.
-// FIXME: Any cycle is regarded as an endless loop for now.
-// We have to allow some patterns.
-static bool containsPossiblyEndlessLoop(Function *F) {
- return !F || !F->hasExactDefinition() || containsCycle(*F);
-}
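// Note that Visited accumulates in DFS preorder, so the check above also
// fires on forward and cross edges, e.g. the acyclic diamond
//
//   A -> B, A -> C, B -> D, C -> D
//
// is reported as containing a cycle once the second predecessor of D is
// visited. This only over-approximates, which is sound for the FIXME above.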
-
-struct AAWillReturnImpl : public AAWillReturn {
- AAWillReturnImpl(const IRPosition &IRP) : AAWillReturn(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAWillReturn::initialize(A);
-
- Function *F = getAssociatedFunction();
- if (containsPossiblyEndlessLoop(F))
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- auto CheckForWillReturn = [&](Instruction &I) {
- IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I));
- const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, IPos);
- if (WillReturnAA.isKnownWillReturn())
- return true;
- if (!WillReturnAA.isAssumedWillReturn())
- return false;
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(*this, IPos);
- return NoRecurseAA.isAssumedNoRecurse();
- };
-
- if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this))
- return indicatePessimisticFixpoint();
-
- return ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
- return getAssumed() ? "willreturn" : "may-noreturn";
- }
-};
-
-struct AAWillReturnFunction final : AAWillReturnImpl {
- AAWillReturnFunction(const IRPosition &IRP) : AAWillReturnImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) }
-};
-
-/// WillReturn attribute deduction for call sites.
-struct AAWillReturnCallSite final : AAWillReturnImpl {
- AAWillReturnCallSite(const IRPosition &IRP) : AAWillReturnImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAWillReturnImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AAWillReturn::StateType &>(FnAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); }
-};
-
-/// -------------------AAReachability Attribute--------------------------
-
-struct AAReachabilityImpl : AAReachability {
- AAReachabilityImpl(const IRPosition &IRP) : AAReachability(IRP) {}
-
- const std::string getAsStr() const override {
- // TODO: Return the number of reachable queries.
- return "reachable";
- }
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override { indicatePessimisticFixpoint(); }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- return indicatePessimisticFixpoint();
- }
-};
-
-struct AAReachabilityFunction final : public AAReachabilityImpl {
- AAReachabilityFunction(const IRPosition &IRP) : AAReachabilityImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); }
-};
-
-/// ------------------------ NoAlias Argument Attribute ------------------------
-
-struct AANoAliasImpl : AANoAlias {
- AANoAliasImpl(const IRPosition &IRP) : AANoAlias(IRP) {}
-
- const std::string getAsStr() const override {
- return getAssumed() ? "noalias" : "may-alias";
- }
-};
-
-/// NoAlias attribute for a floating value.
-struct AANoAliasFloating final : AANoAliasImpl {
- AANoAliasFloating(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoAliasImpl::initialize(A);
- Value &Val = getAssociatedValue();
- if (isa<AllocaInst>(Val))
- indicateOptimisticFixpoint();
- if (isa<ConstantPointerNull>(Val) &&
- Val.getType()->getPointerAddressSpace() == 0)
- indicateOptimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Implement this.
- return indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FLOATING_ATTR(noalias)
- }
-};
-
-/// NoAlias attribute for an argument.
-struct AANoAliasArgument final
- : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
- using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>;
- AANoAliasArgument(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::update(...).
- ChangeStatus updateImpl(Attributor &A) override {
-    // We have to make sure no-alias on the argument does not break
-    // synchronization when this is a callback argument, see also [1] below.
-    // If synchronization cannot be affected, we delegate to the base
-    // updateImpl function; otherwise we give up for now.
-
- // If the function is no-sync, no-alias cannot break synchronization.
- const auto &NoSyncAA = A.getAAFor<AANoSync>(
- *this, IRPosition::function_scope(getIRPosition()));
- if (NoSyncAA.isAssumedNoSync())
- return Base::updateImpl(A);
-
- // If the argument is read-only, no-alias cannot break synchronization.
- const auto &MemBehaviorAA =
- A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
- if (MemBehaviorAA.isAssumedReadOnly())
- return Base::updateImpl(A);
-
- // If the argument is never passed through callbacks, no-alias cannot break
- // synchronization.
- if (A.checkForAllCallSites(
- [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this,
- true))
- return Base::updateImpl(A);
-
- // TODO: add no-alias but make sure it doesn't break synchronization by
- // introducing fake uses. See:
- // [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel,
- // International Workshop on OpenMP 2018,
- // http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf
-
- return indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) }
-};
-
-struct AANoAliasCallSiteArgument final : AANoAliasImpl {
- AANoAliasCallSiteArgument(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- // See callsite argument attribute and callee argument attribute.
- ImmutableCallSite ICS(&getAnchorValue());
- if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias))
- indicateOptimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // We can deduce "noalias" if the following conditions hold.
- // (i) Associated value is assumed to be noalias in the definition.
- // (ii) Associated value is assumed to be no-capture in all the uses
- // possibly executed before this callsite.
- // (iii) There is no other pointer argument which could alias with the
- // value.
-
- const Value &V = getAssociatedValue();
- const IRPosition IRP = IRPosition::value(V);
-
- // (i) Check whether noalias holds in the definition.
-
- auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
- LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] check definition: " << V
- << " :: " << NoAliasAA << "\n");
-
- if (!NoAliasAA.isAssumedNoAlias())
- return indicatePessimisticFixpoint();
-
- LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] " << V
- << " is assumed NoAlias in the definition\n");
-
- // (ii) Check whether the value is captured in the scope using AANoCapture.
-    // FIXME: This is conservative though; it would be better to look at the
-    // CFG and check only uses possibly executed before this callsite.
-
- auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
- if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
- LLVM_DEBUG(
- dbgs() << "[Attributor][AANoAliasCSArg] " << V
- << " cannot be noalias as it is potentially captured\n");
- return indicatePessimisticFixpoint();
- }
-
- // (iii) Check there is no other pointer argument which could alias with the
- // value.
- // TODO: AbstractCallSite
- ImmutableCallSite ICS(&getAnchorValue());
- for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) {
- if (getArgNo() == (int)i)
- continue;
- const Value *ArgOp = ICS.getArgOperand(i);
- if (!ArgOp->getType()->isPointerTy())
- continue;
-
- if (const Function *F = getAnchorScope()) {
- if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) {
- bool IsAliasing = !AAR->isNoAlias(&getAssociatedValue(), ArgOp);
- LLVM_DEBUG(dbgs()
- << "[Attributor][NoAliasCSArg] Check alias between "
- "callsite arguments "
- << AAR->isNoAlias(&getAssociatedValue(), ArgOp) << " "
- << getAssociatedValue() << " " << *ArgOp << " => "
- << (IsAliasing ? "" : "no-") << "alias \n");
-
- if (!IsAliasing)
- continue;
- }
- }
- return indicatePessimisticFixpoint();
- }
-
- return ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) }
-};
-
-/// NoAlias attribute for function return value.
-struct AANoAliasReturned final : AANoAliasImpl {
- AANoAliasReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- virtual ChangeStatus updateImpl(Attributor &A) override {
-
- auto CheckReturnValue = [&](Value &RV) -> bool {
- if (Constant *C = dyn_cast<Constant>(&RV))
- if (C->isNullValue() || isa<UndefValue>(C))
- return true;
-
- /// For now, we can only deduce noalias if we have call sites.
- /// FIXME: add more support.
- ImmutableCallSite ICS(&RV);
- if (!ICS)
- return false;
-
- const IRPosition &RVPos = IRPosition::value(RV);
- const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, RVPos);
- if (!NoAliasAA.isAssumedNoAlias())
- return false;
-
- const auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, RVPos);
- return NoCaptureAA.isAssumedNoCaptureMaybeReturned();
- };
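-    // As a sketch (hypothetical IR), a function whose every return is either
-    //   ret i8* null
-    // or returns the result of a call that is itself assumed noalias and
-    // no-capture-maybe-returned, e.g., a direct @malloc call, passes this
-    // check and gets a noalias return value.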
-
- if (!A.checkForAllReturnedValues(CheckReturnValue, *this))
- return indicatePessimisticFixpoint();
-
- return ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) }
-};
-
-/// NoAlias attribute deduction for a call site return value.
-struct AANoAliasCallSiteReturned final : AANoAliasImpl {
- AANoAliasCallSiteReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoAliasImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
-    //       sense to specialize attributes for call site arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::returned(*F);
- auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
-};
-
-/// ------------------- AAIsDead Function Attribute -----------------------
-
-struct AAIsDeadValueImpl : public AAIsDead {
- AAIsDeadValueImpl(const IRPosition &IRP) : AAIsDead(IRP) {}
-
- /// See AAIsDead::isAssumedDead().
- bool isAssumedDead() const override { return getAssumed(); }
-
- /// See AAIsDead::isAssumedDead(BasicBlock *).
- bool isAssumedDead(const BasicBlock *BB) const override { return false; }
-
- /// See AAIsDead::isKnownDead(BasicBlock *).
- bool isKnownDead(const BasicBlock *BB) const override { return false; }
-
- /// See AAIsDead::isAssumedDead(Instruction *I).
- bool isAssumedDead(const Instruction *I) const override {
- return I == getCtxI() && isAssumedDead();
- }
-
- /// See AAIsDead::isKnownDead(Instruction *I).
- bool isKnownDead(const Instruction *I) const override {
- return I == getCtxI() && getKnown();
- }
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return isAssumedDead() ? "assumed-dead" : "assumed-live";
- }
-};
-
-struct AAIsDeadFloating : public AAIsDeadValueImpl {
- AAIsDeadFloating(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (Instruction *I = dyn_cast<Instruction>(&getAssociatedValue()))
- if (!wouldInstructionBeTriviallyDead(I))
- indicatePessimisticFixpoint();
- if (isa<UndefValue>(getAssociatedValue()))
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- auto UsePred = [&](const Use &U, bool &Follow) {
- Instruction *UserI = cast<Instruction>(U.getUser());
- if (CallSite CS = CallSite(UserI)) {
- if (!CS.isArgOperand(&U))
- return false;
- const IRPosition &CSArgPos =
- IRPosition::callsite_argument(CS, CS.getArgumentNo(&U));
- const auto &CSArgIsDead = A.getAAFor<AAIsDead>(*this, CSArgPos);
- return CSArgIsDead.isAssumedDead();
- }
- if (ReturnInst *RI = dyn_cast<ReturnInst>(UserI)) {
- const IRPosition &RetPos = IRPosition::returned(*RI->getFunction());
- const auto &RetIsDeadAA = A.getAAFor<AAIsDead>(*this, RetPos);
- return RetIsDeadAA.isAssumedDead();
- }
- Follow = true;
- return wouldInstructionBeTriviallyDead(UserI);
- };
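-    // For example (hypothetical IR), with
-    //   %a = add i32 %v, 1
-    //   %b = mul i32 %a, 2   ; %b itself has no users
-    // both users are side-effect free, so the use chain of %v is followed to
-    // its end and %v can be assumed dead.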
-
- if (!A.checkForAllUses(UsePred, *this, getAssociatedValue()))
- return indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- Value &V = getAssociatedValue();
- if (auto *I = dyn_cast<Instruction>(&V))
- if (wouldInstructionBeTriviallyDead(I)) {
- A.deleteAfterManifest(*I);
- return ChangeStatus::CHANGED;
- }
-
- if (V.use_empty())
- return ChangeStatus::UNCHANGED;
-
- UndefValue &UV = *UndefValue::get(V.getType());
- bool AnyChange = A.changeValueAfterManifest(V, UV);
- return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FLOATING_ATTR(IsDead)
- }
-};
-
-struct AAIsDeadArgument : public AAIsDeadFloating {
- AAIsDeadArgument(const IRPosition &IRP) : AAIsDeadFloating(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (!getAssociatedFunction()->hasExactDefinition())
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = AAIsDeadFloating::manifest(A);
- Argument &Arg = *getAssociatedArgument();
- if (Arg.getParent()->hasLocalLinkage())
- if (A.registerFunctionSignatureRewrite(
- Arg, /* ReplacementTypes */ {},
- Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{},
- Attributor::ArgumentReplacementInfo::ACSRepairCBTy{}))
- return ChangeStatus::CHANGED;
- return Changed;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(IsDead) }
-};
-
-struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
- AAIsDeadCallSiteArgument(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (isa<UndefValue>(getAssociatedValue()))
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
-    //       sense to specialize attributes for call site arguments instead of
- // redirecting requests to the callee argument.
- Argument *Arg = getAssociatedArgument();
- if (!Arg)
- return indicatePessimisticFixpoint();
- const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos);
- return clampStateAndIndicateChange(
- getState(), static_cast<const AAIsDead::StateType &>(ArgAA.getState()));
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- CallBase &CB = cast<CallBase>(getAnchorValue());
- Use &U = CB.getArgOperandUse(getArgNo());
- assert(!isa<UndefValue>(U.get()) &&
- "Expected undef values to be filtered out!");
- UndefValue &UV = *UndefValue::get(U->getType());
- if (A.changeUseAfterManifest(U, UV))
- return ChangeStatus::CHANGED;
- return ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(IsDead) }
-};
-
-struct AAIsDeadReturned : public AAIsDeadValueImpl {
- AAIsDeadReturned(const IRPosition &IRP) : AAIsDeadValueImpl(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
-
- auto PredForCallSite = [&](AbstractCallSite ACS) {
- if (ACS.isCallbackCall())
- return false;
- const IRPosition &CSRetPos =
- IRPosition::callsite_returned(ACS.getCallSite());
- const auto &RetIsDeadAA = A.getAAFor<AAIsDead>(*this, CSRetPos);
- return RetIsDeadAA.isAssumedDead();
- };
-
- if (!A.checkForAllCallSites(PredForCallSite, *this, true))
- return indicatePessimisticFixpoint();
-
- return ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- // TODO: Rewrite the signature to return void?
- bool AnyChange = false;
- UndefValue &UV = *UndefValue::get(getAssociatedFunction()->getReturnType());
- auto RetInstPred = [&](Instruction &I) {
- ReturnInst &RI = cast<ReturnInst>(I);
- if (!isa<UndefValue>(RI.getReturnValue()))
- AnyChange |= A.changeUseAfterManifest(RI.getOperandUse(0), UV);
- return true;
- };
- A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret});
- return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(IsDead) }
-};
-
-struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
- AAIsDeadCallSiteReturned(const IRPosition &IRP) : AAIsDeadFloating(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(IsDead) }
-};
-
-struct AAIsDeadFunction : public AAIsDead {
- AAIsDeadFunction(const IRPosition &IRP) : AAIsDead(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- const Function *F = getAssociatedFunction();
- if (F && !F->isDeclaration()) {
- ToBeExploredFrom.insert(&F->getEntryBlock().front());
- assumeLive(A, F->getEntryBlock());
- }
- }
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" +
- std::to_string(getAssociatedFunction()->size()) + "][#TBEP " +
- std::to_string(ToBeExploredFrom.size()) + "][#KDE " +
- std::to_string(KnownDeadEnds.size()) + "]";
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- assert(getState().isValidState() &&
- "Attempted to manifest an invalid state!");
-
- ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
- Function &F = *getAssociatedFunction();
-
- if (AssumedLiveBlocks.empty()) {
- A.deleteAfterManifest(F);
- return ChangeStatus::CHANGED;
- }
-
- // Flag to determine if we can change an invoke to a call assuming the
- // callee is nounwind. This is not possible if the personality of the
-    // function allows catching asynchronous exceptions.
- bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
-
- KnownDeadEnds.set_union(ToBeExploredFrom);
- for (const Instruction *DeadEndI : KnownDeadEnds) {
- auto *CB = dyn_cast<CallBase>(DeadEndI);
- if (!CB)
- continue;
- const auto &NoReturnAA =
- A.getAAFor<AANoReturn>(*this, IRPosition::callsite_function(*CB));
- bool MayReturn = !NoReturnAA.isAssumedNoReturn();
- if (MayReturn && (!Invoke2CallAllowed || !isa<InvokeInst>(CB)))
- continue;
-
- if (auto *II = dyn_cast<InvokeInst>(DeadEndI))
- A.registerInvokeWithDeadSuccessor(const_cast<InvokeInst &>(*II));
- else
- A.changeToUnreachableAfterManifest(
- const_cast<Instruction *>(DeadEndI->getNextNode()));
- HasChanged = ChangeStatus::CHANGED;
- }
-
- for (BasicBlock &BB : F)
- if (!AssumedLiveBlocks.count(&BB))
- A.deleteAfterManifest(BB);
-
- return HasChanged;
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
-
- /// Returns true if the function is assumed dead.
- bool isAssumedDead() const override { return false; }
-
- /// See AAIsDead::isAssumedDead(BasicBlock *).
- bool isAssumedDead(const BasicBlock *BB) const override {
- assert(BB->getParent() == getAssociatedFunction() &&
- "BB must be in the same anchor scope function.");
-
- if (!getAssumed())
- return false;
- return !AssumedLiveBlocks.count(BB);
- }
-
- /// See AAIsDead::isKnownDead(BasicBlock *).
- bool isKnownDead(const BasicBlock *BB) const override {
- return getKnown() && isAssumedDead(BB);
- }
-
-  /// See AAIsDead::isAssumedDead(Instruction *I).
- bool isAssumedDead(const Instruction *I) const override {
- assert(I->getParent()->getParent() == getAssociatedFunction() &&
- "Instruction must be in the same anchor scope function.");
-
- if (!getAssumed())
- return false;
-
-    // If it is not in AssumedLiveBlocks then it is for sure dead.
-    // Otherwise, it can still be after a noreturn call in a live block.
- if (!AssumedLiveBlocks.count(I->getParent()))
- return true;
-
- // If it is not after a liveness barrier it is live.
- const Instruction *PrevI = I->getPrevNode();
- while (PrevI) {
- if (KnownDeadEnds.count(PrevI) || ToBeExploredFrom.count(PrevI))
- return true;
- PrevI = PrevI->getPrevNode();
- }
- return false;
- }
-
- /// See AAIsDead::isKnownDead(Instruction *I).
- bool isKnownDead(const Instruction *I) const override {
- return getKnown() && isAssumedDead(I);
- }
-
- /// Determine if \p F might catch asynchronous exceptions.
- static bool mayCatchAsynchronousExceptions(const Function &F) {
- return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
- }
-
- /// Assume \p BB is (partially) live now and indicate to the Attributor \p A
-  /// that internal functions called from \p BB should now be looked at.
- bool assumeLive(Attributor &A, const BasicBlock &BB) {
- if (!AssumedLiveBlocks.insert(&BB).second)
- return false;
-
- // We assume that all of BB is (probably) live now and if there are calls to
- // internal functions we will assume that those are now live as well. This
- // is a performance optimization for blocks with calls to a lot of internal
- // functions. It can however cause dead functions to be treated as live.
- for (const Instruction &I : BB)
- if (ImmutableCallSite ICS = ImmutableCallSite(&I))
- if (const Function *F = ICS.getCalledFunction())
- if (F->hasLocalLinkage())
- A.markLiveInternalFunction(*F);
+bool Attributor::isAssumedDead(const IRPosition &IRP,
+ const AbstractAttribute *QueryingAA,
+ const AAIsDead *FnLivenessAA,
+ bool CheckBBLivenessOnly, DepClassTy DepClass) {
+ Instruction *CtxI = IRP.getCtxI();
+ if (CtxI &&
+ isAssumedDead(*CtxI, QueryingAA, FnLivenessAA,
+ /* CheckBBLivenessOnly */ true,
+ CheckBBLivenessOnly ? DepClass : DepClassTy::OPTIONAL))
return true;
- }
- /// Collection of instructions that need to be explored again, e.g., we
- /// did assume they do not transfer control to (one of their) successors.
- SmallSetVector<const Instruction *, 8> ToBeExploredFrom;
-
- /// Collection of instructions that are known to not transfer control.
- SmallSetVector<const Instruction *, 8> KnownDeadEnds;
-
- /// Collection of all assumed live BasicBlocks.
- DenseSet<const BasicBlock *> AssumedLiveBlocks;
-};
+ if (CheckBBLivenessOnly)
+ return false;
-static bool
-identifyAliveSuccessors(Attributor &A, const CallBase &CB,
- AbstractAttribute &AA,
- SmallVectorImpl<const Instruction *> &AliveSuccessors) {
- const IRPosition &IPos = IRPosition::callsite_function(CB);
-
- const auto &NoReturnAA = A.getAAFor<AANoReturn>(AA, IPos);
- if (NoReturnAA.isAssumedNoReturn())
- return !NoReturnAA.isKnownNoReturn();
- if (CB.isTerminator())
- AliveSuccessors.push_back(&CB.getSuccessor(0)->front());
+ // If we haven't succeeded we query the specific liveness info for the IRP.
+ const AAIsDead *IsDeadAA;
+ if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE)
+ IsDeadAA = &getOrCreateAAFor<AAIsDead>(
+ IRPosition::callsite_returned(cast<CallBase>(IRP.getAssociatedValue())),
+ QueryingAA, /* TrackDependence */ false);
else
- AliveSuccessors.push_back(CB.getNextNode());
- return false;
-}
-
-static bool
-identifyAliveSuccessors(Attributor &A, const InvokeInst &II,
- AbstractAttribute &AA,
- SmallVectorImpl<const Instruction *> &AliveSuccessors) {
- bool UsedAssumedInformation =
- identifyAliveSuccessors(A, cast<CallBase>(II), AA, AliveSuccessors);
-
- // First, determine if we can change an invoke to a call assuming the
- // callee is nounwind. This is not possible if the personality of the
-  // function allows catching asynchronous exceptions.
- if (AAIsDeadFunction::mayCatchAsynchronousExceptions(*II.getFunction())) {
- AliveSuccessors.push_back(&II.getUnwindDest()->front());
- } else {
- const IRPosition &IPos = IRPosition::callsite_function(II);
- const auto &AANoUnw = A.getAAFor<AANoUnwind>(AA, IPos);
- if (AANoUnw.isAssumedNoUnwind()) {
- UsedAssumedInformation |= !AANoUnw.isKnownNoUnwind();
- } else {
- AliveSuccessors.push_back(&II.getUnwindDest()->front());
- }
- }
- return UsedAssumedInformation;
-}
-
-static Optional<ConstantInt *>
-getAssumedConstant(Attributor &A, const Value &V, AbstractAttribute &AA,
- bool &UsedAssumedInformation) {
- const auto &ValueSimplifyAA =
- A.getAAFor<AAValueSimplify>(AA, IRPosition::value(V));
- Optional<Value *> SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(A);
- UsedAssumedInformation |= !ValueSimplifyAA.isKnown();
- if (!SimplifiedV.hasValue())
- return llvm::None;
- if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue()))
- return llvm::None;
- return dyn_cast_or_null<ConstantInt>(SimplifiedV.getValue());
-}
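-// Note the return semantics: a still-pending simplification yields llvm::None
-// (callers then treat the edges as dead for now), a simplification to undef is
-// treated the same way, and a simplification to a non-ConstantInt value yields
-// a null result.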
-
-static bool
-identifyAliveSuccessors(Attributor &A, const BranchInst &BI,
- AbstractAttribute &AA,
- SmallVectorImpl<const Instruction *> &AliveSuccessors) {
- bool UsedAssumedInformation = false;
- if (BI.getNumSuccessors() == 1) {
- AliveSuccessors.push_back(&BI.getSuccessor(0)->front());
- } else {
- Optional<ConstantInt *> CI =
- getAssumedConstant(A, *BI.getCondition(), AA, UsedAssumedInformation);
- if (!CI.hasValue()) {
- // No value yet, assume both edges are dead.
- } else if (CI.getValue()) {
- const BasicBlock *SuccBB =
- BI.getSuccessor(1 - CI.getValue()->getZExtValue());
- AliveSuccessors.push_back(&SuccBB->front());
- } else {
- AliveSuccessors.push_back(&BI.getSuccessor(0)->front());
- AliveSuccessors.push_back(&BI.getSuccessor(1)->front());
- UsedAssumedInformation = false;
- }
- }
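-  // For example, if the condition is assumed to simplify to the constant
-  // i1 true (zero-extended value 1), only successor 1 - 1 = 0, the taken
-  // block, is considered alive.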
- return UsedAssumedInformation;
-}
-
-static bool
-identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
- AbstractAttribute &AA,
- SmallVectorImpl<const Instruction *> &AliveSuccessors) {
- bool UsedAssumedInformation = false;
- Optional<ConstantInt *> CI =
- getAssumedConstant(A, *SI.getCondition(), AA, UsedAssumedInformation);
- if (!CI.hasValue()) {
- // No value yet, assume all edges are dead.
- } else if (CI.getValue()) {
- for (auto &CaseIt : SI.cases()) {
- if (CaseIt.getCaseValue() == CI.getValue()) {
- AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front());
- return UsedAssumedInformation;
- }
- }
- AliveSuccessors.push_back(&SI.getDefaultDest()->front());
- return UsedAssumedInformation;
- } else {
- for (const BasicBlock *SuccBB : successors(SI.getParent()))
- AliveSuccessors.push_back(&SuccBB->front());
- }
- return UsedAssumedInformation;
-}
-
-ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) {
- ChangeStatus Change = ChangeStatus::UNCHANGED;
-
- LLVM_DEBUG(dbgs() << "[AAIsDead] Live [" << AssumedLiveBlocks.size() << "/"
- << getAssociatedFunction()->size() << "] BBs and "
- << ToBeExploredFrom.size() << " exploration points and "
- << KnownDeadEnds.size() << " known dead ends\n");
-
- // Copy and clear the list of instructions we need to explore from. It is
- // refilled with instructions the next update has to look at.
- SmallVector<const Instruction *, 8> Worklist(ToBeExploredFrom.begin(),
- ToBeExploredFrom.end());
- decltype(ToBeExploredFrom) NewToBeExploredFrom;
-
- SmallVector<const Instruction *, 8> AliveSuccessors;
- while (!Worklist.empty()) {
- const Instruction *I = Worklist.pop_back_val();
- LLVM_DEBUG(dbgs() << "[AAIsDead] Exploration inst: " << *I << "\n");
-
- AliveSuccessors.clear();
-
- bool UsedAssumedInformation = false;
- switch (I->getOpcode()) {
- // TODO: look for (assumed) UB to backwards propagate "deadness".
- default:
- if (I->isTerminator()) {
- for (const BasicBlock *SuccBB : successors(I->getParent()))
- AliveSuccessors.push_back(&SuccBB->front());
- } else {
- AliveSuccessors.push_back(I->getNextNode());
- }
- break;
- case Instruction::Call:
- UsedAssumedInformation = identifyAliveSuccessors(A, cast<CallInst>(*I),
- *this, AliveSuccessors);
- break;
- case Instruction::Invoke:
- UsedAssumedInformation = identifyAliveSuccessors(A, cast<InvokeInst>(*I),
- *this, AliveSuccessors);
- break;
- case Instruction::Br:
- UsedAssumedInformation = identifyAliveSuccessors(A, cast<BranchInst>(*I),
- *this, AliveSuccessors);
- break;
- case Instruction::Switch:
- UsedAssumedInformation = identifyAliveSuccessors(A, cast<SwitchInst>(*I),
- *this, AliveSuccessors);
- break;
- }
-
- if (UsedAssumedInformation) {
- NewToBeExploredFrom.insert(I);
- } else {
- Change = ChangeStatus::CHANGED;
- if (AliveSuccessors.empty() ||
- (I->isTerminator() && AliveSuccessors.size() < I->getNumSuccessors()))
- KnownDeadEnds.insert(I);
- }
-
- LLVM_DEBUG(dbgs() << "[AAIsDead] #AliveSuccessors: "
- << AliveSuccessors.size() << " UsedAssumedInformation: "
- << UsedAssumedInformation << "\n");
-
- for (const Instruction *AliveSuccessor : AliveSuccessors) {
- if (!I->isTerminator()) {
- assert(AliveSuccessors.size() == 1 &&
- "Non-terminator expected to have a single successor!");
- Worklist.push_back(AliveSuccessor);
- } else {
- if (assumeLive(A, *AliveSuccessor->getParent()))
- Worklist.push_back(AliveSuccessor);
- }
- }
- }
-
- ToBeExploredFrom = std::move(NewToBeExploredFrom);
-
- // If we know everything is live there is no need to query for liveness.
- // Instead, indicating a pessimistic fixpoint will cause the state to be
- // "invalid" and all queries to be answered conservatively without lookups.
-  // To be in this state we have to (1) have finished the exploration, (2)
-  // not have ruled any unreachable code dead, and (3) not have discovered
-  // any non-trivial dead end.
- if (ToBeExploredFrom.empty() &&
- getAssociatedFunction()->size() == AssumedLiveBlocks.size() &&
- llvm::all_of(KnownDeadEnds, [](const Instruction *DeadEndI) {
- return DeadEndI->isTerminator() && DeadEndI->getNumSuccessors() == 0;
- }))
- return indicatePessimisticFixpoint();
- return Change;
-}
-
-/// Liveness information for call sites.
-struct AAIsDeadCallSite final : AAIsDeadFunction {
- AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadFunction(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites instead of
- // redirecting requests to the callee.
- llvm_unreachable("Abstract attributes for liveness are not "
- "supported for call sites yet!");
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- return indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
-};
-
-/// -------------------- Dereferenceable Argument Attribute --------------------
-
-template <>
-ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
- const DerefState &R) {
- ChangeStatus CS0 =
- clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState);
- ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState);
- return CS0 | CS1;
-}
-
-struct AADereferenceableImpl : AADereferenceable {
- AADereferenceableImpl(const IRPosition &IRP) : AADereferenceable(IRP) {}
- using StateType = DerefState;
-
- void initialize(Attributor &A) override {
- SmallVector<Attribute, 4> Attrs;
- getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
- Attrs);
- for (const Attribute &Attr : Attrs)
- takeKnownDerefBytesMaximum(Attr.getValueAsInt());
-
- NonNullAA = &A.getAAFor<AANonNull>(*this, getIRPosition());
-
- const IRPosition &IRP = this->getIRPosition();
- bool IsFnInterface = IRP.isFnInterfaceKind();
- const Function *FnScope = IRP.getAnchorScope();
- if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition()))
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::getState()
- /// {
- StateType &getState() override { return *this; }
- const StateType &getState() const override { return *this; }
- /// }
-
- /// Helper function for collecting accessed bytes in must-be-executed-context
- void addAccessedBytesForUse(Attributor &A, const Use *U,
- const Instruction *I) {
- const Value *UseV = U->get();
- if (!UseV->getType()->isPointerTy())
- return;
-
- Type *PtrTy = UseV->getType();
- const DataLayout &DL = A.getDataLayout();
- int64_t Offset;
- if (const Value *Base = getBasePointerOfAccessPointerOperand(
- I, Offset, DL, /*AllowNonInbounds*/ true)) {
- if (Base == &getAssociatedValue() &&
- Attributor::getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
- uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType());
- addAccessedBytes(Offset, Size);
- }
- }
- return;
- }
-
- /// See AAFromMustBeExecutedContext
- bool followUse(Attributor &A, const Use *U, const Instruction *I) {
- bool IsNonNull = false;
- bool TrackUse = false;
- int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
- A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
-
- addAccessedBytesForUse(A, U, I);
- takeKnownDerefBytesMaximum(DerefBytes);
- return TrackUse;
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Change = AADereferenceable::manifest(A);
- if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) {
- removeAttrs({Attribute::DereferenceableOrNull});
- return ChangeStatus::CHANGED;
- }
- return Change;
- }
-
- void getDeducedAttributes(LLVMContext &Ctx,
- SmallVectorImpl<Attribute> &Attrs) const override {
- // TODO: Add *_globally support
- if (isAssumedNonNull())
- Attrs.emplace_back(Attribute::getWithDereferenceableBytes(
- Ctx, getAssumedDereferenceableBytes()));
- else
- Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes(
- Ctx, getAssumedDereferenceableBytes()));
- }
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- if (!getAssumedDereferenceableBytes())
- return "unknown-dereferenceable";
- return std::string("dereferenceable") +
- (isAssumedNonNull() ? "" : "_or_null") +
- (isAssumedGlobal() ? "_globally" : "") + "<" +
- std::to_string(getKnownDereferenceableBytes()) + "-" +
- std::to_string(getAssumedDereferenceableBytes()) + ">";
- }
-};
-
-/// Dereferenceable attribute for a floating value.
-struct AADereferenceableFloating
- : AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl> {
- using Base =
- AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl>;
- AADereferenceableFloating(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus Change = Base::updateImpl(A);
-
- const DataLayout &DL = A.getDataLayout();
-
- auto VisitValueCB = [&](Value &V, DerefState &T, bool Stripped) -> bool {
- unsigned IdxWidth =
- DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
- APInt Offset(IdxWidth, 0);
- const Value *Base =
- V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
-
- const auto &AA =
- A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
- int64_t DerefBytes = 0;
- if (!Stripped && this == &AA) {
- // Use IR information if we did not strip anything.
- // TODO: track globally.
- bool CanBeNull;
- DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull);
- T.GlobalState.indicatePessimisticFixpoint();
- } else {
- const DerefState &DS = static_cast<const DerefState &>(AA.getState());
- DerefBytes = DS.DerefBytesState.getAssumed();
- T.GlobalState &= DS.GlobalState;
- }
-
- // TODO: Use `AAConstantRange` to infer dereferenceable bytes.
-
-      // For now we do not try to "increase" dereferenceability due to
-      // negative indices as we first have to come up with code to deal with
-      // loops and with overflows of the dereferenceable bytes.
- int64_t OffsetSExt = Offset.getSExtValue();
- if (OffsetSExt < 0)
- OffsetSExt = 0;
-
- T.takeAssumedDerefBytesMinimum(
- std::max(int64_t(0), DerefBytes - OffsetSExt));
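-      // For example, with hypothetical numbers: a base assumed
-      // dereferenceable(16) reached through a stripped in-bounds offset of 8
-      // leaves the interior pointer assumed dereferenceable(16 - 8) = 8 bytes.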
-
- if (this == &AA) {
- if (!Stripped) {
- // If nothing was stripped IR information is all we got.
- T.takeKnownDerefBytesMaximum(
- std::max(int64_t(0), DerefBytes - OffsetSExt));
- T.indicatePessimisticFixpoint();
- } else if (OffsetSExt > 0) {
-        // If something was stripped but there is circular reasoning we look
-        // at the offset. If it is positive we would decrease the
-        // dereferenceable bytes in a circular loop, slowly driving them down
-        // to the known value, so we short-circuit that here.
- T.indicatePessimisticFixpoint();
- }
- }
-
- return T.isValidState();
- };
-
- DerefState T;
- if (!genericValueTraversal<AADereferenceable, DerefState>(
- A, getIRPosition(), *this, T, VisitValueCB))
- return indicatePessimisticFixpoint();
-
- return Change | clampStateAndIndicateChange(getState(), T);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FLOATING_ATTR(dereferenceable)
- }
-};
-
-/// Dereferenceable attribute for a return value.
-struct AADereferenceableReturned final
- : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl,
- DerefState> {
- AADereferenceableReturned(const IRPosition &IRP)
- : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl,
- DerefState>(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FNRET_ATTR(dereferenceable)
- }
-};
-
-/// Dereferenceable attribute for an argument
-struct AADereferenceableArgument final
- : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<
- AADereferenceable, AADereferenceableImpl, DerefState> {
- using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<
- AADereferenceable, AADereferenceableImpl, DerefState>;
- AADereferenceableArgument(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_ARG_ATTR(dereferenceable)
- }
-};
-
-/// Dereferenceable attribute for a call site argument.
-struct AADereferenceableCallSiteArgument final : AADereferenceableFloating {
- AADereferenceableCallSiteArgument(const IRPosition &IRP)
- : AADereferenceableFloating(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CSARG_ATTR(dereferenceable)
- }
-};
-
-/// Dereferenceable attribute deduction for a call site return value.
-struct AADereferenceableCallSiteReturned final
- : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<
- AADereferenceable, AADereferenceableImpl> {
- using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext<
- AADereferenceable, AADereferenceableImpl>;
- AADereferenceableCallSiteReturned(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CS_ATTR(dereferenceable);
- }
-};
-
-// ------------------------ Align Argument Attribute ------------------------
-
-static unsigned int getKnownAlignForUse(Attributor &A,
- AbstractAttribute &QueryingAA,
- Value &AssociatedValue, const Use *U,
- const Instruction *I, bool &TrackUse) {
- // We need to follow common pointer manipulation uses to the accesses they
- // feed into.
- if (isa<CastInst>(I)) {
- // Follow all but ptr2int casts.
- TrackUse = !isa<PtrToIntInst>(I);
- return 0;
- }
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
- if (GEP->hasAllConstantIndices()) {
- TrackUse = true;
- return 0;
- }
- }
-
- unsigned Alignment = 0;
- if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
- if (ICS.isBundleOperand(U) || ICS.isCallee(U))
- return 0;
-
- unsigned ArgNo = ICS.getArgumentNo(U);
- IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo);
- // As long as we only use known information there is no need to track
- // dependences here.
- auto &AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP,
- /* TrackDependence */ false);
- Alignment = AlignAA.getKnownAlign();
- }
-
- const Value *UseV = U->get();
- if (auto *SI = dyn_cast<StoreInst>(I))
- Alignment = SI->getAlignment();
- else if (auto *LI = dyn_cast<LoadInst>(I))
- Alignment = LI->getAlignment();
-
- if (Alignment <= 1)
- return 0;
-
- auto &DL = A.getDataLayout();
- int64_t Offset;
-
- if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) {
- if (Base == &AssociatedValue) {
- // BasePointerAddr + Offset = Alignment * Q for some integer Q.
- // So we can say that the maximum power of two which is a divisor of
- // gcd(Offset, Alignment) is an alignment.
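-      // For example, with hypothetical values Offset = 20 and Alignment = 16:
-      // gcd(20, 16) = 4 and PowerOf2Floor(4) = 4, so the base pointer is
-      // known to be at least 4-byte aligned.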
-
- uint32_t gcd =
- greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), Alignment);
- Alignment = llvm::PowerOf2Floor(gcd);
- }
- }
-
- return Alignment;
-}
-struct AAAlignImpl : AAAlign {
- AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- SmallVector<Attribute, 4> Attrs;
- getAttrs({Attribute::Alignment}, Attrs);
- for (const Attribute &Attr : Attrs)
- takeKnownMaximum(Attr.getValueAsInt());
-
- if (getIRPosition().isFnInterfaceKind() &&
- (!getAssociatedFunction() ||
- !getAssociatedFunction()->hasExactDefinition()))
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
-
- // Check for users that allow alignment annotations.
- Value &AnchorVal = getIRPosition().getAnchorValue();
- for (const Use &U : AnchorVal.uses()) {
- if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
- if (SI->getPointerOperand() == &AnchorVal)
- if (SI->getAlignment() < getAssumedAlign()) {
- STATS_DECLTRACK(AAAlign, Store,
- "Number of times alignment added to a store");
- SI->setAlignment(Align(getAssumedAlign()));
- Changed = ChangeStatus::CHANGED;
- }
- } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
- if (LI->getPointerOperand() == &AnchorVal)
- if (LI->getAlignment() < getAssumedAlign()) {
- LI->setAlignment(Align(getAssumedAlign()));
- STATS_DECLTRACK(AAAlign, Load,
- "Number of times alignment added to a load");
- Changed = ChangeStatus::CHANGED;
- }
- }
- }
-
- return AAAlign::manifest(A) | Changed;
- }
-
-  // TODO: Provide a helper to determine the implied ABI alignment and check
-  // that value in the existing manifest method and in a new one for
-  // AAAlignImpl, to avoid making the alignment explicit if it did not improve.
-
- /// See AbstractAttribute::getDeducedAttributes
- virtual void
- getDeducedAttributes(LLVMContext &Ctx,
- SmallVectorImpl<Attribute> &Attrs) const override {
- if (getAssumedAlign() > 1)
- Attrs.emplace_back(
- Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
- }
- /// See AAFromMustBeExecutedContext
- bool followUse(Attributor &A, const Use *U, const Instruction *I) {
- bool TrackUse = false;
-
- unsigned int KnownAlign =
- getKnownAlignForUse(A, *this, getAssociatedValue(), U, I, TrackUse);
- takeKnownMaximum(KnownAlign);
-
- return TrackUse;
- }
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) +
- "-" + std::to_string(getAssumedAlign()) + ">")
- : "unknown-align";
- }
-};
-
-/// Align attribute for a floating value.
-struct AAAlignFloating : AAFromMustBeExecutedContext<AAAlign, AAAlignImpl> {
- using Base = AAFromMustBeExecutedContext<AAAlign, AAAlignImpl>;
- AAAlignFloating(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- Base::updateImpl(A);
-
- const DataLayout &DL = A.getDataLayout();
-
- auto VisitValueCB = [&](Value &V, AAAlign::StateType &T,
- bool Stripped) -> bool {
- const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
- if (!Stripped && this == &AA) {
- // Use only IR information if we did not strip anything.
- const MaybeAlign PA = V.getPointerAlignment(DL);
- T.takeKnownMaximum(PA ? PA->value() : 0);
- T.indicatePessimisticFixpoint();
- } else {
- // Use abstract attribute information.
- const AAAlign::StateType &DS =
- static_cast<const AAAlign::StateType &>(AA.getState());
- T ^= DS;
- }
- return T.isValidState();
- };
-
- StateType T;
- if (!genericValueTraversal<AAAlign, StateType>(A, getIRPosition(), *this, T,
- VisitValueCB))
- return indicatePessimisticFixpoint();
-
-    // TODO: If we know we visited all incoming values, thus none are assumed
-    // dead, we can take the known information from the state T.
- return clampStateAndIndicateChange(getState(), T);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) }
-};
-
-/// Align attribute for function return value.
-struct AAAlignReturned final
- : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
- AAAlignReturned(const IRPosition &IRP)
- : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
-};
-
-/// Align attribute for function argument.
-struct AAAlignArgument final
- : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AAAlign,
- AAAlignImpl> {
- AAAlignArgument(const IRPosition &IRP)
- : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AAAlign,
- AAAlignImpl>(
- IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) }
-};
-
-struct AAAlignCallSiteArgument final : AAAlignFloating {
- AAAlignCallSiteArgument(const IRPosition &IRP) : AAAlignFloating(IRP) {}
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- return AAAlignImpl::manifest(A);
- }
-
- /// See AbstractAttribute::updateImpl(Attributor &A).
- ChangeStatus updateImpl(Attributor &A) override {
- ChangeStatus Changed = AAAlignFloating::updateImpl(A);
- if (Argument *Arg = getAssociatedArgument()) {
- const auto &ArgAlignAA = A.getAAFor<AAAlign>(
- *this, IRPosition::argument(*Arg), /* TrackDependence */ false,
- DepClassTy::OPTIONAL);
- takeKnownMaximum(ArgAlignAA.getKnownAlign());
- }
- return Changed;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) }
-};
-
-/// Align attribute deduction for a call site return value.
-struct AAAlignCallSiteReturned final
- : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AAAlign,
- AAAlignImpl> {
- using Base =
- AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AAAlign,
- AAAlignImpl>;
- AAAlignCallSiteReturned(const IRPosition &IRP) : Base(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- Base::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
-};
-
-/// ------------------ Function No-Return Attribute ----------------------------
-struct AANoReturnImpl : public AANoReturn {
- AANoReturnImpl(const IRPosition &IRP) : AANoReturn(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoReturn::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F)
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return getAssumed() ? "noreturn" : "may-return";
- }
-
- /// See AbstractAttribute::updateImpl(Attributor &A).
- virtual ChangeStatus updateImpl(Attributor &A) override {
- auto CheckForNoReturn = [](Instruction &) { return false; };
- if (!A.checkForAllInstructions(CheckForNoReturn, *this,
- {(unsigned)Instruction::Ret}))
- return indicatePessimisticFixpoint();
- return ChangeStatus::UNCHANGED;
- }
-};
-
-struct AANoReturnFunction final : AANoReturnImpl {
- AANoReturnFunction(const IRPosition &IRP) : AANoReturnImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) }
-};
-
-/// NoReturn attribute deduction for call sites.
-struct AANoReturnCallSite final : AANoReturnImpl {
- AANoReturnCallSite(const IRPosition &IRP) : AANoReturnImpl(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
-    //       sense to specialize attributes for call site arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AANoReturn::StateType &>(FnAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
-};
-
-/// ----------------------- Variable Capturing ---------------------------------
-
-/// A class to hold the state for no-capture attributes.
-struct AANoCaptureImpl : public AANoCapture {
- AANoCaptureImpl(const IRPosition &IRP) : AANoCapture(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ true)) {
- indicateOptimisticFixpoint();
- return;
- }
- Function *AnchorScope = getAnchorScope();
- if (isFnInterfaceKind() &&
- (!AnchorScope || !AnchorScope->hasExactDefinition())) {
- indicatePessimisticFixpoint();
- return;
- }
-
- // You cannot "capture" null in the default address space.
- if (isa<ConstantPointerNull>(getAssociatedValue()) &&
- getAssociatedValue().getType()->getPointerAddressSpace() == 0) {
- indicateOptimisticFixpoint();
- return;
- }
-
- const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope;
-
- // Check what state the associated function can actually capture.
- if (F)
- determineFunctionCaptureCapabilities(getIRPosition(), *F, *this);
- else
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
-
- /// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...).
- virtual void
- getDeducedAttributes(LLVMContext &Ctx,
- SmallVectorImpl<Attribute> &Attrs) const override {
- if (!isAssumedNoCaptureMaybeReturned())
- return;
-
- if (getArgNo() >= 0) {
- if (isAssumedNoCapture())
- Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
- else if (ManifestInternal)
- Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned"));
- }
- }
-
- /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known
- /// depending on the ability of the function associated with \p IRP to capture
- /// state in memory and through "returning/throwing", respectively.
- static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
- const Function &F,
- BitIntegerState &State) {
- // TODO: Once we have memory behavior attributes we should use them here.
-
- // If we know we cannot communicate or write to memory, we do not care about
- // ptr2int anymore.
- if (F.onlyReadsMemory() && F.doesNotThrow() &&
- F.getReturnType()->isVoidTy()) {
- State.addKnownBits(NO_CAPTURE);
- return;
- }
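-    // For example (hypothetical IR), a callee declared as
-    //   declare void @f(i8* %p) readonly nounwind
-    // can neither store %p, throw it, nor return it, so NO_CAPTURE is known
-    // immediately.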
-
-    // A function cannot capture state in memory if it only reads memory; it
-    // can, however, return/throw state, and that state might be influenced by
-    // the pointer value, e.g., loading from a returned pointer might reveal a
-    // bit.
- if (F.onlyReadsMemory())
- State.addKnownBits(NOT_CAPTURED_IN_MEM);
-
-    // A function cannot communicate state back if it does not throw
-    // exceptions and does not return values.
- if (F.doesNotThrow() && F.getReturnType()->isVoidTy())
- State.addKnownBits(NOT_CAPTURED_IN_RET);
-
- // Check existing "returned" attributes.
- int ArgNo = IRP.getArgNo();
- if (F.doesNotThrow() && ArgNo >= 0) {
- for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
- if (F.hasParamAttribute(u, Attribute::Returned)) {
- if (u == unsigned(ArgNo))
- State.removeAssumedBits(NOT_CAPTURED_IN_RET);
- else if (F.onlyReadsMemory())
- State.addKnownBits(NO_CAPTURE);
- else
- State.addKnownBits(NOT_CAPTURED_IN_RET);
- break;
- }
- }
- }
-
- /// See AbstractState::getAsStr().
- const std::string getAsStr() const override {
- if (isKnownNoCapture())
- return "known not-captured";
- if (isAssumedNoCapture())
- return "assumed not-captured";
- if (isKnownNoCaptureMaybeReturned())
- return "known not-captured-maybe-returned";
- if (isAssumedNoCaptureMaybeReturned())
- return "assumed not-captured-maybe-returned";
- return "assumed-captured";
- }
-};
-
-/// Attributor-aware capture tracker.
-struct AACaptureUseTracker final : public CaptureTracker {
-
- /// Create a capture tracker that can lookup in-flight abstract attributes
- /// through the Attributor \p A.
- ///
- /// If a use leads to a potential capture, \p CapturedInMemory is set and the
- /// search is stopped. If a use leads to a return instruction,
- /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed.
- /// If a use leads to a ptr2int which may capture the value,
- /// \p CapturedInInteger is set. If a use is found that is currently assumed
- /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies
- /// set. All values in \p PotentialCopies are later tracked as well. For every
- /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0,
- /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
- /// conservatively set to true.
- AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
- const AAIsDead &IsDeadAA, AANoCapture::StateType &State,
- SmallVectorImpl<const Value *> &PotentialCopies,
- unsigned &RemainingUsesToExplore)
- : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
- PotentialCopies(PotentialCopies),
- RemainingUsesToExplore(RemainingUsesToExplore) {}
-
-  /// Determine if \p V may be captured. *Also updates the state!*
- bool valueMayBeCaptured(const Value *V) {
- if (V->getType()->isPointerTy()) {
- PointerMayBeCaptured(V, this);
- } else {
- State.indicatePessimisticFixpoint();
- }
- return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
- }
-
- /// See CaptureTracker::tooManyUses().
- void tooManyUses() override {
- State.removeAssumedBits(AANoCapture::NO_CAPTURE);
- }
-
- bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
- if (CaptureTracker::isDereferenceableOrNull(O, DL))
- return true;
- const auto &DerefAA =
- A.getAAFor<AADereferenceable>(NoCaptureAA, IRPosition::value(*O));
- return DerefAA.getAssumedDereferenceableBytes();
- }
-
- /// See CaptureTracker::captured(...).
- bool captured(const Use *U) override {
- Instruction *UInst = cast<Instruction>(U->getUser());
- LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst
- << "\n");
-
- // Because we may reuse the tracker multiple times we keep track of the
- // number of explored uses ourselves as well.
- if (RemainingUsesToExplore-- == 0) {
- LLVM_DEBUG(dbgs() << " - too many uses to explore!\n");
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
- /* Return */ true);
- }
-
- // Deal with ptr2int by following uses.
- if (isa<PtrToIntInst>(UInst)) {
- LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n");
- return valueMayBeCaptured(UInst);
- }
-
- // Explicitly catch return instructions.
- if (isa<ReturnInst>(UInst))
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
- /* Return */ true);
-
- // For now we only use special logic for call sites. However, the tracker
- // itself knows about a lot of other non-capturing cases already.
- CallSite CS(UInst);
- if (!CS || !CS.isArgOperand(U))
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
- /* Return */ true);
-
- unsigned ArgNo = CS.getArgumentNo(U);
- const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo);
-    // If we have an abstract no-capture attribute for the argument we can use
- // it to justify a non-capture attribute here. This allows recursion!
- auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos);
- if (ArgNoCaptureAA.isAssumedNoCapture())
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
- /* Return */ false);
- if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
- addPotentialCopy(CS);
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
- /* Return */ false);
- }
-
-    // Lastly, we could not find a reason to assume no-capture, so we do not.
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
- /* Return */ true);
- }
-
-  /// Register \p CS as a potential copy of the value we are checking.
- void addPotentialCopy(CallSite CS) {
- PotentialCopies.push_back(CS.getInstruction());
- }
-
- /// See CaptureTracker::shouldExplore(...).
- bool shouldExplore(const Use *U) override {
- // Check liveness.
- return !IsDeadAA.isAssumedDead(cast<Instruction>(U->getUser()));
- }
-
- /// Update the state according to \p CapturedInMem, \p CapturedInInt, and
- /// \p CapturedInRet, then return the appropriate value for use in the
- /// CaptureTracker::captured() interface.
- bool isCapturedIn(bool CapturedInMem, bool CapturedInInt,
- bool CapturedInRet) {
- LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int "
- << CapturedInInt << "|Ret " << CapturedInRet << "]\n");
- if (CapturedInMem)
- State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM);
- if (CapturedInInt)
- State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT);
- if (CapturedInRet)
- State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET);
- return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
- }
-
-private:
- /// The attributor providing in-flight abstract attributes.
- Attributor &A;
-
- /// The abstract attribute currently updated.
- AANoCapture &NoCaptureAA;
-
- /// The abstract liveness state.
- const AAIsDead &IsDeadAA;
-
- /// The state currently updated.
- AANoCapture::StateType &State;
-
- /// Set of potential copies of the tracked value.
- SmallVectorImpl<const Value *> &PotentialCopies;
-
- /// Global counter to limit the number of explored uses.
- unsigned &RemainingUsesToExplore;
-};
-
-ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
- const IRPosition &IRP = getIRPosition();
- const Value *V =
- getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue();
- if (!V)
- return indicatePessimisticFixpoint();
-
- const Function *F =
- getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
- assert(F && "Expected a function!");
- const IRPosition &FnPos = IRPosition::function(*F);
- const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos);
-
- AANoCapture::StateType T;
-
- // Readonly means we cannot capture through memory.
- const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
- if (FnMemAA.isAssumedReadOnly()) {
- T.addKnownBits(NOT_CAPTURED_IN_MEM);
- if (FnMemAA.isKnownReadOnly())
- addKnownBits(NOT_CAPTURED_IN_MEM);
- }
+ IsDeadAA = &getOrCreateAAFor<AAIsDead>(IRP, QueryingAA,
+ /* TrackDependence */ false);
+ // Don't check liveness for AAIsDead.
+ if (QueryingAA == IsDeadAA)
+ return false;
-  // Make sure all returned values are different from the underlying value.
- // TODO: we could do this in a more sophisticated way inside
- // AAReturnedValues, e.g., track all values that escape through returns
- // directly somehow.
- auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
- bool SeenConstant = false;
- for (auto &It : RVAA.returned_values()) {
- if (isa<Constant>(It.first)) {
- if (SeenConstant)
- return false;
- SeenConstant = true;
- } else if (!isa<Argument>(It.first) ||
- It.first == getAssociatedArgument())
- return false;
- }
+ if (IsDeadAA->isAssumedDead()) {
+ if (QueryingAA)
+ recordDependence(*IsDeadAA, *QueryingAA, DepClass);
return true;
- };
-
- const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(*this, FnPos);
- if (NoUnwindAA.isAssumedNoUnwind()) {
- bool IsVoidTy = F->getReturnType()->isVoidTy();
- const AAReturnedValues *RVAA =
- IsVoidTy ? nullptr : &A.getAAFor<AAReturnedValues>(*this, FnPos);
- if (IsVoidTy || CheckReturnedArgs(*RVAA)) {
- T.addKnownBits(NOT_CAPTURED_IN_RET);
- if (T.isKnown(NOT_CAPTURED_IN_MEM))
- return ChangeStatus::UNCHANGED;
- if (NoUnwindAA.isKnownNoUnwind() &&
- (IsVoidTy || RVAA->getState().isAtFixpoint())) {
- addKnownBits(NOT_CAPTURED_IN_RET);
- if (isKnown(NOT_CAPTURED_IN_MEM))
- return indicateOptimisticFixpoint();
- }
- }
}
- // Use the CaptureTracker interface and logic with the specialized tracker,
- // defined in AACaptureUseTracker, that can look at in-flight abstract
-  // attributes and directly update the assumed state.
- SmallVector<const Value *, 4> PotentialCopies;
- unsigned RemainingUsesToExplore = DefaultMaxUsesToExplore;
- AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies,
- RemainingUsesToExplore);
-
- // Check all potential copies of the associated value until we can assume
- // none will be captured or we have to assume at least one might be.
- unsigned Idx = 0;
- PotentialCopies.push_back(V);
- while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
- Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
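-  // For example, if the value is passed to a call site argument that is only
-  // "no-capture-maybe-returned", the call result is appended to
-  // PotentialCopies and its uses are tracked in a later iteration of this
-  // loop.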
-
- AANoCapture::StateType &S = getState();
- auto Assumed = S.getAssumed();
- S.intersectAssumedBits(T.getAssumed());
- if (!isAssumedNoCaptureMaybeReturned())
- return indicatePessimisticFixpoint();
- return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
+ return false;
}
-/// NoCapture attribute for function arguments.
-struct AANoCaptureArgument final : AANoCaptureImpl {
- AANoCaptureArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) }
-};
-
-/// NoCapture attribute for call site arguments.
-struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
- AANoCaptureCallSiteArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (Argument *Arg = getAssociatedArgument())
- if (Arg->hasByValAttr())
- indicateOptimisticFixpoint();
- AANoCaptureImpl::initialize(A);
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
-    //       sense to specialize attributes for call site arguments instead of
- // redirecting requests to the callee argument.
- Argument *Arg = getAssociatedArgument();
- if (!Arg)
- return indicatePessimisticFixpoint();
- const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)};
-};
-
-/// NoCapture attribute for floating values.
-struct AANoCaptureFloating final : AANoCaptureImpl {
- AANoCaptureFloating(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FLOATING_ATTR(nocapture)
- }
-};
-
-/// NoCapture attribute for function return value.
-struct AANoCaptureReturned final : AANoCaptureImpl {
- AANoCaptureReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {
- llvm_unreachable("NoCapture is not applicable to function returns!");
- }
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- llvm_unreachable("NoCapture is not applicable to function returns!");
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- llvm_unreachable("NoCapture is not applicable to function returns!");
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
-};
-
-/// NoCapture attribute deduction for a call site return value.
-struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
- AANoCaptureCallSiteReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CSRET_ATTR(nocapture)
- }
-};
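For orientation, here is a minimal standalone sketch of the bit-lattice style the AANoCapture classes above rely on. The enum encoding is an assumption for illustration, not the exact constants from this patch; the key property it demonstrates is that bits are only ever removed, so the state moves monotonically toward the pessimistic end.

#include <cassert>
#include <cstdint>

// Assumed encoding in the spirit of AANoCapture: each bit rules out one way
// of capturing; "no capture" is the conjunction of all of them.
enum : uint32_t {
  NOT_CAPTURED_IN_MEM = 1u << 0,
  NOT_CAPTURED_IN_INT = 1u << 1,
  NOT_CAPTURED_IN_RET = 1u << 2,
  NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,
  NO_CAPTURE = NO_CAPTURE_MAYBE_RETURNED | NOT_CAPTURED_IN_RET,
};

int main() {
  uint32_t Assumed = NO_CAPTURE; // optimistic initial state
  // An escape into the return value only clears the RET bit, which is
  // exactly the weaker "nocapture maybe returned" state.
  Assumed &= ~NOT_CAPTURED_IN_RET; // analogue of intersectAssumedBits(...)
  assert(Assumed == NO_CAPTURE_MAYBE_RETURNED);
  // A store of the pointer to memory clears the MEM bit; once a bit is gone
  // it never comes back.
  Assumed &= ~NOT_CAPTURED_IN_MEM;
  assert(!(Assumed & NOT_CAPTURED_IN_MEM));
  return 0;
}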
-
-/// ------------------ Value Simplify Attribute ----------------------------
-struct AAValueSimplifyImpl : AAValueSimplify {
- AAValueSimplifyImpl(const IRPosition &IRP) : AAValueSimplify(IRP) {}
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- return getAssumed() ? (getKnown() ? "simplified" : "maybe-simple")
- : "not-simple";
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
-
- /// See AAValueSimplify::getAssumedSimplifiedValue()
- Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
- if (!getAssumed())
- return const_cast<Value *>(&getAssociatedValue());
- return SimplifiedAssociatedValue;
- }
- void initialize(Attributor &A) override {}
-
- /// Helper function for querying AAValueSimplify and updating the candidate.
- /// \param QueryingValue Value trying to unify with SimplifiedValue
- /// \param AccumulatedSimplifiedValue Current simplification result.
- static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA,
- Value &QueryingValue,
- Optional<Value *> &AccumulatedSimplifiedValue) {
- // FIXME: Add typecast support.
-
- auto &ValueSimpifyAA = A.getAAFor<AAValueSimplify>(
- QueryingAA, IRPosition::value(QueryingValue));
-
- Optional<Value *> QueryingValueSimplified =
- ValueSimpifyAA.getAssumedSimplifiedValue(A);
-
- if (!QueryingValueSimplified.hasValue())
- return true;
-
- if (!QueryingValueSimplified.getValue())
- return false;
-
- Value &QueryingValueSimplifiedUnwrapped =
- *QueryingValueSimplified.getValue();
+bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
+ const AbstractAttribute &QueryingAA,
+ const Value &V, DepClassTy LivenessDepClass) {
- if (isa<UndefValue>(QueryingValueSimplifiedUnwrapped))
- return true;
-
- if (AccumulatedSimplifiedValue.hasValue())
- return AccumulatedSimplifiedValue == QueryingValueSimplified;
-
- LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << QueryingValue
- << " is assumed to be "
- << QueryingValueSimplifiedUnwrapped << "\n");
-
- AccumulatedSimplifiedValue = QueryingValueSimplified;
- return true;
- }
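The accumulation rule in checkAndUpdate is easier to see in isolation: "unknown" answers are ignored, the first concrete candidate is kept, and any later disagreement fails the query. A minimal standalone analogue, modeling llvm::Optional with std::optional and values with ints:

#include <cassert>
#include <optional>

// Standalone analogue of checkAndUpdate's accumulation logic.
bool checkAndUpdate(std::optional<int> Incoming, std::optional<int> &Acc) {
  if (!Incoming)   // not known yet under the current assumption
    return true;
  if (Acc)         // must agree with what we accumulated so far
    return *Acc == *Incoming;
  Acc = Incoming;  // first concrete candidate
  return true;
}

int main() {
  std::optional<int> Acc;
  assert(checkAndUpdate(std::nullopt, Acc)); // unknown is fine
  assert(checkAndUpdate(42, Acc));           // first candidate accepted
  assert(checkAndUpdate(42, Acc));           // agreement keeps it
  assert(!checkAndUpdate(7, Acc));           // disagreement fails the query
  return 0;
}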
-
- bool askSimplifiedValueForAAValueConstantRange(Attributor &A) {
- if (!getAssociatedValue().getType()->isIntegerTy())
- return false;
-
- const auto &ValueConstantRangeAA =
- A.getAAFor<AAValueConstantRange>(*this, getIRPosition());
-
- Optional<ConstantInt *> COpt =
- ValueConstantRangeAA.getAssumedConstantInt(A);
- if (COpt.hasValue()) {
- if (auto *C = COpt.getValue())
- SimplifiedAssociatedValue = C;
- else
- return false;
- } else {
- // FIXME: It should be llvm::None but if you set llvm::None,
- // values are mistakenly inferred as `undef` now.
- SimplifiedAssociatedValue = &getAssociatedValue();
- }
+ // Check the trivial case first as it catches void values.
+ if (V.use_empty())
return true;
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
-
- if (!SimplifiedAssociatedValue.hasValue() ||
- !SimplifiedAssociatedValue.getValue())
- return Changed;
-
- if (auto *C = dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())) {
- // We can replace the AssociatedValue with the constant.
- Value &V = getAssociatedValue();
- if (!V.user_empty() && &V != C && V.getType() == C->getType()) {
- LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << V << " -> " << *C
- << "\n");
- A.changeValueAfterManifest(V, *C);
- Changed = ChangeStatus::CHANGED;
- }
- }
-
- return Changed | AAValueSimplify::manifest(A);
- }
-
- /// See AbstractState::indicatePessimisticFixpoint(...).
- ChangeStatus indicatePessimisticFixpoint() override {
- // NOTE: Associated value will be returned in a pessimistic fixpoint and is
- // regarded as known. That's why `indicateOptimisticFixpoint` is called.
- SimplifiedAssociatedValue = &getAssociatedValue();
- indicateOptimisticFixpoint();
- return ChangeStatus::CHANGED;
- }
-
-protected:
- // An assumed simplified value. Initially, it is set to Optional::None, which
- // means that the value is not clear under the current assumption. If in the
- // pessimistic state, getAssumedSimplifiedValue doesn't return this value but
- // returns the original associated value.
- Optional<Value *> SimplifiedAssociatedValue;
-};
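The rewrite in manifest() above is guarded by three conditions. As a sketch of that precondition, assuming the LLVM headers below; the helper name canReplaceWithConstant is made up for illustration:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Hypothetical helper distilling the manifest() guard: rewriting is only
// worthwhile if there are users to rewrite, the constant is genuinely
// different, and the types match exactly (no casts are synthesized).
static bool canReplaceWithConstant(const Value &V, const Constant &C) {
  return !V.use_empty() && &V != &C && V.getType() == C.getType();
}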
-
-struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
- AAValueSimplifyArgument(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
-
- void initialize(Attributor &A) override {
- AAValueSimplifyImpl::initialize(A);
- if (!getAssociatedFunction() || getAssociatedFunction()->isDeclaration())
- indicatePessimisticFixpoint();
- if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest},
- /* IgnoreSubsumingPositions */ true))
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // Byval is only replaceable if it is readonly; otherwise we would write into
- // the replaced value and not the copy that byval creates implicitly.
- Argument *Arg = getAssociatedArgument();
- if (Arg->hasByValAttr()) {
- const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
- if (!MemAA.isAssumedReadOnly())
- return indicatePessimisticFixpoint();
- }
-
- bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
-
- auto PredForCallSite = [&](AbstractCallSite ACS) {
- // Check if we have an associated argument or not (which can happen for
- // callback calls).
- Value *ArgOp = ACS.getCallArgOperand(getArgNo());
- if (!ArgOp)
- return false;
- // We can only propagate thread independent values through callbacks.
- // This is different to direct/indirect call sites because for them we
- // know the thread executing the caller and callee is the same. For
- // callbacks this is not guaranteed, thus a thread dependent value could
- // be different for the caller and callee, making it invalid to propagate.
- if (ACS.isCallbackCall())
- if (auto *C = dyn_cast<Constant>(ArgOp))
- if (C->isThreadDependent())
- return false;
- return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue);
- };
-
- if (!A.checkForAllCallSites(PredForCallSite, *this, true))
- if (!askSimplifiedValueForAAValueConstantRange(A))
- return indicatePessimisticFixpoint();
-
- // If a candidate was found in this update, return CHANGED.
- return HasValueBefore == SimplifiedAssociatedValue.hasValue()
- ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_ARG_ATTR(value_simplify)
- }
-};
-
-struct AAValueSimplifyReturned : AAValueSimplifyImpl {
- AAValueSimplifyReturned(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
-
- auto PredForReturned = [&](Value &V) {
- return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
- };
-
- if (!A.checkForAllReturnedValues(PredForReturned, *this))
- if (!askSimplifiedValueForAAValueConstantRange(A))
- return indicatePessimisticFixpoint();
-
- // If a candidate was found in this update, return CHANGED.
- return HasValueBefore == SimplifiedAssociatedValue.hasValue()
- ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FNRET_ATTR(value_simplify)
- }
-};
-
-struct AAValueSimplifyFloating : AAValueSimplifyImpl {
- AAValueSimplifyFloating(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- Value &V = getAnchorValue();
-
- // TODO: add other cases
- if (isa<Constant>(V))
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
-
- auto VisitValueCB = [&](Value &V, BooleanState, bool Stripped) -> bool {
- auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
- if (!Stripped && this == &AA) {
- // TODO: Look the instruction and check recursively.
-
- LLVM_DEBUG(
- dbgs() << "[Attributor][ValueSimplify] Can't be stripped more : "
- << V << "\n");
- return false;
- }
- return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
- };
-
- if (!genericValueTraversal<AAValueSimplify, BooleanState>(
- A, getIRPosition(), *this, static_cast<BooleanState &>(*this),
- VisitValueCB))
- if (!askSimplifiedValueForAAValueConstantRange(A))
- return indicatePessimisticFixpoint();
-
- // If a candidate was found in this update, return CHANGED.
-
- return HasValueBefore == SimplifiedAssociatedValue.hasValue()
- ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FLOATING_ATTR(value_simplify)
- }
-};
-
-struct AAValueSimplifyFunction : AAValueSimplifyImpl {
- AAValueSimplifyFunction(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- SimplifiedAssociatedValue = &getAnchorValue();
- indicateOptimisticFixpoint();
- }
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- llvm_unreachable(
- "AAValueSimplify(Function|CallSite)::updateImpl will not be called");
- }
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FN_ATTR(value_simplify)
- }
-};
-
-struct AAValueSimplifyCallSite : AAValueSimplifyFunction {
- AAValueSimplifyCallSite(const IRPosition &IRP)
- : AAValueSimplifyFunction(IRP) {}
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CS_ATTR(value_simplify)
- }
-};
-
-struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned {
- AAValueSimplifyCallSiteReturned(const IRPosition &IRP)
- : AAValueSimplifyReturned(IRP) {}
-
- void trackStatistics() const override {
- STATS_DECLTRACK_CSRET_ATTR(value_simplify)
- }
-};
-struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
- AAValueSimplifyCallSiteArgument(const IRPosition &IRP)
- : AAValueSimplifyFloating(IRP) {}
-
- void trackStatistics() const override {
- STATS_DECLTRACK_CSARG_ATTR(value_simplify)
- }
-};
-
-/// ----------------------- Heap-To-Stack Conversion ---------------------------
-struct AAHeapToStackImpl : public AAHeapToStack {
- AAHeapToStackImpl(const IRPosition &IRP) : AAHeapToStack(IRP) {}
-
- const std::string getAsStr() const override {
- return "[H2S] Mallocs: " + std::to_string(MallocCalls.size());
- }
-
- ChangeStatus manifest(Attributor &A) override {
- assert(getState().isValidState() &&
- "Attempted to manifest an invalid state!");
-
- ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
- Function *F = getAssociatedFunction();
- const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
-
- for (Instruction *MallocCall : MallocCalls) {
- // This malloc cannot be replaced.
- if (BadMallocCalls.count(MallocCall))
- continue;
-
- for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
- LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
- A.deleteAfterManifest(*FreeCall);
- HasChanged = ChangeStatus::CHANGED;
- }
-
- LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
- << "\n");
-
- Constant *Size;
- if (isCallocLikeFn(MallocCall, TLI)) {
- auto *Num = cast<ConstantInt>(MallocCall->getOperand(0));
- auto *SizeT = dyn_cast<ConstantInt>(MallocCall->getOperand(1));
- APInt TotalSize = SizeT->getValue() * Num->getValue();
- Size =
- ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize);
- } else {
- Size = cast<ConstantInt>(MallocCall->getOperand(0));
- }
-
- unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
- Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
- Size, "", MallocCall->getNextNode());
-
- if (AI->getType() != MallocCall->getType())
- AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
- AI->getNextNode());
-
- replaceAllInstructionUsesWith(*MallocCall, *AI);
-
- if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
- auto *NBB = II->getNormalDest();
- BranchInst::Create(NBB, MallocCall->getParent());
- A.deleteAfterManifest(*MallocCall);
- } else {
- A.deleteAfterManifest(*MallocCall);
- }
-
- if (isCallocLikeFn(MallocCall, TLI)) {
- auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
- AI->getNextNode());
- Value *Ops[] = {
- BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
- ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
-
- Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
- Module *M = F->getParent();
- Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
- CallInst::Create(Fn, Ops, "", BI->getNextNode());
- }
- HasChanged = ChangeStatus::CHANGED;
- }
-
- return HasChanged;
- }
-
- /// Collection of all malloc calls in a function.
- SmallSetVector<Instruction *, 4> MallocCalls;
-
- /// Collection of malloc calls that cannot be converted.
- DenseSet<const Instruction *> BadMallocCalls;
-
- /// A map for each malloc call to the set of associated free calls.
- DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
-
- ChangeStatus updateImpl(Attributor &A) override;
-};
-
-ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
- const Function *F = getAssociatedFunction();
- const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
-
- MustBeExecutedContextExplorer &Explorer =
- A.getInfoCache().getMustBeExecutedContextExplorer();
-
- auto FreeCheck = [&](Instruction &I) {
- const auto &Frees = FreesForMalloc.lookup(&I);
- if (Frees.size() != 1)
- return false;
- Instruction *UniqueFree = *Frees.begin();
- return Explorer.findInContextOf(UniqueFree, I.getNextNode());
- };
-
- auto UsesCheck = [&](Instruction &I) {
- bool ValidUsesOnly = true;
- bool MustUse = true;
- auto Pred = [&](const Use &U, bool &Follow) -> bool {
- Instruction *UserI = cast<Instruction>(U.getUser());
- if (isa<LoadInst>(UserI))
- return true;
- if (auto *SI = dyn_cast<StoreInst>(UserI)) {
- if (SI->getValueOperand() == U.get()) {
- LLVM_DEBUG(dbgs()
- << "[H2S] escaping store to memory: " << *UserI << "\n");
- ValidUsesOnly = false;
- } else {
- // A store into the malloc'ed memory is fine.
- }
- return true;
- }
- if (auto *CB = dyn_cast<CallBase>(UserI)) {
- if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd())
- return true;
- // Record the free call for this malloc.
- if (isFreeCall(UserI, TLI)) {
- if (MustUse) {
- FreesForMalloc[&I].insert(UserI);
- } else {
- LLVM_DEBUG(dbgs() << "[H2S] free potentially on different mallocs: "
- << *UserI << "\n");
- ValidUsesOnly = false;
- }
- return true;
- }
-
- unsigned ArgNo = CB->getArgOperandNo(&U);
-
- const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
- *this, IRPosition::callsite_argument(*CB, ArgNo));
-
- // If a callsite argument use is nofree, we are fine.
- const auto &ArgNoFreeAA = A.getAAFor<AANoFree>(
- *this, IRPosition::callsite_argument(*CB, ArgNo));
-
- if (!NoCaptureAA.isAssumedNoCapture() ||
- !ArgNoFreeAA.isAssumedNoFree()) {
- LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
- ValidUsesOnly = false;
- }
- return true;
- }
-
- if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
- isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
- MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI));
- Follow = true;
- return true;
- }
- // Unknown user for which we cannot track uses further (in a way that
- // makes sense).
- LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n");
- ValidUsesOnly = false;
- return true;
- };
- A.checkForAllUses(Pred, *this, I);
- return ValidUsesOnly;
- };
-
- auto MallocCallocCheck = [&](Instruction &I) {
- if (BadMallocCalls.count(&I))
- return true;
-
- bool IsMalloc = isMallocLikeFn(&I, TLI);
- bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
- if (!IsMalloc && !IsCalloc) {
- BadMallocCalls.insert(&I);
- return true;
- }
-
- if (IsMalloc) {
- if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
- if (Size->getValue().ule(MaxHeapToStackSize))
- if (UsesCheck(I) || FreeCheck(I)) {
- MallocCalls.insert(&I);
- return true;
- }
- } else if (IsCalloc) {
- bool Overflow = false;
- if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
- if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
- if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
- .ule(MaxHeapToStackSize))
- if (!Overflow && (UsesCheck(I) || FreeCheck(I))) {
- MallocCalls.insert(&I);
- return true;
- }
- }
-
- BadMallocCalls.insert(&I);
- return true;
- };
-
- size_t NumBadMallocs = BadMallocCalls.size();
-
- A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);
-
- if (NumBadMallocs != BadMallocCalls.size())
- return ChangeStatus::CHANGED;
-
- return ChangeStatus::UNCHANGED;
-}
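The calloc path above must compute num * size with explicit overflow detection before comparing against the stack-size threshold. A standalone analogue, where __builtin_mul_overflow (a GCC/Clang builtin) stands in for APInt::umul_ov and the 1024-byte default is made up for illustration (the real MaxHeapToStackSize is a command-line option):

#include <cstdint>
#include <cstdio>

// Standalone analogue of the calloc size check in MallocCallocCheck above.
static bool callocFitsOnStack(uint64_t Num, uint64_t Size,
                              uint64_t MaxHeapToStackSize = 1024) {
  uint64_t Total;
  if (__builtin_mul_overflow(Num, Size, &Total)) // mirrors APInt::umul_ov
    return false;
  return Total <= MaxHeapToStackSize;
}

int main() {
  printf("%d\n", callocFitsOnStack(4, 16));                  // 1: 64 bytes fit
  printf("%d\n", callocFitsOnStack(1ull << 40, 1ull << 40)); // 0: overflow
  return 0;
}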
-
-struct AAHeapToStackFunction final : public AAHeapToStackImpl {
- AAHeapToStackFunction(const IRPosition &IRP) : AAHeapToStackImpl(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECL(MallocCalls, Function,
- "Number of malloc calls converted to allocas");
- for (auto *C : MallocCalls)
- if (!BadMallocCalls.count(C))
- ++BUILD_STAT_NAME(MallocCalls, Function);
- }
-};
-
-/// -------------------- Memory Behavior Attributes ----------------------------
-/// Includes read-none, read-only, and write-only.
-/// ----------------------------------------------------------------------------
-struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
- AAMemoryBehaviorImpl(const IRPosition &IRP) : AAMemoryBehavior(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- intersectAssumedBits(BEST_STATE);
- getKnownStateFromValue(getIRPosition(), getState());
- IRAttribute::initialize(A);
- }
-
- /// Return the memory behavior information encoded in the IR for \p IRP.
- static void getKnownStateFromValue(const IRPosition &IRP,
- BitIntegerState &State,
- bool IgnoreSubsumingPositions = false) {
- SmallVector<Attribute, 2> Attrs;
- IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions);
- for (const Attribute &Attr : Attrs) {
- switch (Attr.getKindAsEnum()) {
- case Attribute::ReadNone:
- State.addKnownBits(NO_ACCESSES);
- break;
- case Attribute::ReadOnly:
- State.addKnownBits(NO_WRITES);
- break;
- case Attribute::WriteOnly:
- State.addKnownBits(NO_READS);
- break;
- default:
- llvm_unreachable("Unexpcted attribute!");
- }
- }
-
- if (auto *I = dyn_cast<Instruction>(&IRP.getAnchorValue())) {
- if (!I->mayReadFromMemory())
- State.addKnownBits(NO_READS);
- if (!I->mayWriteToMemory())
- State.addKnownBits(NO_WRITES);
- }
- }
-
- /// See AbstractAttribute::getDeducedAttributes(...).
- void getDeducedAttributes(LLVMContext &Ctx,
- SmallVectorImpl<Attribute> &Attrs) const override {
- assert(Attrs.size() == 0);
- if (isAssumedReadNone())
- Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone));
- else if (isAssumedReadOnly())
- Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly));
- else if (isAssumedWriteOnly())
- Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly));
- assert(Attrs.size() <= 1);
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- const IRPosition &IRP = getIRPosition();
-
- // Check if we would improve the existing attributes first.
- SmallVector<Attribute, 4> DeducedAttrs;
- getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
- if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
- return IRP.hasAttr(Attr.getKindAsEnum(),
- /* IgnoreSubsumingPositions */ true);
- }))
- return ChangeStatus::UNCHANGED;
-
- // Clear existing attributes.
- IRP.removeAttrs(AttrKinds);
-
- // Use the generic manifest method.
- return IRAttribute::manifest(A);
- }
-
- /// See AbstractState::getAsStr().
- const std::string getAsStr() const override {
- if (isAssumedReadNone())
- return "readnone";
- if (isAssumedReadOnly())
- return "readonly";
- if (isAssumedWriteOnly())
- return "writeonly";
- return "may-read/write";
- }
-
- /// The set of IR attributes AAMemoryBehavior deals with.
- static const Attribute::AttrKind AttrKinds[3];
-};
-
-const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = {
- Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly};
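The three attributes map onto two independent "no reads" / "no writes" bits, with readnone as their conjunction. A minimal standalone sketch of that encoding; the bit values are assumptions for illustration, not the constants from the patch:

#include <cstdint>
#include <cstdio>

// Assumed encoding in the spirit of AAMemoryBehavior.
enum : uint8_t {
  NO_READS = 1u << 0,
  NO_WRITES = 1u << 1,
  NO_ACCESSES = NO_READS | NO_WRITES,
};

static const char *attrFor(uint8_t Assumed) {
  if ((Assumed & NO_ACCESSES) == NO_ACCESSES)
    return "readnone";
  if (Assumed & NO_WRITES)
    return "readonly";  // may read, provably never writes
  if (Assumed & NO_READS)
    return "writeonly"; // may write, provably never reads
  return "may-read/write";
}

int main() {
  uint8_t State = NO_ACCESSES;    // optimistic start
  State &= ~NO_READS;             // a load was observed
  printf("%s\n", attrFor(State)); // prints "readonly"
  State &= ~NO_WRITES;            // a store was observed as well
  printf("%s\n", attrFor(State)); // prints "may-read/write"
  return 0;
}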
-
-/// Memory behavior attribute for a floating value.
-struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl {
- AAMemoryBehaviorFloating(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAMemoryBehaviorImpl::initialize(A);
- // Initialize the use vector with all direct uses of the associated value.
- for (const Use &U : getAssociatedValue().uses())
- Uses.insert(&U);
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- if (isAssumedReadNone())
- STATS_DECLTRACK_FLOATING_ATTR(readnone)
- else if (isAssumedReadOnly())
- STATS_DECLTRACK_FLOATING_ATTR(readonly)
- else if (isAssumedWriteOnly())
- STATS_DECLTRACK_FLOATING_ATTR(writeonly)
- }
-
-private:
- /// Return true if users of \p UserI might access the underlying
- /// variable/location described by \p U and should therefore be analyzed.
- bool followUsersOfUseIn(Attributor &A, const Use *U,
- const Instruction *UserI);
-
- /// Update the state according to the effect of use \p U in \p UserI.
- void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI);
-
-protected:
- /// Container for (transitive) uses of the associated argument.
- SetVector<const Use *> Uses;
-};
-
-/// Memory behavior attribute for function argument.
-struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
- AAMemoryBehaviorArgument(const IRPosition &IRP)
- : AAMemoryBehaviorFloating(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- intersectAssumedBits(BEST_STATE);
- const IRPosition &IRP = getIRPosition();
- // TODO: Make IgnoreSubsumingPositions a property of an IRAttribute so we
- // can query it when we use has/getAttr. That would allow us to reuse the
- // initialize of the base class here.
- bool HasByVal =
- IRP.hasAttr({Attribute::ByVal}, /* IgnoreSubsumingPositions */ true);
- getKnownStateFromValue(IRP, getState(),
- /* IgnoreSubsumingPositions */ HasByVal);
-
- Argument *Arg = getAssociatedArgument();
- if (!Arg || !Arg->getParent()->hasExactDefinition()) {
- indicatePessimisticFixpoint();
- } else {
- // Initialize the use vector with all direct uses of the associated value.
- for (const Use &U : Arg->uses())
- Uses.insert(&U);
- }
- }
-
- ChangeStatus manifest(Attributor &A) override {
- // TODO: From readattrs.ll: "inalloca parameters are always
- // considered written"
- if (hasAttr({Attribute::InAlloca})) {
- removeKnownBits(NO_WRITES);
- removeAssumedBits(NO_WRITES);
- }
- return AAMemoryBehaviorFloating::manifest(A);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- if (isAssumedReadNone())
- STATS_DECLTRACK_ARG_ATTR(readnone)
- else if (isAssumedReadOnly())
- STATS_DECLTRACK_ARG_ATTR(readonly)
- else if (isAssumedWriteOnly())
- STATS_DECLTRACK_ARG_ATTR(writeonly)
- }
-};
-
-struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
- AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP)
- : AAMemoryBehaviorArgument(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (Argument *Arg = getAssociatedArgument()) {
- if (Arg->hasByValAttr()) {
- addKnownBits(NO_WRITES);
- removeKnownBits(NO_READS);
- removeAssumedBits(NO_READS);
- }
- }
- AAMemoryBehaviorArgument::initialize(A);
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call site arguments instead of
- // redirecting requests to the callee argument.
- Argument *Arg = getAssociatedArgument();
- const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AAMemoryBehavior::StateType &>(ArgAA.getState()));
- }
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- if (isAssumedReadNone())
- STATS_DECLTRACK_CSARG_ATTR(readnone)
- else if (isAssumedReadOnly())
- STATS_DECLTRACK_CSARG_ATTR(readonly)
- else if (isAssumedWriteOnly())
- STATS_DECLTRACK_CSARG_ATTR(writeonly)
- }
-};
-
-/// Memory behavior attribute for a call site return position.
-struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
- AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP)
- : AAMemoryBehaviorFloating(IRP) {}
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- // We do not annotate returned values.
- return ChangeStatus::UNCHANGED;
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
-};
-
-/// An AA to represent the memory behavior function attributes.
-struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
- AAMemoryBehaviorFunction(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
-
- /// See AbstractAttribute::updateImpl(Attributor &A).
- virtual ChangeStatus updateImpl(Attributor &A) override;
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- Function &F = cast<Function>(getAnchorValue());
- if (isAssumedReadNone()) {
- F.removeFnAttr(Attribute::ArgMemOnly);
- F.removeFnAttr(Attribute::InaccessibleMemOnly);
- F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
- }
- return AAMemoryBehaviorImpl::manifest(A);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- if (isAssumedReadNone())
- STATS_DECLTRACK_FN_ATTR(readnone)
- else if (isAssumedReadOnly())
- STATS_DECLTRACK_FN_ATTR(readonly)
- else if (isAssumedWriteOnly())
- STATS_DECLTRACK_FN_ATTR(writeonly)
- }
-};
-
-/// AAMemoryBehavior attribute for call sites.
-struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
- AAMemoryBehaviorCallSite(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAMemoryBehaviorImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || !F->hasExactDefinition())
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call site arguments instead of
- // redirecting requests to the callee argument.
- Function *F = getAssociatedFunction();
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
- return clampStateAndIndicateChange(
- getState(),
- static_cast<const AAMemoryBehavior::StateType &>(FnAA.getState()));
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- if (isAssumedReadNone())
- STATS_DECLTRACK_CS_ATTR(readnone)
- else if (isAssumedReadOnly())
- STATS_DECLTRACK_CS_ATTR(readonly)
- else if (isAssumedWriteOnly())
- STATS_DECLTRACK_CS_ATTR(writeonly)
- }
-};
-} // namespace
-
-ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
-
- // The current assumed state used to determine a change.
- auto AssumedState = getAssumed();
-
- auto CheckRWInst = [&](Instruction &I) {
- // If the instruction has an own memory behavior state, use it to restrict
- // the local state. No further analysis is required as the other memory
- // state is as optimistic as it gets.
- if (ImmutableCallSite ICS = ImmutableCallSite(&I)) {
- const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
- *this, IRPosition::callsite_function(ICS));
- intersectAssumedBits(MemBehaviorAA.getAssumed());
- return !isAtFixpoint();
- }
-
- // Remove access kind modifiers if necessary.
- if (I.mayReadFromMemory())
- removeAssumedBits(NO_READS);
- if (I.mayWriteToMemory())
- removeAssumedBits(NO_WRITES);
- return !isAtFixpoint();
- };
-
- if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
- return indicatePessimisticFixpoint();
-
- return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
- : ChangeStatus::UNCHANGED;
-}
-
-ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
-
- const IRPosition &IRP = getIRPosition();
- const IRPosition &FnPos = IRPosition::function_scope(IRP);
- AAMemoryBehavior::StateType &S = getState();
-
- // First, check the function scope. We take the known information and we avoid
- // work if the assumed information implies the current assumed information for
- // this attribute. This is valid for all but byval arguments.
- Argument *Arg = IRP.getAssociatedArgument();
- AAMemoryBehavior::base_t FnMemAssumedState =
- AAMemoryBehavior::StateType::getWorstState();
- if (!Arg || !Arg->hasByValAttr()) {
- const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
- FnMemAssumedState = FnMemAA.getAssumed();
- S.addKnownBits(FnMemAA.getKnown());
- if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
- return ChangeStatus::UNCHANGED;
- }
-
- // Make sure the value is not captured (except through "return"); if
- // it is, any information derived would be irrelevant anyway as we cannot
- // check the potential aliases introduced by the capture. However, no need
- // to fall back to anything less optimistic than the function state.
- const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(
- *this, IRP, /* TrackDependence */ true, DepClassTy::OPTIONAL);
- if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
- S.intersectAssumedBits(FnMemAssumedState);
- return ChangeStatus::CHANGED;
- }
-
- // The current assumed state used to determine a change.
- auto AssumedState = S.getAssumed();
-
- // Liveness information to exclude dead users.
- // TODO: Take the FnPos once we have call site specific liveness information.
- const auto &LivenessAA = A.getAAFor<AAIsDead>(
- *this, IRPosition::function(*IRP.getAssociatedFunction()));
-
- // Visit and expand uses until all are analyzed or a fixpoint is reached.
- for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) {
- const Use *U = Uses[i];
- Instruction *UserI = cast<Instruction>(U->getUser());
- LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI
- << " [Dead: " << (LivenessAA.isAssumedDead(UserI))
- << "]\n");
- if (LivenessAA.isAssumedDead(UserI))
- continue;
-
- // Check if the users of UserI should also be visited.
- if (followUsersOfUseIn(A, U, UserI))
- for (const Use &UserIUse : UserI->uses())
- Uses.insert(&UserIUse);
-
- // If UserI might touch memory we analyze the use in detail.
- if (UserI->mayReadOrWriteMemory())
- analyzeUseIn(A, U, UserI);
- }
-
- return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
- : ChangeStatus::UNCHANGED;
-}
-
-bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U,
- const Instruction *UserI) {
- // The loaded value is unrelated to the pointer argument, no need to
- // follow the users of the load.
- if (isa<LoadInst>(UserI))
- return false;
-
- // By default we follow all uses assuming UserI might leak information on U,
- // we have special handling for call sites operands though.
- ImmutableCallSite ICS(UserI);
- if (!ICS || !ICS.isArgOperand(U))
+ // If the value is replaced by another one, for now a constant, we do not have
+ // uses. Note that this requires users of `checkForAllUses` to not recurse but
+ // instead use the `follow` callback argument to look at transitive users;
+ // however, that should be clear from the presence of the argument.
+ bool UsedAssumedInformation = false;
+ Optional<Constant *> C =
+ getAssumedConstant(V, QueryingAA, UsedAssumedInformation);
+ if (C.hasValue() && C.getValue()) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Value is simplified, uses skipped: " << V
+ << " -> " << *C.getValue() << "\n");
return true;
-
- // If the use is a call argument known not to be captured, the users of
- // the call do not need to be visited because they have to be unrelated to
- // the input. Note that this check is not trivial even though we disallow
- // general capturing of the underlying argument. The reason is that the
- // call might capture the argument "through return", which we allow and for which we
- // need to check call users.
- unsigned ArgNo = ICS.getArgumentNo(U);
- const auto &ArgNoCaptureAA =
- A.getAAFor<AANoCapture>(*this, IRPosition::callsite_argument(ICS, ArgNo));
- return !ArgNoCaptureAA.isAssumedNoCapture();
-}
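The decision of when to keep walking transitive users can be distilled into a small table. A standalone sketch over a hypothetical user classification (the real code inspects LLVM IR users directly):

#include <cassert>

enum class UserKind { Load, CallNoCaptureArg, CallOtherArg, Other };

// Distillation of followUsersOfUseIn above.
static bool shouldFollowUsers(UserKind K) {
  switch (K) {
  case UserKind::Load:
    return false; // the loaded value is unrelated to the pointer
  case UserKind::CallNoCaptureArg:
    return false; // a nocapture callee cannot leak the pointer
  case UserKind::CallOtherArg:
    return true;  // the callee may pass it back "through return"
  case UserKind::Other:
    return true;  // conservatively keep exploring
  }
  return true;
}

int main() {
  assert(!shouldFollowUsers(UserKind::Load));
  assert(shouldFollowUsers(UserKind::CallOtherArg));
  return 0;
}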
-
-void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U,
- const Instruction *UserI) {
- assert(UserI->mayReadOrWriteMemory());
-
- switch (UserI->getOpcode()) {
- default:
- // TODO: Handle all atomics and other side-effect operations we know of.
- break;
- case Instruction::Load:
- // Loads cause the NO_READS property to disappear.
- removeAssumedBits(NO_READS);
- return;
-
- case Instruction::Store:
- // Stores cause the NO_WRITES property to disappear if the use is the
- // pointer operand. Note that we do assume that capturing was taken care of
- // somewhere else.
- if (cast<StoreInst>(UserI)->getPointerOperand() == U->get())
- removeAssumedBits(NO_WRITES);
- return;
-
- case Instruction::Call:
- case Instruction::CallBr:
- case Instruction::Invoke: {
- // For call sites we look at the argument memory behavior attribute (this
- // could be recursive!) in order to restrict our own state.
- ImmutableCallSite ICS(UserI);
-
- // Give up on operand bundles.
- if (ICS.isBundleOperand(U)) {
- indicatePessimisticFixpoint();
- return;
- }
-
- // Calling a function does read the function pointer, maybe write it if the
- // function is self-modifying.
- if (ICS.isCallee(U)) {
- removeAssumedBits(NO_READS);
- break;
- }
-
- // Adjust the possible access behavior based on the information on the
- // argument.
- unsigned ArgNo = ICS.getArgumentNo(U);
- const IRPosition &ArgPos = IRPosition::callsite_argument(ICS, ArgNo);
- const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
- // "assumed" has at most the same bits as the MemBehaviorAA assumed
- // and at least "known".
- intersectAssumedBits(MemBehaviorAA.getAssumed());
- return;
- }
- };
-
- // Generally, look at the "may-properties" and adjust the assumed state if we
- // did not trigger special handling before.
- if (UserI->mayReadFromMemory())
- removeAssumedBits(NO_READS);
- if (UserI->mayWriteToMemory())
- removeAssumedBits(NO_WRITES);
-}
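The store case above hinges on which operand the tracked use occupies. A sketch of that distinction, assuming the LLVM header below; the helper name is hypothetical:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Only a store *through* the tracked pointer is a write to the location; a
// store *of* the pointer is an escape question handled by AANoCapture, not a
// memory-behavior question.
static bool storeWritesThroughUse(const StoreInst &SI, const Use &U) {
  return SI.getPointerOperand() == U.get();
}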
-/// ------------------ Value Constant Range Attribute -------------------------
-
-struct AAValueConstantRangeImpl : AAValueConstantRange {
- using StateType = IntegerRangeState;
- AAValueConstantRangeImpl(const IRPosition &IRP) : AAValueConstantRange(IRP) {}
-
- /// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
- std::string Str;
- llvm::raw_string_ostream OS(Str);
- OS << "range(" << getBitWidth() << ")<";
- getKnown().print(OS);
- OS << " / ";
- getAssumed().print(OS);
- OS << ">";
- return OS.str();
- }
-
- /// Helper function to get a SCEV expr for the associated value at program
- /// point \p I.
- const SCEV *getSCEV(Attributor &A, const Instruction *I = nullptr) const {
- if (!getAnchorScope())
- return nullptr;
-
- ScalarEvolution *SE =
- A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(
- *getAnchorScope());
-
- LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(
- *getAnchorScope());
-
- if (!SE || !LI)
- return nullptr;
-
- const SCEV *S = SE->getSCEV(&getAssociatedValue());
- if (!I)
- return S;
-
- return SE->getSCEVAtScope(S, LI->getLoopFor(I->getParent()));
- }
-
- /// Helper function to get a range from SCEV for the associated value at
- /// program point \p I.
- ConstantRange getConstantRangeFromSCEV(Attributor &A,
- const Instruction *I = nullptr) const {
- if (!getAnchorScope())
- return getWorstState(getBitWidth());
-
- ScalarEvolution *SE =
- A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(
- *getAnchorScope());
-
- const SCEV *S = getSCEV(A, I);
- if (!SE || !S)
- return getWorstState(getBitWidth());
-
- return SE->getUnsignedRange(S);
- }
-
- /// Helper function to get a range from LVI for the associated value at
- /// program point \p I.
- ConstantRange
- getConstantRangeFromLVI(Attributor &A,
- const Instruction *CtxI = nullptr) const {
- if (!getAnchorScope())
- return getWorstState(getBitWidth());
-
- LazyValueInfo *LVI =
- A.getInfoCache().getAnalysisResultForFunction<LazyValueAnalysis>(
- *getAnchorScope());
-
- if (!LVI || !CtxI)
- return getWorstState(getBitWidth());
- return LVI->getConstantRange(&getAssociatedValue(),
- const_cast<BasicBlock *>(CtxI->getParent()),
- const_cast<Instruction *>(CtxI));
- }
-
- /// See AAValueConstantRange::getKnownConstantRange(..).
- ConstantRange
- getKnownConstantRange(Attributor &A,
- const Instruction *CtxI = nullptr) const override {
- if (!CtxI || CtxI == getCtxI())
- return getKnown();
-
- ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
- ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI);
- return getKnown().intersectWith(SCEVR).intersectWith(LVIR);
}
- /// See AAValueConstantRange::getAssumedConstantRange(..).
- ConstantRange
- getAssumedConstantRange(Attributor &A,
- const Instruction *CtxI = nullptr) const override {
- // TODO: Make SCEV use Attributor assumption.
- // We may be able to bound a variable range via assumptions in
- // Attributor. ex.) If x is assumed to be in [1, 3] and y is known to
- // evolve to x^2 + x, then we can say that y is in [2, 12].
-
- if (!CtxI || CtxI == getCtxI())
- return getAssumed();
-
- ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
- ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI);
- return getAssumed().intersectWith(SCEVR).intersectWith(LVIR);
- }
-
- /// See AbstractAttribute::initialize(..).
- void initialize(Attributor &A) override {
- // Intersect a range given by SCEV.
- intersectKnown(getConstantRangeFromSCEV(A, getCtxI()));
-
- // Intersect a range given by LVI.
- intersectKnown(getConstantRangeFromLVI(A, getCtxI()));
- }
-
- /// Helper function to create MDNode for range metadata.
- static MDNode *
- getMDNodeForConstantRange(Type *Ty, LLVMContext &Ctx,
- const ConstantRange &AssumedConstantRange) {
- Metadata *LowAndHigh[] = {ConstantAsMetadata::get(ConstantInt::get(
- Ty, AssumedConstantRange.getLower())),
- ConstantAsMetadata::get(ConstantInt::get(
- Ty, AssumedConstantRange.getUpper()))};
- return MDNode::get(Ctx, LowAndHigh);
- }
-
- /// Return true if \p Assumed is included in \p KnownRanges.
- static bool isBetterRange(const ConstantRange &Assumed, MDNode *KnownRanges) {
-
- if (Assumed.isFullSet())
- return false;
-
- if (!KnownRanges)
- return true;
-
- // If multiple ranges are annotated in IR, we give up on annotating the
- // assumed range for now.
-
- // TODO: If there exists a known range which contains the assumed range, we
- // can say the assumed range is better.
- if (KnownRanges->getNumOperands() > 2)
- return false;
-
- ConstantInt *Lower =
- mdconst::extract<ConstantInt>(KnownRanges->getOperand(0));
- ConstantInt *Upper =
- mdconst::extract<ConstantInt>(KnownRanges->getOperand(1));
-
- ConstantRange Known(Lower->getValue(), Upper->getValue());
- return Known.contains(Assumed) && Known != Assumed;
- }
-
- /// Helper function to set range metadata.
- static bool
- setRangeMetadataIfisBetterRange(Instruction *I,
- const ConstantRange &AssumedConstantRange) {
- auto *OldRangeMD = I->getMetadata(LLVMContext::MD_range);
- if (isBetterRange(AssumedConstantRange, OldRangeMD)) {
- if (!AssumedConstantRange.isEmptySet()) {
- I->setMetadata(LLVMContext::MD_range,
- getMDNodeForConstantRange(I->getType(), I->getContext(),
- AssumedConstantRange));
- return true;
- }
- }
- return false;
- }
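The "better range" test above reduces to strict containment over half-open intervals. A standalone analogue (wrapping ranges are ignored in this sketch, and the struct is made up for illustration):

#include <cassert>
#include <cstdint>

struct Range { uint64_t Lo, Hi; }; // half-open [Lo, Hi)

// An assumed range only improves existing !range metadata if the known range
// strictly contains it; nullptr means no prior annotation.
static bool isBetterRange(Range Assumed, const Range *Known) {
  if (!Known)
    return true;
  bool Contained = Known->Lo <= Assumed.Lo && Assumed.Hi <= Known->Hi;
  bool Strict = Known->Lo != Assumed.Lo || Known->Hi != Assumed.Hi;
  return Contained && Strict;
}

int main() {
  Range Known{0, 100};
  assert(isBetterRange({10, 20}, &Known)); // strictly tighter -> better
  assert(!isBetterRange({0, 100}, &Known)); // identical -> not better
  assert(isBetterRange({5, 6}, nullptr));  // no prior metadata -> better
  return 0;
}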
-
- /// See AbstractAttribute::manifest()
- ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
- ConstantRange AssumedConstantRange = getAssumedConstantRange(A);
- assert(!AssumedConstantRange.isFullSet() && "Invalid state");
-
- auto &V = getAssociatedValue();
- if (!AssumedConstantRange.isEmptySet() &&
- !AssumedConstantRange.isSingleElement()) {
- if (Instruction *I = dyn_cast<Instruction>(&V))
- if (isa<CallInst>(I) || isa<LoadInst>(I))
- if (setRangeMetadataIfisBetterRange(I, AssumedConstantRange))
- Changed = ChangeStatus::CHANGED;
- }
-
- return Changed;
- }
-};
-
-struct AAValueConstantRangeArgument final : public AAValueConstantRangeImpl {
-
- AAValueConstantRangeArgument(const IRPosition &IRP)
- : AAValueConstantRangeImpl(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Use AAArgumentFromCallSiteArguments
-
- IntegerRangeState S(getBitWidth());
- clampCallSiteArgumentStates<AAValueConstantRange, IntegerRangeState>(
- A, *this, S);
-
- // TODO: If we know we visited all incoming values, thus none are assumed
- // dead, we can take the known information from the state T.
- return clampStateAndIndicateChange<IntegerRangeState>(this->getState(), S);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_ARG_ATTR(value_range)
- }
-};
-
-struct AAValueConstantRangeReturned : AAValueConstantRangeImpl {
- AAValueConstantRangeReturned(const IRPosition &IRP)
- : AAValueConstantRangeImpl(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- // TODO: Use AAReturnedFromReturnedValues
-
- // TODO: If we know we visited all returned values, thus none are assumed
- // dead, we can take the known information from the state T.
-
- IntegerRangeState S(getBitWidth());
-
- clampReturnedValueStates<AAValueConstantRange, IntegerRangeState>(A, *this,
- S);
- return clampStateAndIndicateChange<StateType>(this->getState(), S);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FNRET_ATTR(value_range)
- }
-};
-
-struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
- AAValueConstantRangeFloating(const IRPosition &IRP)
- : AAValueConstantRangeImpl(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAValueConstantRange::initialize(A);
- Value &V = getAssociatedValue();
-
- if (auto *C = dyn_cast<ConstantInt>(&V)) {
- unionAssumed(ConstantRange(C->getValue()));
- indicateOptimisticFixpoint();
- return;
- }
-
- if (isa<UndefValue>(&V)) {
- indicateOptimisticFixpoint();
- return;
- }
-
- if (auto *I = dyn_cast<Instruction>(&V))
- if (isa<BinaryOperator>(I) || isa<CmpInst>(I)) {
- Value *LHS = I->getOperand(0);
- Value *RHS = I->getOperand(1);
-
- if (LHS->getType()->isIntegerTy() && RHS->getType()->isIntegerTy())
- return;
- }
-
- // If it is a load instruction with range metadata, use it.
- if (LoadInst *LI = dyn_cast<LoadInst>(&V))
- if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range)) {
- intersectKnown(getConstantRangeFromMetadata(*RangeMD));
- return;
- }
-
- // Otherwise we give up.
- indicatePessimisticFixpoint();
-
- LLVM_DEBUG(dbgs() << "[Attributor][AAValueConstantRange] We give up: "
- << getAssociatedValue());
- }
-
- bool calculateBinaryOperator(Attributor &A, BinaryOperator *BinOp,
- IntegerRangeState &T, Instruction *CtxI) {
- Value *LHS = BinOp->getOperand(0);
- Value *RHS = BinOp->getOperand(1);
-
- auto &LHSAA =
- A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS));
- auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
-
- auto &RHSAA =
- A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS));
- auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
-
- auto AssumedRange = LHSAARange.binaryOp(BinOp->getOpcode(), RHSAARange);
-
- T.unionAssumed(AssumedRange);
-
- // TODO: Track a known state too.
-
- return T.isValidState();
- }
-
- bool calculateCmpInst(Attributor &A, CmpInst *CmpI, IntegerRangeState &T,
- Instruction *CtxI) {
- Value *LHS = CmpI->getOperand(0);
- Value *RHS = CmpI->getOperand(1);
-
- auto &LHSAA =
- A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS));
- auto &RHSAA =
- A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS));
-
- auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
- auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
-
- // If one of them is an empty set, we can't decide.
- if (LHSAARange.isEmptySet() || RHSAARange.isEmptySet())
- return true;
-
- bool MustTrue = false, MustFalse = false;
-
- auto AllowedRegion =
- ConstantRange::makeAllowedICmpRegion(CmpI->getPredicate(), RHSAARange);
-
- auto SatisfyingRegion = ConstantRange::makeSatisfyingICmpRegion(
- CmpI->getPredicate(), RHSAARange);
-
- if (AllowedRegion.intersectWith(LHSAARange).isEmptySet())
- MustFalse = true;
-
- if (SatisfyingRegion.contains(LHSAARange))
- MustTrue = true;
-
- assert((!MustTrue || !MustFalse) &&
- "Either MustTrue or MustFalse should be false!");
-
- if (MustTrue)
- T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 1)));
- else if (MustFalse)
- T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 0)));
- else
- T.unionAssumed(ConstantRange(/* BitWidth */ 1, /* isFullSet */ true));
-
- LLVM_DEBUG(dbgs() << "[AAValueConstantRange] " << *CmpI << " " << LHSAA
- << " " << RHSAA << "\n");
-
- // TODO: Track a known state too.
- return T.isValidState();
- }
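A worked instance of the must-true/must-false reasoning above, using the real ConstantRange API, for x known to be in [0, 5) compared via `x <u 10`; the free-function wrapper is made up for illustration:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

void icmpRegionExample() {
  ConstantRange LHS(APInt(32, 0), APInt(32, 5)); // x in [0, 5)
  ConstantRange RHS(APInt(32, 10));              // the constant 10
  // Allowed region: values of x for which "x <u 10" *may* hold.
  ConstantRange Allowed =
      ConstantRange::makeAllowedICmpRegion(CmpInst::ICMP_ULT, RHS);
  // Satisfying region: values of x for which "x <u 10" *must* hold.
  ConstantRange Satisfying =
      ConstantRange::makeSatisfyingICmpRegion(CmpInst::ICMP_ULT, RHS);
  bool MustFalse = Allowed.intersectWith(LHS).isEmptySet(); // false here
  bool MustTrue = Satisfying.contains(LHS);                 // true here
  (void)MustFalse;
  (void)MustTrue; // the compare folds to the constant range {1}
}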
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- Instruction *CtxI = getCtxI();
- auto VisitValueCB = [&](Value &V, IntegerRangeState &T,
- bool Stripped) -> bool {
- Instruction *I = dyn_cast<Instruction>(&V);
- if (!I) {
-
- // If the value is not an instruction, we query the Attributor for an AA.
- const auto &AA =
- A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(V));
-
- // The clamp operator is not used here so that the program point CtxI can be utilized.
- T.unionAssumed(AA.getAssumedConstantRange(A, CtxI));
-
- return T.isValidState();
- }
-
- if (auto *BinOp = dyn_cast<BinaryOperator>(I))
- return calculateBinaryOperator(A, BinOp, T, CtxI);
- else if (auto *CmpI = dyn_cast<CmpInst>(I))
- return calculateCmpInst(A, CmpI, T, CtxI);
- else {
- // Give up with other instructions.
- // TODO: Add other instructions
-
- T.indicatePessimisticFixpoint();
- return false;
- }
- };
-
- IntegerRangeState T(getBitWidth());
-
- if (!genericValueTraversal<AAValueConstantRange, IntegerRangeState>(
- A, getIRPosition(), *this, T, VisitValueCB))
- return indicatePessimisticFixpoint();
-
- return clampStateAndIndicateChange(getState(), T);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FLOATING_ATTR(value_range)
- }
-};
-
-struct AAValueConstantRangeFunction : AAValueConstantRangeImpl {
- AAValueConstantRangeFunction(const IRPosition &IRP)
- : AAValueConstantRangeImpl(IRP) {}
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
- llvm_unreachable("AAValueConstantRange(Function|CallSite)::updateImpl will "
- "not be called");
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(value_range) }
-};
-
-struct AAValueConstantRangeCallSite : AAValueConstantRangeFunction {
- AAValueConstantRangeCallSite(const IRPosition &IRP)
- : AAValueConstantRangeFunction(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(value_range) }
-};
-
-struct AAValueConstantRangeCallSiteReturned : AAValueConstantRangeReturned {
- AAValueConstantRangeCallSiteReturned(const IRPosition &IRP)
- : AAValueConstantRangeReturned(IRP) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- // If it is a call instruction with range metadata, use the metadata.
- if (CallInst *CI = dyn_cast<CallInst>(&getAssociatedValue()))
- if (auto *RangeMD = CI->getMetadata(LLVMContext::MD_range))
- intersectKnown(getConstantRangeFromMetadata(*RangeMD));
-
- AAValueConstantRangeReturned::initialize(A);
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CSRET_ATTR(value_range)
- }
-};
-struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
- AAValueConstantRangeCallSiteArgument(const IRPosition &IRP)
- : AAValueConstantRangeFloating(IRP) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_CSARG_ATTR(value_range)
- }
-};
-/// ----------------------------------------------------------------------------
-/// Attributor
-/// ----------------------------------------------------------------------------
-
-bool Attributor::isAssumedDead(const AbstractAttribute &AA,
- const AAIsDead *LivenessAA) {
- const Instruction *CtxI = AA.getIRPosition().getCtxI();
- if (!CtxI)
- return false;
-
- // TODO: Find a good way to utilize fine and coarse grained liveness
- // information.
- if (!LivenessAA)
- LivenessAA =
- &getAAFor<AAIsDead>(AA, IRPosition::function(*CtxI->getFunction()),
- /* TrackDependence */ false);
-
- // Don't check liveness for AAIsDead.
- if (&AA == LivenessAA)
- return false;
-
- if (!LivenessAA->isAssumedDead(CtxI))
- return false;
-
- // We actually used liveness information so we have to record a dependence.
- recordDependence(*LivenessAA, AA, DepClassTy::OPTIONAL);
-
- return true;
-}
-
-bool Attributor::checkForAllUses(
- const function_ref<bool(const Use &, bool &)> &Pred,
- const AbstractAttribute &QueryingAA, const Value &V) {
const IRPosition &IRP = QueryingAA.getIRPosition();
SmallVector<const Use *, 16> Worklist;
SmallPtrSet<const Use *, 16> Visited;
@@ -5601,10 +646,6 @@ bool Attributor::checkForAllUses(
LLVM_DEBUG(dbgs() << "[Attributor] Got " << Worklist.size()
<< " initial uses to check\n");
- if (Worklist.empty())
- return true;
-
- bool AnyDead = false;
const Function *ScopeFn = IRP.getAnchorScope();
const auto *LivenessAA =
ScopeFn ? &getAAFor<AAIsDead>(QueryingAA, IRPosition::function(*ScopeFn),
@@ -5615,14 +656,17 @@ bool Attributor::checkForAllUses(
const Use *U = Worklist.pop_back_val();
if (!Visited.insert(U).second)
continue;
- LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << "\n");
- if (Instruction *UserI = dyn_cast<Instruction>(U->getUser()))
- if (LivenessAA && LivenessAA->isAssumedDead(UserI)) {
- LLVM_DEBUG(dbgs() << "[Attributor] Dead user: " << *UserI << ": "
- << *LivenessAA << "\n");
- AnyDead = true;
- continue;
- }
+ LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
+ << *U->getUser() << "\n");
+ if (isAssumedDead(*U, &QueryingAA, LivenessAA,
+ /* CheckBBLivenessOnly */ false, LivenessDepClass)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
+ continue;
+ }
+ if (U->getUser()->isDroppable()) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Droppable user, skip!\n");
+ continue;
+ }
bool Follow = false;
if (!Pred(*U, Follow))
@@ -5633,15 +677,13 @@ bool Attributor::checkForAllUses(
Worklist.push_back(&UU);
}
- if (AnyDead)
- recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
-
return true;
}
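The worklist discipline of checkForAllUses (pop, de-duplicate through a visited set, query the predicate, and only push successors when the predicate sets the follow flag) can be exercised standalone. A minimal sketch with ints standing in for uses and "U * 2" standing in for transitive users:

#include <cstdio>
#include <set>
#include <vector>

int main() {
  std::vector<int> Worklist = {1, 2};
  std::set<int> Visited;
  auto Pred = [](int U, bool &Follow) {
    Follow = (U < 8); // keep walking "users" below the cutoff
    return true;      // returning false would abort the whole query
  };
  while (!Worklist.empty()) {
    int U = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(U).second)
      continue; // already checked, mirrors the Visited set above
    bool Follow = false;
    if (!Pred(U, Follow))
      return 1;
    if (Follow)
      Worklist.push_back(U * 2); // "transitive users"
  }
  printf("all uses ok\n");
  return 0;
}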
-bool Attributor::checkForAllCallSites(
- const function_ref<bool(AbstractCallSite)> &Pred,
- const AbstractAttribute &QueryingAA, bool RequireAllCallSites) {
+bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
+ const AbstractAttribute &QueryingAA,
+ bool RequireAllCallSites,
+ bool &AllCallSitesKnown) {
// We can try to determine information from
 // the call sites. However, this is only possible if all call sites are known,
// hence the function has internal linkage.
@@ -5650,25 +692,49 @@ bool Attributor::checkForAllCallSites(
if (!AssociatedFunction) {
LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP
<< "\n");
+ AllCallSitesKnown = false;
return false;
}
return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites,
- &QueryingAA);
+ &QueryingAA, AllCallSitesKnown);
}
-bool Attributor::checkForAllCallSites(
- const function_ref<bool(AbstractCallSite)> &Pred, const Function &Fn,
- bool RequireAllCallSites, const AbstractAttribute *QueryingAA) {
+bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
+ const Function &Fn,
+ bool RequireAllCallSites,
+ const AbstractAttribute *QueryingAA,
+ bool &AllCallSitesKnown) {
if (RequireAllCallSites && !Fn.hasLocalLinkage()) {
LLVM_DEBUG(
dbgs()
<< "[Attributor] Function " << Fn.getName()
<< " has no internal linkage, hence not all call sites are known\n");
+ AllCallSitesKnown = false;
return false;
}
- for (const Use &U : Fn.uses()) {
+ // If we do not require all call sites, we might not see all of them.
+ AllCallSitesKnown = RequireAllCallSites;
+
+ SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
+ for (unsigned u = 0; u < Uses.size(); ++u) {
+ const Use &U = *Uses[u];
+ LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in "
+ << *U.getUser() << "\n");
+ if (isAssumedDead(U, QueryingAA, nullptr, /* CheckBBLivenessOnly */ true)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
+ continue;
+ }
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
+ if (CE->isCast() && CE->getType()->isPointerTy() &&
+ CE->getType()->getPointerElementType()->isFunctionTy()) {
+ for (const Use &CEU : CE->uses())
+ Uses.push_back(&CEU);
+ continue;
+ }
+ }
+
AbstractCallSite ACS(&U);
if (!ACS) {
LLVM_DEBUG(dbgs() << "[Attributor] Function " << Fn.getName()
@@ -5680,22 +746,6 @@ bool Attributor::checkForAllCallSites(
return false;
}
- Instruction *I = ACS.getInstruction();
- Function *Caller = I->getFunction();
-
- const auto *LivenessAA =
- lookupAAFor<AAIsDead>(IRPosition::function(*Caller), QueryingAA,
- /* TrackDependence */ false);
-
- // Skip dead calls.
- if (LivenessAA && LivenessAA->isAssumedDead(I)) {
- // We actually used liveness information so we have to record a
- // dependence.
- if (QueryingAA)
- recordDependence(*LivenessAA, *QueryingAA, DepClassTy::OPTIONAL);
- continue;
- }
-
const Use *EffectiveUse =
ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U;
if (!ACS.isCallee(EffectiveUse)) {
@@ -5706,6 +756,24 @@ bool Attributor::checkForAllCallSites(
return false;
}
+ // Make sure the arguments that can be matched between the call site and the
+ // callee agree on their type. It is unlikely they do not, and it doesn't
+ // make sense for all attributes to know/care about this.
+ assert(&Fn == ACS.getCalledFunction() && "Expected known callee");
+ unsigned MinArgsParams =
+ std::min(size_t(ACS.getNumArgOperands()), Fn.arg_size());
+ for (unsigned u = 0; u < MinArgsParams; ++u) {
+ Value *CSArgOp = ACS.getCallArgOperand(u);
+ if (CSArgOp && Fn.getArg(u)->getType() != CSArgOp->getType()) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor] Call site / callee argument type mismatch ["
+ << u << "@" << Fn.getName() << ": "
+ << *Fn.getArg(u)->getType() << " vs. "
+ << *ACS.getCallArgOperand(u)->getType() << "]\n");
+ return false;
+ }
+ }
+
if (Pred(ACS))
continue;
@@ -5718,8 +786,7 @@ bool Attributor::checkForAllCallSites(
}
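A hedged sketch of how a caller might use the extended interface; the surrounding AA and its predicate body are hypothetical, only the checkForAllCallSites signature comes from this patch. The point of the new out-parameter is that assumed facts derived from the walk may only be promoted to known facts when AllCallSitesKnown comes back true:

// Inside some hypothetical AAFoo::updateImpl(Attributor &A):
ChangeStatus updateImpl(Attributor &A) /* sketch */ {
  bool AllCallSitesKnown = false;
  auto CallSitePred = [&](AbstractCallSite ACS) {
    return ACS.getInstruction() != nullptr; // stand-in for a real check
  };
  if (!A.checkForAllCallSites(CallSitePred, *this,
                              /* RequireAllCallSites */ true,
                              AllCallSitesKnown))
    return indicatePessimisticFixpoint();
  // Only with AllCallSitesKnown could the deduction become "known" later.
  return ChangeStatus::UNCHANGED;
}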
bool Attributor::checkForAllReturnedValuesAndReturnInsts(
- const function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)>
- &Pred,
+ function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
const AbstractAttribute &QueryingAA) {
const IRPosition &IRP = QueryingAA.getIRPosition();
@@ -5741,8 +808,7 @@ bool Attributor::checkForAllReturnedValuesAndReturnInsts(
}
bool Attributor::checkForAllReturnedValues(
- const function_ref<bool(Value &)> &Pred,
- const AbstractAttribute &QueryingAA) {
+ function_ref<bool(Value &)> Pred, const AbstractAttribute &QueryingAA) {
const IRPosition &IRP = QueryingAA.getIRPosition();
const Function *AssociatedFunction = IRP.getAssociatedFunction();
@@ -5761,18 +827,22 @@ bool Attributor::checkForAllReturnedValues(
});
}
-static bool
-checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap,
- const function_ref<bool(Instruction &)> &Pred,
- const AAIsDead *LivenessAA, bool &AnyDead,
- const ArrayRef<unsigned> &Opcodes) {
+static bool checkForAllInstructionsImpl(
+ Attributor *A, InformationCache::OpcodeInstMapTy &OpcodeInstMap,
+ function_ref<bool(Instruction &)> Pred, const AbstractAttribute *QueryingAA,
+ const AAIsDead *LivenessAA, const ArrayRef<unsigned> &Opcodes,
+ bool CheckBBLivenessOnly = false) {
for (unsigned Opcode : Opcodes) {
- for (Instruction *I : OpcodeInstMap[Opcode]) {
+ // Check if we have instructions with this opcode at all first.
+ auto *Insts = OpcodeInstMap.lookup(Opcode);
+ if (!Insts)
+ continue;
+
+ for (Instruction *I : *Insts) {
// Skip dead instructions.
- if (LivenessAA && LivenessAA->isAssumedDead(I)) {
- AnyDead = true;
+ if (A && A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
+ CheckBBLivenessOnly))
continue;
- }
if (!Pred(*I))
return false;
@@ -5781,9 +851,10 @@ checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap,
return true;
}
-bool Attributor::checkForAllInstructions(
- const llvm::function_ref<bool(Instruction &)> &Pred,
- const AbstractAttribute &QueryingAA, const ArrayRef<unsigned> &Opcodes) {
+bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
+ const AbstractAttribute &QueryingAA,
+ const ArrayRef<unsigned> &Opcodes,
+ bool CheckBBLivenessOnly) {
const IRPosition &IRP = QueryingAA.getIRPosition();
// Since we need to provide instructions we have to have an exact definition.
@@ -5795,24 +866,18 @@ bool Attributor::checkForAllInstructions(
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
const auto &LivenessAA =
getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
- bool AnyDead = false;
auto &OpcodeInstMap =
InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
- if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead,
- Opcodes))
+ if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
+ &LivenessAA, Opcodes, CheckBBLivenessOnly))
return false;
- // If we actually used liveness information so we have to record a dependence.
- if (AnyDead)
- recordDependence(LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
-
return true;
}
bool Attributor::checkForAllReadWriteInstructions(
- const llvm::function_ref<bool(Instruction &)> &Pred,
- AbstractAttribute &QueryingAA) {
+ function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA) {
const Function *AssociatedFunction =
QueryingAA.getIRPosition().getAssociatedFunction();
@@ -5823,28 +888,21 @@ bool Attributor::checkForAllReadWriteInstructions(
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
const auto &LivenessAA =
getAAFor<AAIsDead>(QueryingAA, QueryIRP, /* TrackDependence */ false);
- bool AnyDead = false;
for (Instruction *I :
InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
// Skip dead instructions.
- if (LivenessAA.isAssumedDead(I)) {
- AnyDead = true;
+ if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA))
continue;
- }
if (!Pred(*I))
return false;
}
- // If we actually used liveness information so we have to record a dependence.
- if (AnyDead)
- recordDependence(LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
-
return true;
}
-ChangeStatus Attributor::run(Module &M) {
+void Attributor::runTillFixpoint() {
LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
<< AllAbstractAttributes.size()
<< " abstract attributes.\n");
@@ -5854,12 +912,10 @@ ChangeStatus Attributor::run(Module &M) {
unsigned IterationCounter = 1;
- SmallVector<AbstractAttribute *, 64> ChangedAAs;
+ SmallVector<AbstractAttribute *, 32> ChangedAAs;
SetVector<AbstractAttribute *> Worklist, InvalidAAs;
Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
- bool RecomputeDependences = false;
-
do {
// Remember the size to determine new attributes.
size_t NumAAs = AllAbstractAttributes.size();
@@ -5871,44 +927,35 @@ ChangeStatus Attributor::run(Module &M) {
// to run updates.
for (unsigned u = 0; u < InvalidAAs.size(); ++u) {
AbstractAttribute *InvalidAA = InvalidAAs[u];
- auto &QuerriedAAs = QueryMap[InvalidAA];
+
+ // Check the dependences to fast track invalidation.
LLVM_DEBUG(dbgs() << "[Attributor] InvalidAA: " << *InvalidAA << " has "
- << QuerriedAAs.RequiredAAs.size() << "/"
- << QuerriedAAs.OptionalAAs.size()
- << " required/optional dependences\n");
- for (AbstractAttribute *DepOnInvalidAA : QuerriedAAs.RequiredAAs) {
- AbstractState &DOIAAState = DepOnInvalidAA->getState();
- DOIAAState.indicatePessimisticFixpoint();
- ++NumAttributesFixedDueToRequiredDependences;
- assert(DOIAAState.isAtFixpoint() && "Expected fixpoint state!");
- if (!DOIAAState.isValidState())
- InvalidAAs.insert(DepOnInvalidAA);
+ << InvalidAA->Deps.size()
+ << " required & optional dependences\n");
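+      // An optional dependence only requires the depender to be re-updated;
+      // a required dependence cannot hold if this AA is invalid, so the
+      // depender is forced into a pessimistic fixpoint right away and the
+      // invalidation is propagated transitively.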
+ while (!InvalidAA->Deps.empty()) {
+ const auto &Dep = InvalidAA->Deps.back();
+ InvalidAA->Deps.pop_back();
+ AbstractAttribute *DepAA = Dep.getPointer();
+ if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
+ Worklist.insert(DepAA);
+ continue;
+ }
+ DepAA->getState().indicatePessimisticFixpoint();
+ assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!");
+ if (!DepAA->getState().isValidState())
+ InvalidAAs.insert(DepAA);
+ else
+ ChangedAAs.push_back(DepAA);
}
- if (!RecomputeDependences)
- Worklist.insert(QuerriedAAs.OptionalAAs.begin(),
- QuerriedAAs.OptionalAAs.end());
- }
-
- // If dependences (=QueryMap) are recomputed we have to look at all abstract
- // attributes again, regardless of what changed in the last iteration.
- if (RecomputeDependences) {
- LLVM_DEBUG(
- dbgs() << "[Attributor] Run all AAs to recompute dependences\n");
- QueryMap.clear();
- ChangedAAs.clear();
- Worklist.insert(AllAbstractAttributes.begin(),
- AllAbstractAttributes.end());
}
// Add all abstract attributes that are potentially dependent on one that
// changed to the work list.
- for (AbstractAttribute *ChangedAA : ChangedAAs) {
- auto &QuerriedAAs = QueryMap[ChangedAA];
- Worklist.insert(QuerriedAAs.OptionalAAs.begin(),
- QuerriedAAs.OptionalAAs.end());
- Worklist.insert(QuerriedAAs.RequiredAAs.begin(),
- QuerriedAAs.RequiredAAs.end());
- }
+ for (AbstractAttribute *ChangedAA : ChangedAAs)
+ while (!ChangedAA->Deps.empty()) {
+ Worklist.insert(ChangedAA->Deps.back().getPointer());
+ ChangedAA->Deps.pop_back();
+ }
LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter
<< ", Worklist+Dependent size: " << Worklist.size()
@@ -5920,23 +967,17 @@ ChangeStatus Attributor::run(Module &M) {
  // Update all abstract attributes in the work list and record the ones that
// changed.
- for (AbstractAttribute *AA : Worklist)
- if (!AA->getState().isAtFixpoint() && !isAssumedDead(*AA, nullptr)) {
- QueriedNonFixAA = false;
- if (AA->update(*this) == ChangeStatus::CHANGED) {
+ for (AbstractAttribute *AA : Worklist) {
+ const auto &AAState = AA->getState();
+ if (!AAState.isAtFixpoint())
+ if (updateAA(*AA) == ChangeStatus::CHANGED)
ChangedAAs.push_back(AA);
- if (!AA->getState().isValidState())
- InvalidAAs.insert(AA);
- } else if (!QueriedNonFixAA) {
- // If the attribute did not query any non-fix information, the state
- // will not change and we can indicate that right away.
- AA->getState().indicateOptimisticFixpoint();
- }
- }
- // Check if we recompute the dependences in the next iteration.
- RecomputeDependences = (DepRecomputeInterval > 0 &&
- IterationCounter % DepRecomputeInterval == 0);
+    // Use the InvalidAAs vector to quickly propagate invalid states
+    // transitively without requiring updates.
+ if (!AAState.isValidState())
+ InvalidAAs.insert(AA);
+ }
// Add attributes to the changed set if they have been created in the last
// iteration.
@@ -5955,8 +996,6 @@ ChangeStatus Attributor::run(Module &M) {
<< IterationCounter << "/" << MaxFixpointIterations
<< " iterations\n");
- size_t NumFinalAAs = AllAbstractAttributes.size();
-
// Reset abstract arguments not settled in a sound fixpoint by now. This
// happens when we stopped the fixpoint iteration early. Note that only the
// ones marked as "changed" *and* the ones transitively depending on them
@@ -5975,11 +1014,10 @@ ChangeStatus Attributor::run(Module &M) {
NumAttributesTimedOut++;
}
- auto &QuerriedAAs = QueryMap[ChangedAA];
- ChangedAAs.append(QuerriedAAs.OptionalAAs.begin(),
- QuerriedAAs.OptionalAAs.end());
- ChangedAAs.append(QuerriedAAs.RequiredAAs.begin(),
- QuerriedAAs.RequiredAAs.end());
+ while (!ChangedAA->Deps.empty()) {
+ ChangedAAs.push_back(ChangedAA->Deps.back().getPointer());
+ ChangedAA->Deps.pop_back();
+ }
}
LLVM_DEBUG({
@@ -5988,6 +1026,19 @@ ChangeStatus Attributor::run(Module &M) {
<< " abstract attributes.\n";
});
+ if (VerifyMaxFixpointIterations &&
+ IterationCounter != MaxFixpointIterations) {
+ errs() << "\n[Attributor] Fixpoint iteration done after: "
+ << IterationCounter << "/" << MaxFixpointIterations
+ << " iterations\n";
+ llvm_unreachable("The fixpoint was not reached with exactly the number of "
+ "specified iterations!");
+ }
+}
+
+ChangeStatus Attributor::manifestAttributes() {
+ size_t NumFinalAAs = AllAbstractAttributes.size();
+
unsigned NumManifested = 0;
unsigned NumAtFixpoint = 0;
ChangeStatus ManifestChange = ChangeStatus::UNCHANGED;
@@ -6006,12 +1057,14 @@ ChangeStatus Attributor::run(Module &M) {
continue;
// Skip dead code.
- if (isAssumedDead(*AA, nullptr))
+ if (isAssumedDead(*AA, nullptr, /* CheckBBLivenessOnly */ true))
continue;
// Manifest the state and record if we changed the IR.
ChangeStatus LocalChange = AA->manifest(*this);
if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled())
AA->trackStatistics();
+ LLVM_DEBUG(dbgs() << "[Attributor] Manifest " << LocalChange << " : " << *AA
+ << "\n");
ManifestChange = ManifestChange | LocalChange;
@@ -6029,160 +1082,298 @@ ChangeStatus Attributor::run(Module &M) {
NumAttributesValidFixpoint += NumAtFixpoint;
(void)NumFinalAAs;
- assert(
- NumFinalAAs == AllAbstractAttributes.size() &&
- "Expected the final number of abstract attributes to remain unchanged!");
+ if (NumFinalAAs != AllAbstractAttributes.size()) {
+ for (unsigned u = NumFinalAAs; u < AllAbstractAttributes.size(); ++u)
+ errs() << "Unexpected abstract attribute: " << *AllAbstractAttributes[u]
+ << " :: "
+ << AllAbstractAttributes[u]->getIRPosition().getAssociatedValue()
+ << "\n";
+ llvm_unreachable("Expected the final number of abstract attributes to "
+ "remain unchanged!");
+ }
+ return ManifestChange;
+}
+ChangeStatus Attributor::cleanupIR() {
  // Delete stuff at the end to avoid invalid references and to get a nice order.
- {
- LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least "
- << ToBeDeletedFunctions.size() << " functions and "
- << ToBeDeletedBlocks.size() << " blocks and "
- << ToBeDeletedInsts.size() << " instructions and "
- << ToBeChangedUses.size() << " uses\n");
-
- SmallVector<Instruction *, 32> DeadInsts;
- SmallVector<Instruction *, 32> TerminatorsToFold;
-
- for (auto &It : ToBeChangedUses) {
- Use *U = It.first;
- Value *NewV = It.second;
- Value *OldV = U->get();
- LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser()
- << " instead of " << *OldV << "\n");
- U->set(NewV);
- if (Instruction *I = dyn_cast<Instruction>(OldV))
- if (!isa<PHINode>(I) && !ToBeDeletedInsts.count(I) &&
- isInstructionTriviallyDead(I)) {
- DeadInsts.push_back(I);
- }
- if (isa<Constant>(NewV) && isa<BranchInst>(U->getUser())) {
- Instruction *UserI = cast<Instruction>(U->getUser());
- if (isa<UndefValue>(NewV)) {
- ToBeChangedToUnreachableInsts.insert(UserI);
- } else {
- TerminatorsToFold.push_back(UserI);
- }
+ LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least "
+ << ToBeDeletedFunctions.size() << " functions and "
+ << ToBeDeletedBlocks.size() << " blocks and "
+ << ToBeDeletedInsts.size() << " instructions and "
+ << ToBeChangedUses.size() << " uses\n");
+
+ SmallVector<WeakTrackingVH, 32> DeadInsts;
+ SmallVector<Instruction *, 32> TerminatorsToFold;
+
+ for (auto &It : ToBeChangedUses) {
+ Use *U = It.first;
+ Value *NewV = It.second;
+ Value *OldV = U->get();
+
+ // Do not replace uses in returns if the value is a must-tail call we will
+ // not delete.
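+    // (The IR verifier requires the result of a must-tail call to be returned
+    //  immediately, so the use in the return instruction has to stay intact.)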
+ if (isa<ReturnInst>(U->getUser()))
+ if (auto *CI = dyn_cast<CallInst>(OldV->stripPointerCasts()))
+ if (CI->isMustTailCall() && !ToBeDeletedInsts.count(CI))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser()
+ << " instead of " << *OldV << "\n");
+ U->set(NewV);
+ // Do not modify call instructions outside the SCC.
+ if (auto *CB = dyn_cast<CallBase>(OldV))
+ if (!Functions.count(CB->getCaller()))
+ continue;
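+    // (In the CGSCC mode we only reanalyze functions of the current SCC;
+    //  modifying others would invalidate the incremental call graph update.)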
+ if (Instruction *I = dyn_cast<Instruction>(OldV)) {
+ CGModifiedFunctions.insert(I->getFunction());
+ if (!isa<PHINode>(I) && !ToBeDeletedInsts.count(I) &&
+ isInstructionTriviallyDead(I))
+ DeadInsts.push_back(I);
+ }
+ if (isa<Constant>(NewV) && isa<BranchInst>(U->getUser())) {
+ Instruction *UserI = cast<Instruction>(U->getUser());
+ if (isa<UndefValue>(NewV)) {
+ ToBeChangedToUnreachableInsts.insert(UserI);
+ } else {
+ TerminatorsToFold.push_back(UserI);
}
}
- for (auto &V : InvokeWithDeadSuccessor)
- if (InvokeInst *II = dyn_cast_or_null<InvokeInst>(V)) {
- bool UnwindBBIsDead = II->hasFnAttr(Attribute::NoUnwind);
- bool NormalBBIsDead = II->hasFnAttr(Attribute::NoReturn);
- bool Invoke2CallAllowed =
- !AAIsDeadFunction::mayCatchAsynchronousExceptions(
- *II->getFunction());
- assert((UnwindBBIsDead || NormalBBIsDead) &&
- "Invoke does not have dead successors!");
- BasicBlock *BB = II->getParent();
- BasicBlock *NormalDestBB = II->getNormalDest();
- if (UnwindBBIsDead) {
- Instruction *NormalNextIP = &NormalDestBB->front();
- if (Invoke2CallAllowed) {
- changeToCall(II);
- NormalNextIP = BB->getTerminator();
- }
- if (NormalBBIsDead)
- ToBeChangedToUnreachableInsts.insert(NormalNextIP);
- } else {
- assert(NormalBBIsDead && "Broken invariant!");
- if (!NormalDestBB->getUniquePredecessor())
- NormalDestBB = SplitBlockPredecessors(NormalDestBB, {BB}, ".dead");
- ToBeChangedToUnreachableInsts.insert(&NormalDestBB->front());
+ }
+ for (auto &V : InvokeWithDeadSuccessor)
+ if (InvokeInst *II = dyn_cast_or_null<InvokeInst>(V)) {
+ bool UnwindBBIsDead = II->hasFnAttr(Attribute::NoUnwind);
+ bool NormalBBIsDead = II->hasFnAttr(Attribute::NoReturn);
+ bool Invoke2CallAllowed =
+ !AAIsDead::mayCatchAsynchronousExceptions(*II->getFunction());
+ assert((UnwindBBIsDead || NormalBBIsDead) &&
+ "Invoke does not have dead successors!");
+ BasicBlock *BB = II->getParent();
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ if (UnwindBBIsDead) {
+ Instruction *NormalNextIP = &NormalDestBB->front();
+ if (Invoke2CallAllowed) {
+ changeToCall(II);
+ NormalNextIP = BB->getTerminator();
}
+ if (NormalBBIsDead)
+ ToBeChangedToUnreachableInsts.insert(NormalNextIP);
+ } else {
+ assert(NormalBBIsDead && "Broken invariant!");
+ if (!NormalDestBB->getUniquePredecessor())
+ NormalDestBB = SplitBlockPredecessors(NormalDestBB, {BB}, ".dead");
+ ToBeChangedToUnreachableInsts.insert(&NormalDestBB->front());
}
- for (auto &V : ToBeChangedToUnreachableInsts)
- if (Instruction *I = dyn_cast_or_null<Instruction>(V))
- changeToUnreachable(I, /* UseLLVMTrap */ false);
- for (Instruction *I : TerminatorsToFold)
- ConstantFoldTerminator(I->getParent());
-
- for (Instruction *I : ToBeDeletedInsts) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ }
+ for (Instruction *I : TerminatorsToFold) {
+ CGModifiedFunctions.insert(I->getFunction());
+ ConstantFoldTerminator(I->getParent());
+ }
+ for (auto &V : ToBeChangedToUnreachableInsts)
+ if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ CGModifiedFunctions.insert(I->getFunction());
+ changeToUnreachable(I, /* UseLLVMTrap */ false);
+ }
+
+ for (auto &V : ToBeDeletedInsts) {
+ if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ I->dropDroppableUses();
+ CGModifiedFunctions.insert(I->getFunction());
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
if (!isa<PHINode>(I) && isInstructionTriviallyDead(I))
DeadInsts.push_back(I);
else
I->eraseFromParent();
}
+ }
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
-
- if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) {
- SmallVector<BasicBlock *, 8> ToBeDeletedBBs;
- ToBeDeletedBBs.reserve(NumDeadBlocks);
- ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end());
- // Actually we do not delete the blocks but squash them into a single
- // unreachable but untangling branches that jump here is something we need
- // to do in a more generic way.
- DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
- STATS_DECL(AAIsDead, BasicBlock, "Number of dead basic blocks deleted.");
- BUILD_STAT_NAME(AAIsDead, BasicBlock) += ToBeDeletedBlocks.size();
- }
+ LLVM_DEBUG(dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size()
+ << "\n");
- // Identify dead internal functions and delete them. This happens outside
- // the other fixpoint analysis as we might treat potentially dead functions
- // as live to lower the number of iterations. If they happen to be dead, the
- // below fixpoint loop will identify and eliminate them.
- SmallVector<Function *, 8> InternalFns;
- for (Function &F : M)
- if (F.hasLocalLinkage())
- InternalFns.push_back(&F);
-
- bool FoundDeadFn = true;
- while (FoundDeadFn) {
- FoundDeadFn = false;
- for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
- Function *F = InternalFns[u];
- if (!F)
- continue;
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+
+ if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) {
+ SmallVector<BasicBlock *, 8> ToBeDeletedBBs;
+ ToBeDeletedBBs.reserve(NumDeadBlocks);
+ for (BasicBlock *BB : ToBeDeletedBlocks) {
+ CGModifiedFunctions.insert(BB->getParent());
+ ToBeDeletedBBs.push_back(BB);
+ }
+  // We do not actually delete the blocks but squash them into a single
+  // unreachable instruction. Untangling the branches that jump here is
+  // something we need to do in a more generic way.
+ DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
+ }
+
+ // Identify dead internal functions and delete them. This happens outside
+ // the other fixpoint analysis as we might treat potentially dead functions
+ // as live to lower the number of iterations. If they happen to be dead, the
+ // below fixpoint loop will identify and eliminate them.
+ SmallVector<Function *, 8> InternalFns;
+ for (Function *F : Functions)
+ if (F->hasLocalLinkage())
+ InternalFns.push_back(F);
+
+ bool FoundDeadFn = true;
+ while (FoundDeadFn) {
+ FoundDeadFn = false;
+ for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
+ Function *F = InternalFns[u];
+ if (!F)
+ continue;
- if (!checkForAllCallSites(
- [this](AbstractCallSite ACS) {
- return ToBeDeletedFunctions.count(
- ACS.getInstruction()->getFunction());
- },
- *F, true, nullptr))
- continue;
+ bool AllCallSitesKnown;
+ if (!checkForAllCallSites(
+ [this](AbstractCallSite ACS) {
+ return ToBeDeletedFunctions.count(
+ ACS.getInstruction()->getFunction());
+ },
+ *F, true, nullptr, AllCallSitesKnown))
+ continue;
- ToBeDeletedFunctions.insert(F);
- InternalFns[u] = nullptr;
- FoundDeadFn = true;
- }
+ ToBeDeletedFunctions.insert(F);
+ InternalFns[u] = nullptr;
+ FoundDeadFn = true;
}
}
- STATS_DECL(AAIsDead, Function, "Number of dead functions deleted.");
- BUILD_STAT_NAME(AAIsDead, Function) += ToBeDeletedFunctions.size();
-
// Rewrite the functions as requested during manifest.
- ManifestChange = ManifestChange | rewriteFunctionSignatures();
+ ChangeStatus ManifestChange = rewriteFunctionSignatures(CGModifiedFunctions);
- for (Function *Fn : ToBeDeletedFunctions) {
- Fn->deleteBody();
- Fn->replaceAllUsesWith(UndefValue::get(Fn->getType()));
- Fn->eraseFromParent();
- }
+ for (Function *Fn : CGModifiedFunctions)
+ CGUpdater.reanalyzeFunction(*Fn);
- if (VerifyMaxFixpointIterations &&
- IterationCounter != MaxFixpointIterations) {
- errs() << "\n[Attributor] Fixpoint iteration done after: "
- << IterationCounter << "/" << MaxFixpointIterations
- << " iterations\n";
- llvm_unreachable("The fixpoint was not reached with exactly the number of "
- "specified iterations!");
+ for (Function *Fn : ToBeDeletedFunctions)
+ CGUpdater.removeFunction(*Fn);
+
+ NumFnDeleted += ToBeDeletedFunctions.size();
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << NumFnDeleted
+ << " functions after manifest.\n");
+
+#ifdef EXPENSIVE_CHECKS
+ for (Function *F : Functions) {
+ if (ToBeDeletedFunctions.count(F))
+ continue;
+ assert(!verifyFunction(*F, &errs()) && "Module verification failed!");
}
+#endif
return ManifestChange;
}
-bool Attributor::registerFunctionSignatureRewrite(
- Argument &Arg, ArrayRef<Type *> ReplacementTypes,
- ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
- ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) {
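+// Driver entry point: run the fixpoint iteration over all abstract attributes,
+// manifest the deduced information into the IR, and finally clean up the IR.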
+ChangeStatus Attributor::run() {
+ SeedingPeriod = false;
+ runTillFixpoint();
+ ChangeStatus ManifestChange = manifestAttributes();
+ ChangeStatus CleanupChange = cleanupIR();
+ return ManifestChange | CleanupChange;
+}
+
+ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
+ // Use a new dependence vector for this update.
+ DependenceVector DV;
+ DependenceStack.push_back(&DV);
+
+ auto &AAState = AA.getState();
+ ChangeStatus CS = ChangeStatus::UNCHANGED;
+ if (!isAssumedDead(AA, nullptr, /* CheckBBLivenessOnly */ true))
+ CS = AA.update(*this);
+
+ if (DV.empty()) {
+ // If the attribute did not query any non-fix information, the state
+ // will not change and we can indicate that right away.
+ AAState.indicateOptimisticFixpoint();
+ }
+
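+  // Remember the dependences only while the state can still change; once the
+  // AA is at a fixpoint, nothing it queried can affect it anymore.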
+ if (!AAState.isAtFixpoint())
+ rememberDependences();
+
+  // Verify the stack was used properly, that is, we pop the dependence vector
+  // we put there earlier.
+ DependenceVector *PoppedDV = DependenceStack.pop_back_val();
+ (void)PoppedDV;
+ assert(PoppedDV == &DV && "Inconsistent usage of the dependence stack!");
+
+ return CS;
+}
+
+/// Create a shallow wrapper for \p F such that \p F has internal linkage
+/// afterwards. It also renames the original \p F to an anonymous name.
+///
+/// A wrapper is a function with the same type (and attributes) as \p F
+/// that will only call \p F and return the result, if any.
+///
+/// Assuming the declaration of \p F looks like:
+/// rty F(aty0 arg0, ..., atyN argN);
+///
+/// The wrapper will then look as follows:
+/// rty wrapper(aty0 arg0, ..., atyN argN) {
+/// return F(arg0, ..., argN);
+/// }
+///
+static void createShallowWrapper(Function &F) {
+ assert(AllowShallowWrappers &&
+ "Cannot create a wrapper if it is not allowed!");
+ assert(!F.isDeclaration() && "Cannot create a wrapper around a declaration!");
+
+ Module &M = *F.getParent();
+ LLVMContext &Ctx = M.getContext();
+ FunctionType *FnTy = F.getFunctionType();
+
+ Function *Wrapper =
+ Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), F.getName());
+ F.setName(""); // set the inside function anonymous
+ M.getFunctionList().insert(F.getIterator(), Wrapper);
+
+ F.setLinkage(GlobalValue::InternalLinkage);
+
+ F.replaceAllUsesWith(Wrapper);
+ assert(F.use_empty() && "Uses remained after wrapper was created!");
+
+ // Move the COMDAT section to the wrapper.
+ // TODO: Check if we need to keep it for F as well.
+ Wrapper->setComdat(F.getComdat());
+ F.setComdat(nullptr);
+
+ // Copy all metadata and attributes but keep them on F as well.
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F.getAllMetadata(MDs);
+ for (auto MDIt : MDs)
+ Wrapper->addMetadata(MDIt.first, *MDIt.second);
+ Wrapper->setAttributes(F.getAttributes());
+
+ // Create the call in the wrapper.
+ BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper);
+
+ SmallVector<Value *, 8> Args;
+ auto FArgIt = F.arg_begin();
+ for (Argument &Arg : Wrapper->args()) {
+ Args.push_back(&Arg);
+ Arg.setName((FArgIt++)->getName());
+ }
+
+ CallInst *CI = CallInst::Create(&F, Args, "", EntryBB);
+ CI->setTailCall(true);
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
+ ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB);
+
+ NumFnShallowWrapperCreated++;
+}
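+// A sketch of the intended effect (assuming the AllowShallowWrappers option is
+// set): a non-IPO-amendable `i32 @foo(i32)` is internalized and renamed, and a
+// fresh exported `i32 @foo(i32)` is emitted that merely tail-calls it.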
+
+bool Attributor::isValidFunctionSignatureRewrite(
+ Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
auto CallSiteCanBeChanged = [](AbstractCallSite ACS) {
+    // Forbid call sites that cast the function return type. If we needed to
+    // rewrite such functions we would have to re-create the cast at the new
+    // call site (if the old one had uses).
+ if (!ACS.getCalledFunction() ||
+ ACS.getInstruction()->getType() !=
+ ACS.getCalledFunction()->getReturnType())
+ return false;
// Forbid must-tail calls for now.
- return !ACS.isCallbackCall() && !ACS.getCallSite().isMustTailCall();
+ return !ACS.isCallbackCall() && !ACS.getInstruction()->isMustTailCall();
};
Function *Fn = Arg.getParent();
@@ -6196,14 +1387,17 @@ bool Attributor::registerFunctionSignatureRewrite(
AttributeList FnAttributeList = Fn->getAttributes();
if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
- FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) {
+ FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) ||
+ FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) {
LLVM_DEBUG(
dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
return false;
}
// Avoid callbacks for now.
- if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr)) {
+ bool AllCallSitesKnown;
+ if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr,
+ AllCallSitesKnown)) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n");
return false;
}
@@ -6216,21 +1410,35 @@ bool Attributor::registerFunctionSignatureRewrite(
// Forbid must-tail calls for now.
// TODO:
- bool AnyDead;
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
- if (!checkForAllInstructionsImpl(OpcodeInstMap, InstPred, nullptr, AnyDead,
- {Instruction::Call})) {
+ if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr,
+ nullptr, {Instruction::Call})) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite due to instructions\n");
return false;
}
- SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = ArgumentReplacementMap[Fn];
- if (ARIs.size() == 0)
+ return true;
+}
+
+bool Attributor::registerFunctionSignatureRewrite(
+ Argument &Arg, ArrayRef<Type *> ReplacementTypes,
+ ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
+ ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in "
+ << Arg.getParent()->getName() << " with "
+ << ReplacementTypes.size() << " replacements\n");
+ assert(isValidFunctionSignatureRewrite(Arg, ReplacementTypes) &&
+ "Cannot register an invalid rewrite");
+
+ Function *Fn = Arg.getParent();
+ SmallVectorImpl<std::unique_ptr<ArgumentReplacementInfo>> &ARIs =
+ ArgumentReplacementMap[Fn];
+ if (ARIs.empty())
ARIs.resize(Fn->arg_size());
// If we have a replacement already with less than or equal new arguments,
// ignore this request.
- ArgumentReplacementInfo *&ARI = ARIs[Arg.getArgNo()];
+ std::unique_ptr<ArgumentReplacementInfo> &ARI = ARIs[Arg.getArgNo()];
if (ARI && ARI->getNumReplacementArgs() <= ReplacementTypes.size()) {
LLVM_DEBUG(dbgs() << "[Attributor] Existing rewrite is preferred\n");
return false;
@@ -6238,18 +1446,28 @@ bool Attributor::registerFunctionSignatureRewrite(
// If we have a replacement already but we like the new one better, delete
// the old.
- if (ARI)
- delete ARI;
+ ARI.reset();
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in "
+ << Arg.getParent()->getName() << " with "
+ << ReplacementTypes.size() << " replacements\n");
// Remember the replacement.
- ARI = new ArgumentReplacementInfo(*this, Arg, ReplacementTypes,
- std::move(CalleeRepairCB),
- std::move(ACSRepairCB));
+ ARI.reset(new ArgumentReplacementInfo(*this, Arg, ReplacementTypes,
+ std::move(CalleeRepairCB),
+ std::move(ACSRepairCB)));
return true;
}
-ChangeStatus Attributor::rewriteFunctionSignatures() {
+bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
+ if (SeedAllowList.size() == 0)
+ return true;
+ return std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName());
+}
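+// SeedAllowList is expected to be a cl::list<std::string> debug option declared
+// earlier in this file; when it is non-empty, only abstract attributes whose
+// names it contains are seeded.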
+
+ChangeStatus Attributor::rewriteFunctionSignatures(
+ SmallPtrSetImpl<Function *> &ModifiedFns) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
for (auto &It : ArgumentReplacementMap) {
@@ -6259,7 +1477,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
if (ToBeDeletedFunctions.count(OldFn))
continue;
- const SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = It.getSecond();
+ const SmallVectorImpl<std::unique_ptr<ArgumentReplacementInfo>> &ARIs =
+ It.getSecond();
assert(ARIs.size() == OldFn->arg_size() && "Inconsistent state!");
SmallVector<Type *, 16> NewArgumentTypes;
@@ -6268,7 +1487,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
// Collect replacement argument types and copy over existing attributes.
AttributeList OldFnAttributeList = OldFn->getAttributes();
for (Argument &Arg : OldFn->args()) {
- if (ArgumentReplacementInfo *ARI = ARIs[Arg.getArgNo()]) {
+ if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =
+ ARIs[Arg.getArgNo()]) {
NewArgumentTypes.append(ARI->ReplacementTypes.begin(),
ARI->ReplacementTypes.end());
NewArgumentAttributes.append(ARI->getNumReplacementArgs(),
@@ -6315,6 +1535,14 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
NewFn->getBasicBlockList().splice(NewFn->begin(),
OldFn->getBasicBlockList());
+    // Fix up block addresses to reference the new function.
+ SmallVector<BlockAddress *, 8u> BlockAddresses;
+ for (User *U : OldFn->users())
+ if (auto *BA = dyn_cast<BlockAddress>(U))
+ BlockAddresses.push_back(BA);
+ for (auto *BA : BlockAddresses)
+ BA->replaceAllUsesWith(BlockAddress::get(NewFn, BA->getBasicBlock()));
+
// Set of all "call-like" instructions that invoke the old function mapped
// to their new replacements.
SmallVector<std::pair<CallBase *, CallBase *>, 8> CallSitePairs;
@@ -6330,7 +1558,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum) {
unsigned NewFirstArgNum = NewArgOperands.size();
(void)NewFirstArgNum; // only used inside assert.
- if (ArgumentReplacementInfo *ARI = ARIs[OldArgNum]) {
+ if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =
+ ARIs[OldArgNum]) {
if (ARI->ACSRepairCB)
ARI->ACSRepairCB(*ARI, ACS, NewArgOperands);
assert(ARI->getNumReplacementArgs() + NewFirstArgNum ==
@@ -6369,11 +1598,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
}
// Copy over various properties and the new attributes.
- uint64_t W;
- if (OldCB->extractProfTotalWeight(W))
- NewCB->setProfWeight(W);
+ NewCB->copyMetadata(*OldCB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
NewCB->setCallingConv(OldCB->getCallingConv());
- NewCB->setDebugLoc(OldCB->getDebugLoc());
NewCB->takeName(OldCB);
NewCB->setAttributes(AttributeList::get(
Ctx, OldCallAttributeList.getFnAttributes(),
@@ -6384,8 +1610,9 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
};
// Use the CallSiteReplacementCreator to create replacement call sites.
- bool Success =
- checkForAllCallSites(CallSiteReplacementCreator, *OldFn, true, nullptr);
+ bool AllCallSitesKnown;
+ bool Success = checkForAllCallSites(CallSiteReplacementCreator, *OldFn,
+ true, nullptr, AllCallSitesKnown);
(void)Success;
assert(Success && "Assumed call site replacement to succeed!");
@@ -6394,7 +1621,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
auto NewFnArgIt = NewFn->arg_begin();
for (unsigned OldArgNum = 0; OldArgNum < ARIs.size();
++OldArgNum, ++OldFnArgIt) {
- if (ArgumentReplacementInfo *ARI = ARIs[OldArgNum]) {
+ if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =
+ ARIs[OldArgNum]) {
if (ARI->CalleeRepairCB)
ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt);
NewFnArgIt += ARI->ReplacementTypes.size();
@@ -6409,11 +1637,21 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
for (auto &CallSitePair : CallSitePairs) {
CallBase &OldCB = *CallSitePair.first;
CallBase &NewCB = *CallSitePair.second;
+ assert(OldCB.getType() == NewCB.getType() &&
+ "Cannot handle call sites with different types!");
+ ModifiedFns.insert(OldCB.getFunction());
+ CGUpdater.replaceCallSite(OldCB, NewCB);
OldCB.replaceAllUsesWith(&NewCB);
OldCB.eraseFromParent();
}
- ToBeDeletedFunctions.insert(OldFn);
+ // Replace the function in the call graph (if any).
+ CGUpdater.replaceFunctionWith(*OldFn, *NewFn);
+
+ // If the old function was modified and needed to be reanalyzed, the new one
+ // does now.
+ if (ModifiedFns.erase(OldFn))
+ ModifiedFns.insert(NewFn);
Changed = ChangeStatus::CHANGED;
}
@@ -6421,13 +1659,16 @@ ChangeStatus Attributor::rewriteFunctionSignatures() {
return Changed;
}
-void Attributor::initializeInformationCache(Function &F) {
+void InformationCache::initializeInformationCache(const Function &CF,
+ FunctionInfo &FI) {
+  // As we do not modify the function here, we can remove the const without
+  // breaking implicit assumptions. At the end of the day, we could initialize
+  // the cache eagerly, which would look the same to the users.
+ Function &F = const_cast<Function &>(CF);
// Walk all instructions to find interesting instructions that might be
// queried by abstract attributes during their initialization or update.
// This has to happen before we create attributes.
- auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F];
- auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F];
for (Instruction &I : instructions(&F)) {
bool IsInterestingOpcode = false;
@@ -6439,15 +1680,23 @@ void Attributor::initializeInformationCache(Function &F) {
// Note: There are no concrete attributes now so this is initially empty.
switch (I.getOpcode()) {
default:
- assert((!ImmutableCallSite(&I)) && (!isa<CallBase>(&I)) &&
- "New call site/base instruction type needs to be known int the "
+ assert(!isa<CallBase>(&I) &&
+ "New call base instruction type needs to be known in the "
"Attributor.");
break;
- case Instruction::Load:
- // The alignment of a pointer is interesting for loads.
- case Instruction::Store:
- // The alignment of a pointer is interesting for stores.
case Instruction::Call:
+ // Calls are interesting on their own, additionally:
+ // For `llvm.assume` calls we also fill the KnowledgeMap as we find them.
+ // For `must-tail` calls we remember the caller and callee.
+ if (IntrinsicInst *Assume = dyn_cast<IntrinsicInst>(&I)) {
+ if (Assume->getIntrinsicID() == Intrinsic::assume)
+ fillMapFromAssume(*Assume, KnowledgeMap);
+ } else if (cast<CallInst>(I).isMustTailCall()) {
+ FI.ContainsMustTailCall = true;
+ if (const Function *Callee = cast<CallInst>(I).getCalledFunction())
+ getFunctionInfo(*Callee).CalledViaMustTail = true;
+ }
+ LLVM_FALLTHROUGH;
case Instruction::CallBr:
case Instruction::Invoke:
case Instruction::CleanupRet:
@@ -6457,28 +1706,55 @@ void Attributor::initializeInformationCache(Function &F) {
case Instruction::Br:
case Instruction::Resume:
case Instruction::Ret:
+ case Instruction::Load:
+ // The alignment of a pointer is interesting for loads.
+ case Instruction::Store:
+ // The alignment of a pointer is interesting for stores.
IsInterestingOpcode = true;
}
- if (IsInterestingOpcode)
- InstOpcodeMap[I.getOpcode()].push_back(&I);
+ if (IsInterestingOpcode) {
+ auto *&Insts = FI.OpcodeInstMap[I.getOpcode()];
+ if (!Insts)
+ Insts = new (Allocator) InstructionVectorTy();
+ Insts->push_back(&I);
+ }
if (I.mayReadOrWriteMemory())
- ReadOrWriteInsts.push_back(&I);
+ FI.RWInsts.push_back(&I);
}
+
+ if (F.hasFnAttribute(Attribute::AlwaysInline) &&
+ isInlineViable(F).isSuccess())
+ InlineableFunctions.insert(&F);
+}
+
+InformationCache::FunctionInfo::~FunctionInfo() {
+  // The instruction vectors are allocated using a BumpPtrAllocator, which does
+  // not run destructors, so we have to destroy them manually.
+ for (auto &It : OpcodeInstMap)
+ It.getSecond()->~InstructionVectorTy();
}
void Attributor::recordDependence(const AbstractAttribute &FromAA,
const AbstractAttribute &ToAA,
DepClassTy DepClass) {
+  // If we are outside of an update, that is, before the actual fixpoint
+  // iteration started (= when we create AAs), we do not track dependences
+  // because we will put all AAs into the initial worklist anyway.
+ if (DependenceStack.empty())
+ return;
if (FromAA.getState().isAtFixpoint())
return;
+ DependenceStack.back()->push_back({&FromAA, &ToAA, DepClass});
+}
- if (DepClass == DepClassTy::REQUIRED)
- QueryMap[&FromAA].RequiredAAs.insert(
- const_cast<AbstractAttribute *>(&ToAA));
- else
- QueryMap[&FromAA].OptionalAAs.insert(
- const_cast<AbstractAttribute *>(&ToAA));
- QueriedNonFixAA = true;
+void Attributor::rememberDependences() {
+ assert(!DependenceStack.empty() && "No dependences to remember!");
+
+ for (DepInfo &DI : *DependenceStack.back()) {
+ auto &DepAAs = const_cast<AbstractAttribute &>(*DI.FromAA).Deps;
+ DepAAs.push_back(AbstractAttribute::DepTy(
+ const_cast<AbstractAttribute *>(DI.ToAA), unsigned(DI.DepClass)));
+ }
}
void Attributor::identifyDefaultAbstractAttributes(Function &F) {
@@ -6487,6 +1763,17 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (F.isDeclaration())
return;
+ // In non-module runs we need to look at the call sites of a function to
+ // determine if it is part of a must-tail call edge. This will influence what
+ // attributes we can derive.
+ InformationCache::FunctionInfo &FI = InfoCache.getFunctionInfo(F);
+ if (!isModulePass() && !FI.CalledViaMustTail) {
+ for (const Use &U : F.uses())
+ if (const auto *CB = dyn_cast<CallBase>(U.getUser()))
+ if (CB->isCallee(&U) && CB->isMustTailCall())
+ FI.CalledViaMustTail = true;
+ }
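+  // Being (potentially) called via must-tail restricts what we may derive and
+  // manifest; e.g., signature rewrites are forbidden for such functions.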
+
IRPosition FPos = IRPosition::function(F);
// Check for dead BasicBlocks in every function.
@@ -6518,6 +1805,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function might be "readnone/readonly/writeonly/...".
getOrCreateAAFor<AAMemoryBehavior>(FPos);
+ // Every function can be "readnone/argmemonly/inaccessiblememonly/...".
+ getOrCreateAAFor<AAMemoryLocation>(FPos);
+
// Every function might be applicable for Heap-To-Stack conversion.
if (EnableHeapToStack)
getOrCreateAAFor<AAHeapToStack>(FPos);
@@ -6560,6 +1850,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every argument might be simplified.
getOrCreateAAFor<AAValueSimplify>(ArgPos);
+ // Every argument might be dead.
+ getOrCreateAAFor<AAIsDead>(ArgPos);
+
if (Arg.getType()->isPointerTy()) {
// Every argument with pointer type might be marked nonnull.
getOrCreateAAFor<AANonNull>(ArgPos);
@@ -6582,75 +1875,87 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every argument with pointer type might be marked nofree.
getOrCreateAAFor<AANoFree>(ArgPos);
+
+ // Every argument with pointer type might be privatizable (or promotable)
+ getOrCreateAAFor<AAPrivatizablePtr>(ArgPos);
}
}
auto CallSitePred = [&](Instruction &I) -> bool {
- CallSite CS(&I);
- if (Function *Callee = CS.getCalledFunction()) {
- // Skip declerations except if annotations on their call sites were
- // explicitly requested.
- if (!AnnotateDeclarationCallSites && Callee->isDeclaration() &&
- !Callee->hasMetadata(LLVMContext::MD_callback))
- return true;
+ auto &CB = cast<CallBase>(I);
+ IRPosition CBRetPos = IRPosition::callsite_returned(CB);
- if (!Callee->getReturnType()->isVoidTy() && !CS->use_empty()) {
+    // Call sites might be dead if they have no side effects and no live
+    // users. The return value might be dead if there are no live users.
+ getOrCreateAAFor<AAIsDead>(CBRetPos);
- IRPosition CSRetPos = IRPosition::callsite_returned(CS);
+ Function *Callee = CB.getCalledFunction();
+ // TODO: Even if the callee is not known now we might be able to simplify
+ // the call/callee.
+ if (!Callee)
+ return true;
- // Call site return values might be dead.
- getOrCreateAAFor<AAIsDead>(CSRetPos);
+ // Skip declarations except if annotations on their call sites were
+ // explicitly requested.
+ if (!AnnotateDeclarationCallSites && Callee->isDeclaration() &&
+ !Callee->hasMetadata(LLVMContext::MD_callback))
+ return true;
- // Call site return integer values might be limited by a constant range.
- if (Callee->getReturnType()->isIntegerTy()) {
- getOrCreateAAFor<AAValueConstantRange>(CSRetPos);
- }
- }
+ if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) {
- for (int i = 0, e = CS.getNumArgOperands(); i < e; i++) {
+ IRPosition CBRetPos = IRPosition::callsite_returned(CB);
- IRPosition CSArgPos = IRPosition::callsite_argument(CS, i);
+ // Call site return integer values might be limited by a constant range.
+ if (Callee->getReturnType()->isIntegerTy())
+ getOrCreateAAFor<AAValueConstantRange>(CBRetPos);
+ }
- // Every call site argument might be dead.
- getOrCreateAAFor<AAIsDead>(CSArgPos);
+ for (int I = 0, E = CB.getNumArgOperands(); I < E; ++I) {
- // Call site argument might be simplified.
- getOrCreateAAFor<AAValueSimplify>(CSArgPos);
+ IRPosition CBArgPos = IRPosition::callsite_argument(CB, I);
- if (!CS.getArgument(i)->getType()->isPointerTy())
- continue;
+ // Every call site argument might be dead.
+ getOrCreateAAFor<AAIsDead>(CBArgPos);
- // Call site argument attribute "non-null".
- getOrCreateAAFor<AANonNull>(CSArgPos);
+ // Call site argument might be simplified.
+ getOrCreateAAFor<AAValueSimplify>(CBArgPos);
- // Call site argument attribute "no-alias".
- getOrCreateAAFor<AANoAlias>(CSArgPos);
+ if (!CB.getArgOperand(I)->getType()->isPointerTy())
+ continue;
- // Call site argument attribute "dereferenceable".
- getOrCreateAAFor<AADereferenceable>(CSArgPos);
+ // Call site argument attribute "non-null".
+ getOrCreateAAFor<AANonNull>(CBArgPos);
- // Call site argument attribute "align".
- getOrCreateAAFor<AAAlign>(CSArgPos);
+ // Call site argument attribute "nocapture".
+ getOrCreateAAFor<AANoCapture>(CBArgPos);
- // Call site argument attribute
- // "readnone/readonly/writeonly/..."
- getOrCreateAAFor<AAMemoryBehavior>(CSArgPos);
+ // Call site argument attribute "no-alias".
+ getOrCreateAAFor<AANoAlias>(CBArgPos);
- // Call site argument attribute "nofree".
- getOrCreateAAFor<AANoFree>(CSArgPos);
- }
+ // Call site argument attribute "dereferenceable".
+ getOrCreateAAFor<AADereferenceable>(CBArgPos);
+
+ // Call site argument attribute "align".
+ getOrCreateAAFor<AAAlign>(CBArgPos);
+
+ // Call site argument attribute
+ // "readnone/readonly/writeonly/..."
+ getOrCreateAAFor<AAMemoryBehavior>(CBArgPos);
+
+ // Call site argument attribute "nofree".
+ getOrCreateAAFor<AANoFree>(CBArgPos);
}
return true;
};
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
- bool Success, AnyDead = false;
+ bool Success;
Success = checkForAllInstructionsImpl(
- OpcodeInstMap, CallSitePred, nullptr, AnyDead,
+ nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr,
{(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
(unsigned)Instruction::Call});
(void)Success;
- assert(Success && !AnyDead && "Expected the check call to be successful!");
+ assert(Success && "Expected the check call to be successful!");
auto LoadStorePred = [&](Instruction &I) -> bool {
if (isa<LoadInst>(I))
@@ -6662,10 +1967,10 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
return true;
};
Success = checkForAllInstructionsImpl(
- OpcodeInstMap, LoadStorePred, nullptr, AnyDead,
+ nullptr, OpcodeInstMap, LoadStorePred, nullptr, nullptr,
{(unsigned)Instruction::Load, (unsigned)Instruction::Store});
(void)Success;
- assert(Success && !AnyDead && "Expected the check call to be successful!");
+ assert(Success && "Expected the check call to be successful!");
}
/// Helpers to ease debugging through output streams and print calls.
@@ -6703,14 +2008,6 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) {
<< Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}";
}
-template <typename base_ty, base_ty BestState, base_ty WorstState>
-raw_ostream &
-llvm::operator<<(raw_ostream &OS,
- const IntegerStateBase<base_ty, BestState, WorstState> &S) {
- return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
- << static_cast<const AbstractState &>(S);
-}
-
raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) {
OS << "range-state(" << S.getBitWidth() << ")<";
S.getKnown().print(OS);
@@ -6740,50 +2037,95 @@ void AbstractAttribute::print(raw_ostream &OS) const {
/// Pass (Manager) Boilerplate
/// ----------------------------------------------------------------------------
-static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) {
- if (DisableAttributor)
+static bool runAttributorOnFunctions(InformationCache &InfoCache,
+ SetVector<Function *> &Functions,
+ AnalysisGetter &AG,
+ CallGraphUpdater &CGUpdater) {
+ if (Functions.empty())
return false;
- LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << M.size()
+ LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << Functions.size()
<< " functions.\n");
// Create an Attributor and initially empty information cache that is filled
// while we identify default attribute opportunities.
- InformationCache InfoCache(M, AG);
- Attributor A(InfoCache, DepRecInterval);
+ Attributor A(Functions, InfoCache, CGUpdater);
- for (Function &F : M)
- A.initializeInformationCache(F);
+  // Create shallow wrappers for all functions that are not IPO amendable.
+ if (AllowShallowWrappers)
+ for (Function *F : Functions)
+ if (!A.isFunctionIPOAmendable(*F))
+ createShallowWrapper(*F);
- for (Function &F : M) {
- if (F.hasExactDefinition())
+ for (Function *F : Functions) {
+ if (F->hasExactDefinition())
NumFnWithExactDefinition++;
else
NumFnWithoutExactDefinition++;
// We look at internal functions only on-demand but if any use is not a
- // direct call, we have to do it eagerly.
- if (F.hasLocalLinkage()) {
- if (llvm::all_of(F.uses(), [](const Use &U) {
- return ImmutableCallSite(U.getUser()) &&
- ImmutableCallSite(U.getUser()).isCallee(&U);
+ // direct call or outside the current set of analyzed functions, we have to
+ // do it eagerly.
+ if (F->hasLocalLinkage()) {
+ if (llvm::all_of(F->uses(), [&Functions](const Use &U) {
+ const auto *CB = dyn_cast<CallBase>(U.getUser());
+ return CB && CB->isCallee(&U) &&
+ Functions.count(const_cast<Function *>(CB->getCaller()));
}))
continue;
}
// Populate the Attributor with abstract attribute opportunities in the
// function and the information cache with IR information.
- A.identifyDefaultAbstractAttributes(F);
+ A.identifyDefaultAbstractAttributes(*F);
}
- bool Changed = A.run(M) == ChangeStatus::CHANGED;
- assert(!verifyModule(M, &errs()) && "Module verification failed!");
- return Changed;
+ ChangeStatus Changed = A.run();
+ LLVM_DEBUG(dbgs() << "[Attributor] Done with " << Functions.size()
+ << " functions, result: " << Changed << ".\n");
+ return Changed == ChangeStatus::CHANGED;
}
PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
- AnalysisGetter AG(AM);
- if (runAttributorOnModule(M, AG)) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ AnalysisGetter AG(FAM);
+
+ SetVector<Function *> Functions;
+ for (Function &F : M)
+ Functions.insert(&F);
+
+ CallGraphUpdater CGUpdater;
+ BumpPtrAllocator Allocator;
+ InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
+ if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) {
+ // FIXME: Think about passes we will preserve and add them here.
+ return PreservedAnalyses::none();
+ }
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ AnalysisGetter AG(FAM);
+
+ SetVector<Function *> Functions;
+ for (LazyCallGraph::Node &N : C)
+ Functions.insert(&N.getFunction());
+
+ if (Functions.empty())
+ return PreservedAnalyses::all();
+
+ Module &M = *Functions.back()->getParent();
+ CallGraphUpdater CGUpdater;
+ CGUpdater.initialize(CG, C, AM, UR);
+ BumpPtrAllocator Allocator;
+ InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
+ if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) {
// FIXME: Think about passes we will preserve and add them here.
return PreservedAnalyses::none();
}
@@ -6804,7 +2146,14 @@ struct AttributorLegacyPass : public ModulePass {
return false;
AnalysisGetter AG;
- return runAttributorOnModule(M, AG);
+ SetVector<Function *> Functions;
+ for (Function &F : M)
+ Functions.insert(&F);
+
+ CallGraphUpdater CGUpdater;
+ BumpPtrAllocator Allocator;
+ InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
+ return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -6813,158 +2162,65 @@ struct AttributorLegacyPass : public ModulePass {
}
};
-} // end anonymous namespace
+struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
+ CallGraphUpdater CGUpdater;
+ static char ID;
-Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
+ AttributorCGSCCLegacyPass() : CallGraphSCCPass(ID) {
+ initializeAttributorCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
-char AttributorLegacyPass::ID = 0;
+ bool runOnSCC(CallGraphSCC &SCC) override {
+ if (skipSCC(SCC))
+ return false;
-const char AAReturnedValues::ID = 0;
-const char AANoUnwind::ID = 0;
-const char AANoSync::ID = 0;
-const char AANoFree::ID = 0;
-const char AANonNull::ID = 0;
-const char AANoRecurse::ID = 0;
-const char AAWillReturn::ID = 0;
-const char AAUndefinedBehavior::ID = 0;
-const char AANoAlias::ID = 0;
-const char AAReachability::ID = 0;
-const char AANoReturn::ID = 0;
-const char AAIsDead::ID = 0;
-const char AADereferenceable::ID = 0;
-const char AAAlign::ID = 0;
-const char AANoCapture::ID = 0;
-const char AAValueSimplify::ID = 0;
-const char AAHeapToStack::ID = 0;
-const char AAMemoryBehavior::ID = 0;
-const char AAValueConstantRange::ID = 0;
-
-// Macro magic to create the static generator function for attributes that
-// follow the naming scheme.
-
-#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \
- case IRPosition::PK: \
- llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!");
-
-#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \
- case IRPosition::PK: \
- AA = new CLASS##SUFFIX(IRP); \
- break;
+ SetVector<Function *> Functions;
+ for (CallGraphNode *CGN : SCC)
+ if (Function *Fn = CGN->getFunction())
+ if (!Fn->isDeclaration())
+ Functions.insert(Fn);
-#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
- CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
- CLASS *AA = nullptr; \
- switch (IRP.getPositionKind()) { \
- SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
- SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
- SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
- SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
- SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
- SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
- } \
- return *AA; \
- }
+ if (Functions.empty())
+ return false;
-#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
- CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
- CLASS *AA = nullptr; \
- switch (IRP.getPositionKind()) { \
- SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
- SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \
- SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
- } \
- return *AA; \
+ AnalysisGetter AG;
+ CallGraph &CG = const_cast<CallGraph &>(SCC.getCallGraph());
+ CGUpdater.initialize(CG, SCC);
+ Module &M = *Functions.back()->getParent();
+ BumpPtrAllocator Allocator;
+ InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
+ return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater);
}
-#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
- CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
- CLASS *AA = nullptr; \
- switch (IRP.getPositionKind()) { \
- SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
- } \
- return *AA; \
- }
+ bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
-#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
- CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
- CLASS *AA = nullptr; \
- switch (IRP.getPositionKind()) { \
- SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
- SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
- SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
- SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
- SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
- SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
- SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
- } \
- return *AA; \
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // FIXME: Think about passes we will preserve and add them here.
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
}
+};
-#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
- CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
- CLASS *AA = nullptr; \
- switch (IRP.getPositionKind()) { \
- SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
- SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
- SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
- } \
- return *AA; \
- }
+} // end anonymous namespace
+
+Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
+Pass *llvm::createAttributorCGSCCLegacyPass() {
+ return new AttributorCGSCCLegacyPass();
+}
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind)
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync)
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse)
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
-
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
-
-CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
-CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
-CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
-
-CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
-CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability)
-CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
-
-CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
-
-#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION
-#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION
-#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION
-#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION
-#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION
-#undef SWITCH_PK_CREATE
-#undef SWITCH_PK_INV
+char AttributorLegacyPass::ID = 0;
+char AttributorCGSCCLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
"Deduce and propagate attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
"Deduce and propagate attributes", false, false)
+INITIALIZE_PASS_BEGIN(AttributorCGSCCLegacyPass, "attributor-cgscc",
+ "Deduce and propagate attributes (CGSCC pass)", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(AttributorCGSCCLegacyPass, "attributor-cgscc",
+ "Deduce and propagate attributes (CGSCC pass)", false,
+ false)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
new file mode 100644
index 0000000000000..7e9fd61eeb41e
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -0,0 +1,7225 @@
+//===- AttributorAttributes.cpp - Attributes for Attributor deduction -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// See the Attributor.h file comment and the class descriptions in that file for
+// more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/Attributor.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/NoFolder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "attributor"
+
+static cl::opt<bool> ManifestInternal(
+ "attributor-manifest-internal", cl::Hidden,
+ cl::desc("Manifest Attributor internal string attributes."),
+ cl::init(false));
+
+static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
+ cl::Hidden);
+
+STATISTIC(NumAAs, "Number of abstract attributes created");
+
+// Some helper macros to deal with statistics tracking.
+//
+// Usage:
+// For simple IR attribute tracking overload trackStatistics in the abstract
+// attribute and choose the right STATS_DECLTRACK_********* macro,
+// e.g.,:
+// void trackStatistics() const override {
+// STATS_DECLTRACK_ARG_ATTR(returned)
+// }
+// If there is a single "increment" site one can use the macro
+// STATS_DECLTRACK with a custom message. If there are multiple increment
+// sites, STATS_DECL and STATS_TRACK can also be used separately.
+//
+#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \
+ ("Number of " #TYPE " marked '" #NAME "'")
+#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME
+#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG);
+#define STATS_DECL(NAME, TYPE, MSG) \
+ STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG);
+#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE));
+#define STATS_DECLTRACK(NAME, TYPE, MSG) \
+ { \
+ STATS_DECL(NAME, TYPE, MSG) \
+ STATS_TRACK(NAME, TYPE) \
+ }
+#define STATS_DECLTRACK_ARG_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME))
+#define STATS_DECLTRACK_CSARG_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CSArguments, \
+ BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME))
+#define STATS_DECLTRACK_FN_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME))
+#define STATS_DECLTRACK_CS_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME))
+#define STATS_DECLTRACK_FNRET_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, FunctionReturn, \
+ BUILD_STAT_MSG_IR_ATTR(function returns, NAME))
+#define STATS_DECLTRACK_CSRET_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, CSReturn, \
+ BUILD_STAT_MSG_IR_ATTR(call site returns, NAME))
+#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \
+ STATS_DECLTRACK(NAME, Floating, \
+ ("Number of floating values known to be '" #NAME "'"))
+
+// Specialization of the operator<< for abstract attribute subclasses. This
+// disambiguates situations where multiple operators are applicable.
+namespace llvm {
+#define PIPE_OPERATOR(CLASS) \
+ raw_ostream &operator<<(raw_ostream &OS, const CLASS &AA) { \
+ return OS << static_cast<const AbstractAttribute &>(AA); \
+ }
+
+PIPE_OPERATOR(AAIsDead)
+PIPE_OPERATOR(AANoUnwind)
+PIPE_OPERATOR(AANoSync)
+PIPE_OPERATOR(AANoRecurse)
+PIPE_OPERATOR(AAWillReturn)
+PIPE_OPERATOR(AANoReturn)
+PIPE_OPERATOR(AAReturnedValues)
+PIPE_OPERATOR(AANonNull)
+PIPE_OPERATOR(AANoAlias)
+PIPE_OPERATOR(AADereferenceable)
+PIPE_OPERATOR(AAAlign)
+PIPE_OPERATOR(AANoCapture)
+PIPE_OPERATOR(AAValueSimplify)
+PIPE_OPERATOR(AANoFree)
+PIPE_OPERATOR(AAHeapToStack)
+PIPE_OPERATOR(AAReachability)
+PIPE_OPERATOR(AAMemoryBehavior)
+PIPE_OPERATOR(AAMemoryLocation)
+PIPE_OPERATOR(AAValueConstantRange)
+PIPE_OPERATOR(AAPrivatizablePtr)
+PIPE_OPERATOR(AAUndefinedBehavior)
+
+#undef PIPE_OPERATOR
+} // namespace llvm
+
+namespace {
+
+static Optional<ConstantInt *>
+getAssumedConstantInt(Attributor &A, const Value &V,
+ const AbstractAttribute &AA,
+ bool &UsedAssumedInformation) {
+ Optional<Constant *> C = A.getAssumedConstant(V, AA, UsedAssumedInformation);
+ if (C.hasValue())
+ return dyn_cast_or_null<ConstantInt>(C.getValue());
+ return llvm::None;
+}
+
+/// Get pointer operand of memory accessing instruction. If \p I is
+/// not a memory accessing instruction, return nullptr. If \p AllowVolatile
+/// is set to false and the instruction is volatile, return nullptr.
+static const Value *getPointerOperand(const Instruction *I,
+ bool AllowVolatile) {
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ if (!AllowVolatile && LI->isVolatile())
+ return nullptr;
+ return LI->getPointerOperand();
+ }
+
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ if (!AllowVolatile && SI->isVolatile())
+ return nullptr;
+ return SI->getPointerOperand();
+ }
+
+ if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (!AllowVolatile && CXI->isVolatile())
+ return nullptr;
+ return CXI->getPointerOperand();
+ }
+
+ if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
+ if (!AllowVolatile && RMWI->isVolatile())
+ return nullptr;
+ return RMWI->getPointerOperand();
+ }
+
+ return nullptr;
+}
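+
+// For illustration (hand-checked against the cases above): for a simple
+// `store i32 0, i32* %p` this helper returns %p, while for a volatile load
+// it returns nullptr unless \p AllowVolatile is set.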
+
+/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and
+/// advanced by \p Offset bytes. To aid later analysis the method tries to build
+/// getelementptr instructions that traverse the natural type of \p Ptr if
+/// possible. If that fails, the remaining offset is adjusted byte-wise, hence
+/// through a cast to i8*.
+///
+/// TODO: This could probably live somewhere more prominently if it doesn't
+/// already exist.
+static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset,
+ IRBuilder<NoFolder> &IRB, const DataLayout &DL) {
+ assert(Offset >= 0 && "Negative offset not supported yet!");
+ LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset
+ << "-bytes as " << *ResTy << "\n");
+
+ // The initial type we are trying to traverse to get nice GEPs.
+ Type *Ty = Ptr->getType();
+
+ SmallVector<Value *, 4> Indices;
+ std::string GEPName = Ptr->getName().str();
+ while (Offset) {
+ uint64_t Idx, Rem;
+
+ if (auto *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ if (int64_t(SL->getSizeInBytes()) < Offset)
+ break;
+ Idx = SL->getElementContainingOffset(Offset);
+ assert(Idx < STy->getNumElements() && "Offset calculation error!");
+ Rem = Offset - SL->getElementOffset(Idx);
+ Ty = STy->getElementType(Idx);
+ } else if (auto *PTy = dyn_cast<PointerType>(Ty)) {
+ Ty = PTy->getElementType();
+ if (!Ty->isSized())
+ break;
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ assert(ElementSize && "Expected type with size!");
+ Idx = Offset / ElementSize;
+ Rem = Offset % ElementSize;
+ } else {
+ // Non-aggregate type, we cast and make byte-wise progress now.
+ break;
+ }
+
+ LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset
+ << " Idx: " << Idx << " Rem: " << Rem << "\n");
+
+ GEPName += "." + std::to_string(Idx);
+ Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx));
+ Offset = Rem;
+ }
+
+ // Create a GEP if we collected indices above.
+ if (Indices.size())
+ Ptr = IRB.CreateGEP(Ptr, Indices, GEPName);
+
+ // If an offset is left we use byte-wise adjustment.
+ if (Offset) {
+ Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
+ Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset),
+ GEPName + ".b" + Twine(Offset));
+ }
+
+ // Ensure the result has the requested type.
+ Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast");
+
+ LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
+ return Ptr;
+}
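+
+// A worked example (hand-derived, assuming %S = type { i64, i32 } and a
+// typical data layout): constructPointer(i32*, %p of type %S*,
+// /* Offset */ 8, ...) first steps over the pointer (index 0), then finds
+// offset 8 inside element 1 of %S, so it emits roughly
+//
+//   %p.0.1 = getelementptr %S, %S* %p, i32 0, i32 1
+//
+// with no byte-wise adjustment left and no final cast needed since the GEP
+// already has the requested type i32*.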
+
+/// Recursively visit all values that might become \p IRP at some point. This
+/// will be done by looking through cast instructions, selects, phis, and calls
+/// with the "returned" attribute. Once we cannot look through the value any
+/// further, the callback \p VisitValueCB is invoked and passed the current
+/// value, the \p State, and a flag to indicate if we stripped anything.
+/// Stripped means that we unpacked the value associated with \p IRP at least
+/// once. Note that the value used for the callback may still be the value
+/// associated with \p IRP (due to PHIs). To limit how much effort is invested,
+/// we will never visit more values than specified by \p MaxValues.
+template <typename AAType, typename StateTy>
+static bool genericValueTraversal(
+ Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State,
+ function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
+ VisitValueCB,
+ const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
+ function_ref<Value *(Value *)> StripCB = nullptr) {
+
+ const AAIsDead *LivenessAA = nullptr;
+ if (IRP.getAnchorScope())
+ LivenessAA = &A.getAAFor<AAIsDead>(
+ QueryingAA, IRPosition::function(*IRP.getAnchorScope()),
+ /* TrackDependence */ false);
+ bool AnyDead = false;
+
+ using Item = std::pair<Value *, const Instruction *>;
+ SmallSet<Item, 16> Visited;
+ SmallVector<Item, 16> Worklist;
+ Worklist.push_back({&IRP.getAssociatedValue(), CtxI});
+
+ int Iteration = 0;
+ do {
+ Item I = Worklist.pop_back_val();
+ Value *V = I.first;
+ CtxI = I.second;
+ if (StripCB)
+ V = StripCB(V);
+
+ // Check if we should process the current value. To prevent endless
+ // recursion keep a record of the values we followed!
+ if (!Visited.insert(I).second)
+ continue;
+
+ // Make sure we limit the compile time for complex expressions.
+ if (Iteration++ >= MaxValues)
+ return false;
+
+ // Explicitly look through calls with a "returned" attribute if we do
+    // not have a pointer, as stripPointerCasts only works on pointers.
+ Value *NewV = nullptr;
+ if (V->getType()->isPointerTy()) {
+ NewV = V->stripPointerCasts();
+ } else {
+ auto *CB = dyn_cast<CallBase>(V);
+ if (CB && CB->getCalledFunction()) {
+ for (Argument &Arg : CB->getCalledFunction()->args())
+ if (Arg.hasReturnedAttr()) {
+ NewV = CB->getArgOperand(Arg.getArgNo());
+ break;
+ }
+ }
+ }
+ if (NewV && NewV != V) {
+ Worklist.push_back({NewV, CtxI});
+ continue;
+ }
+
+ // Look through select instructions, visit both potential values.
+ if (auto *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back({SI->getTrueValue(), CtxI});
+ Worklist.push_back({SI->getFalseValue(), CtxI});
+ continue;
+ }
+
+ // Look through phi nodes, visit all live operands.
+ if (auto *PHI = dyn_cast<PHINode>(V)) {
+ assert(LivenessAA &&
+ "Expected liveness in the presence of instructions!");
+ for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
+ BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
+ if (A.isAssumedDead(*IncomingBB->getTerminator(), &QueryingAA,
+ LivenessAA,
+ /* CheckBBLivenessOnly */ true)) {
+ AnyDead = true;
+ continue;
+ }
+ Worklist.push_back(
+ {PHI->getIncomingValue(u), IncomingBB->getTerminator()});
+ }
+ continue;
+ }
+
+ if (UseValueSimplify && !isa<Constant>(V)) {
+ bool UsedAssumedInformation = false;
+ Optional<Constant *> C =
+ A.getAssumedConstant(*V, QueryingAA, UsedAssumedInformation);
+ if (!C.hasValue())
+ continue;
+ if (Value *NewV = C.getValue()) {
+ Worklist.push_back({NewV, CtxI});
+ continue;
+ }
+ }
+
+ // Once a leaf is reached we inform the user through the callback.
+ if (!VisitValueCB(*V, CtxI, State, Iteration > 1))
+ return false;
+ } while (!Worklist.empty());
+
+  // If we actually used liveness information, we have to record a dependence.
+ if (AnyDead)
+ A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
+
+ // All values have been visited.
+ return true;
+}
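+
+// A minimal usage sketch (illustrative only; for a real caller see
+// AAReturnedValuesImpl::updateImpl below): a callback that merely records
+// every leaf value the traversal reaches could look like
+//
+//   auto VisitValueCB = [](Value &V, const Instruction *,
+//                          SmallPtrSetImpl<Value *> &Leaves, bool) {
+//     Leaves.insert(&V);
+//     return true;
+//   };
+//
+// and would be passed to genericValueTraversal together with the querying
+// abstract attribute and a SmallPtrSet as the state.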
+
+const Value *stripAndAccumulateMinimalOffsets(
+ Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val,
+ const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
+ bool UseAssumed = false) {
+
+ auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool {
+ const IRPosition &Pos = IRPosition::value(V);
+ // Only track dependence if we are going to use the assumed info.
+ const AAValueConstantRange &ValueConstantRangeAA =
+ A.getAAFor<AAValueConstantRange>(QueryingAA, Pos,
+ /* TrackDependence */ UseAssumed);
+ ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed()
+ : ValueConstantRangeAA.getKnown();
+ // We can only use the lower part of the range because the upper part can
+ // be higher than what the value can really be.
+ ROffset = Range.getSignedMin();
+ return true;
+ };
+
+ return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
+ AttributorAnalysis);
+}
+
+static const Value *getMinimalBaseOfAccsesPointerOperand(
+ Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I,
+ int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) {
+ const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false);
+ if (!Ptr)
+ return nullptr;
+ APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ const Value *Base = stripAndAccumulateMinimalOffsets(
+ A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds);
+
+ BytesOffset = OffsetAPInt.getSExtValue();
+ return Base;
+}
+
+static const Value *
+getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
+ const DataLayout &DL,
+ bool AllowNonInbounds = false) {
+ const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false);
+ if (!Ptr)
+ return nullptr;
+
+ return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL,
+ AllowNonInbounds);
+}
+
+/// Helper function to clamp a state \p S of type \p StateType with the
+/// information in \p R and indicate/return if \p S did change (i.e., an
+/// update is required to be run again).
+template <typename StateType>
+ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
+ auto Assumed = S.getAssumed();
+ S ^= R;
+ return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+}
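+
+// Illustrative use (see, e.g., AANoUnwindCallSite::updateImpl below): a call
+// site state is clamped by the callee's function-level state via roughly
+//
+//   return clampStateAndIndicateChange(getState(), FnAA.getState());
+//
+// which returns CHANGED exactly when the assumed information changed.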
+
+/// Clamp the information known for all returned values of a function
+/// (identified by \p QueryingAA) into \p S.
+template <typename AAType, typename StateType = typename AAType::StateType>
+static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
+ StateType &S) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for "
+ << QueryingAA << " into " << S << "\n");
+
+ assert((QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_RETURNED ||
+ QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_CALL_SITE_RETURNED) &&
+ "Can only clamp returned value states for a function returned or call "
+ "site returned position!");
+
+ // Use an optional state as there might not be any return values and we want
+  // to join (IntegerState::operator&) the states of all that are found.
+ Optional<StateType> T;
+
+ // Callback for each possibly returned value.
+ auto CheckReturnValue = [&](Value &RV) -> bool {
+ const IRPosition &RVPos = IRPosition::value(RV);
+ const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
+ LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
+ << " @ " << RVPos << "\n");
+ const StateType &AAS = static_cast<const StateType &>(AA.getState());
+ if (T.hasValue())
+ *T &= AAS;
+ else
+ T = AAS;
+ LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T
+ << "\n");
+ return T->isValidState();
+ };
+
+ if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
+ S.indicatePessimisticFixpoint();
+ else if (T.hasValue())
+ S ^= *T;
+}
+
+/// Helper class for generic deduction: return value -> returned position.
+template <typename AAType, typename BaseType,
+ typename StateType = typename BaseType::StateType>
+struct AAReturnedFromReturnedValues : public BaseType {
+ AAReturnedFromReturnedValues(const IRPosition &IRP, Attributor &A)
+ : BaseType(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ StateType S(StateType::getBestState(this->getState()));
+ clampReturnedValueStates<AAType, StateType>(A, *this, S);
+    // TODO: If we know we visited all returned values, and thus none are
+    // assumed dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange<StateType>(this->getState(), S);
+ }
+};
+
+/// Clamp the information known at all call sites for a given argument
+/// (identified by \p QueryingAA) into \p S.
+template <typename AAType, typename StateType = typename AAType::StateType>
+static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
+ StateType &S) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
+ << QueryingAA << " into " << S << "\n");
+
+ assert(QueryingAA.getIRPosition().getPositionKind() ==
+ IRPosition::IRP_ARGUMENT &&
+ "Can only clamp call site argument states for an argument position!");
+
+  // Use an optional state as there might not be any call sites and we want
+  // to join (IntegerState::operator&) the states of all that are found.
+ Optional<StateType> T;
+
+ // The argument number which is also the call site argument number.
+ unsigned ArgNo = QueryingAA.getIRPosition().getArgNo();
+
+ auto CallSiteCheck = [&](AbstractCallSite ACS) {
+ const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
+    // Check if a corresponding argument was found or if it is not associated
+    // (which can happen for callback calls).
+ if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+ return false;
+
+ const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
+ LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
+ << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
+ const StateType &AAS = static_cast<const StateType &>(AA.getState());
+ if (T.hasValue())
+ *T &= AAS;
+ else
+ T = AAS;
+ LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T
+ << "\n");
+ return T->isValidState();
+ };
+
+ bool AllCallSitesKnown;
+ if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true,
+ AllCallSitesKnown))
+ S.indicatePessimisticFixpoint();
+ else if (T.hasValue())
+ S ^= *T;
+}
+
+/// Helper class for generic deduction: call site argument -> argument position.
+template <typename AAType, typename BaseType,
+ typename StateType = typename AAType::StateType>
+struct AAArgumentFromCallSiteArguments : public BaseType {
+ AAArgumentFromCallSiteArguments(const IRPosition &IRP, Attributor &A)
+ : BaseType(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ StateType S(StateType::getBestState(this->getState()));
+ clampCallSiteArgumentStates<AAType, StateType>(A, *this, S);
+    // TODO: If we know we visited all incoming values, and thus none are
+    // assumed dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange<StateType>(this->getState(), S);
+ }
+};
+
+/// Helper class for generic replication: function returned -> cs returned.
+template <typename AAType, typename BaseType,
+ typename StateType = typename BaseType::StateType>
+struct AACallSiteReturnedFromReturned : public BaseType {
+ AACallSiteReturnedFromReturned(const IRPosition &IRP, Attributor &A)
+ : BaseType(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ assert(this->getIRPosition().getPositionKind() ==
+ IRPosition::IRP_CALL_SITE_RETURNED &&
+ "Can only wrap function returned positions for call site returned "
+ "positions!");
+ auto &S = this->getState();
+
+ const Function *AssociatedFunction =
+ this->getIRPosition().getAssociatedFunction();
+ if (!AssociatedFunction)
+ return S.indicatePessimisticFixpoint();
+
+ IRPosition FnPos = IRPosition::returned(*AssociatedFunction);
+ const AAType &AA = A.getAAFor<AAType>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ S, static_cast<const StateType &>(AA.getState()));
+ }
+};
+
+/// Helper function to accumulate uses.
+template <class AAType, typename StateType = typename AAType::StateType>
+static void followUsesInContext(AAType &AA, Attributor &A,
+ MustBeExecutedContextExplorer &Explorer,
+ const Instruction *CtxI,
+ SetVector<const Use *> &Uses,
+ StateType &State) {
+ auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
+ for (unsigned u = 0; u < Uses.size(); ++u) {
+ const Use *U = Uses[u];
+ if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
+ bool Found = Explorer.findInContextOf(UserI, EIt, EEnd);
+ if (Found && AA.followUseInMBEC(A, U, UserI, State))
+ for (const Use &Us : UserI->uses())
+ Uses.insert(&Us);
+ }
+ }
+}
+
+/// Use the must-be-executed-context around \p I to add information into \p S.
+/// The AAType class is required to have a `followUseInMBEC` method with the
+/// following signature and behaviour:
+///
+/// bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+///                      StateType &State)
+/// U - Underlying use.
+/// I - The user of the \p U.
+/// State - The state to accumulate information into.
+/// Returns true if the value should be tracked transitively.
+///
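+/// For a concrete (minimal) implementation of this interface see
+/// AANonNullImpl::followUseInMBEC further down in this file, which queries
+/// getKnownNonNullAndDerefBytesForUse for every use it is handed.
+///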
+template <class AAType, typename StateType = typename AAType::StateType>
+static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
+ Instruction &CtxI) {
+
+ // Container for (transitive) uses of the associated value.
+ SetVector<const Use *> Uses;
+ for (const Use &U : AA.getIRPosition().getAssociatedValue().uses())
+ Uses.insert(&U);
+
+ MustBeExecutedContextExplorer &Explorer =
+ A.getInfoCache().getMustBeExecutedContextExplorer();
+
+ followUsesInContext<AAType>(AA, A, Explorer, &CtxI, Uses, S);
+
+ if (S.isAtFixpoint())
+ return;
+
+ SmallVector<const BranchInst *, 4> BrInsts;
+ auto Pred = [&](const Instruction *I) {
+ if (const BranchInst *Br = dyn_cast<BranchInst>(I))
+ if (Br->isConditional())
+ BrInsts.push_back(Br);
+ return true;
+ };
+
+ // Here, accumulate conditional branch instructions in the context. We
+ // explore the child paths and collect the known states. The disjunction of
+  // those states can be merged into the parent's own state. Let ParentState_i
+  // be the state that captures the known information for the i-th branch
+  // instruction in the context. ChildStates are created for its successors
+  // respectively.
+ //
+ // ParentS_1 = ChildS_{1, 1} /\ ChildS_{1, 2} /\ ... /\ ChildS_{1, n_1}
+ // ParentS_2 = ChildS_{2, 1} /\ ChildS_{2, 2} /\ ... /\ ChildS_{2, n_2}
+ // ...
+ // ParentS_m = ChildS_{m, 1} /\ ChildS_{m, 2} /\ ... /\ ChildS_{m, n_m}
+ //
+ // Known State |= ParentS_1 \/ ParentS_2 \/... \/ ParentS_m
+ //
+ // FIXME: Currently, recursive branches are not handled. For example, we
+  // can't deduce that ptr must be dereferenced in the function below.
+  //
+  // void f(int a, int b, int *ptr) {
+ // if(a)
+ // if (b) {
+ // *ptr = 0;
+ // } else {
+ // *ptr = 1;
+ // }
+ // else {
+ // if (b) {
+ // *ptr = 0;
+ // } else {
+ // *ptr = 1;
+ // }
+ // }
+ // }
+
+ Explorer.checkForAllContext(&CtxI, Pred);
+ for (const BranchInst *Br : BrInsts) {
+ StateType ParentState;
+
+ // The known state of the parent state is a conjunction of children's
+ // known states so it is initialized with a best state.
+ ParentState.indicateOptimisticFixpoint();
+
+ for (const BasicBlock *BB : Br->successors()) {
+ StateType ChildState;
+
+ size_t BeforeSize = Uses.size();
+ followUsesInContext(AA, A, Explorer, &BB->front(), Uses, ChildState);
+
+ // Erase uses which only appear in the child.
+ for (auto It = Uses.begin() + BeforeSize; It != Uses.end();)
+ It = Uses.erase(It);
+
+ ParentState &= ChildState;
+ }
+
+ // Use only known state.
+ S += ParentState;
+ }
+}
+
+/// ----------------------- NoUnwind Function Attribute ------------------------
+
+struct AANoUnwindImpl : AANoUnwind {
+ AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {}
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nounwind" : "may-unwind";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto Opcodes = {
+ (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
+ (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
+
+ auto CheckForNoUnwind = [&](Instruction &I) {
+ if (!I.mayThrow())
+ return true;
+
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ const auto &NoUnwindAA =
+ A.getAAFor<AANoUnwind>(*this, IRPosition::callsite_function(*CB));
+ return NoUnwindAA.isAssumedNoUnwind();
+ }
+ return false;
+ };
+
+ if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+struct AANoUnwindFunction final : public AANoUnwindImpl {
+ AANoUnwindFunction(const IRPosition &IRP, Attributor &A)
+ : AANoUnwindImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) }
+};
+
+/// NoUnwind attribute deduction for a call site.
+struct AANoUnwindCallSite final : AANoUnwindImpl {
+ AANoUnwindCallSite(const IRPosition &IRP, Attributor &A)
+ : AANoUnwindImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoUnwindImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoUnwind::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
+};
+
+/// --------------------- Function Return Values -------------------------------
+
+/// "Attribute" that collects all potential returned values and the return
+/// instructions that they arise from.
+///
+/// If there is a unique returned value R, the manifest method will:
+/// - mark R with the "returned" attribute, if R is an argument.
+class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
+
+ /// Mapping of values potentially returned by the associated function to the
+ /// return instructions that might return them.
+ MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues;
+
+ /// Mapping to remember the number of returned values for a call site such
+ /// that we can avoid updates if nothing changed.
+ DenseMap<const CallBase *, unsigned> NumReturnedValuesPerKnownAA;
+
+ /// Set of unresolved calls returned by the associated function.
+ SmallSetVector<CallBase *, 4> UnresolvedCalls;
+
+ /// State flags
+ ///
+ ///{
+ bool IsFixed = false;
+ bool IsValidState = true;
+ ///}
+
+public:
+ AAReturnedValuesImpl(const IRPosition &IRP, Attributor &A)
+ : AAReturnedValues(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // Reset the state.
+ IsFixed = false;
+ IsValidState = true;
+ ReturnedValues.clear();
+
+ Function *F = getAssociatedFunction();
+ if (!F) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ assert(!F->getReturnType()->isVoidTy() &&
+ "Did not expect a void return type!");
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F);
+
+ // Look through all arguments, if one is marked as returned we are done.
+ for (Argument &Arg : F->args()) {
+ if (Arg.hasReturnedAttr()) {
+ auto &ReturnInstSet = ReturnedValues[&Arg];
+ if (auto *Insts = OpcodeInstMap.lookup(Instruction::Ret))
+ for (Instruction *RI : *Insts)
+ ReturnInstSet.insert(cast<ReturnInst>(RI));
+
+ indicateOptimisticFixpoint();
+ return;
+ }
+ }
+
+ if (!A.isFunctionIPOAmendable(*F))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override;
+
+ /// See AbstractAttribute::getState(...).
+ AbstractState &getState() override { return *this; }
+
+ /// See AbstractAttribute::getState(...).
+ const AbstractState &getState() const override { return *this; }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ llvm::iterator_range<iterator> returned_values() override {
+ return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
+ }
+
+ llvm::iterator_range<const_iterator> returned_values() const override {
+ return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
+ }
+
+ const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const override {
+ return UnresolvedCalls;
+ }
+
+ /// Return the number of potential return values, -1 if unknown.
+ size_t getNumReturnValues() const override {
+ return isValidState() ? ReturnedValues.size() : -1;
+ }
+
+ /// Return an assumed unique return value if a single candidate is found. If
+ /// there cannot be one, return a nullptr. If it is not clear yet, return the
+ /// Optional::NoneType.
+ Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
+
+ /// See AbstractState::checkForAllReturnedValues(...).
+ bool checkForAllReturnedValuesAndReturnInsts(
+ function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
+ const override;
+
+ /// Pretty print the attribute similar to the IR representation.
+ const std::string getAsStr() const override;
+
+ /// See AbstractState::isAtFixpoint().
+ bool isAtFixpoint() const override { return IsFixed; }
+
+ /// See AbstractState::isValidState().
+ bool isValidState() const override { return IsValidState; }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...).
+ ChangeStatus indicateOptimisticFixpoint() override {
+ IsFixed = true;
+ return ChangeStatus::UNCHANGED;
+ }
+
+ ChangeStatus indicatePessimisticFixpoint() override {
+ IsFixed = true;
+ IsValidState = false;
+ return ChangeStatus::CHANGED;
+ }
+};
+
+ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ // Bookkeeping.
+ assert(isValidState());
+ STATS_DECLTRACK(KnownReturnValues, FunctionReturn,
+ "Number of function with known return values");
+
+ // Check if we have an assumed unique return value that we could manifest.
+ Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
+
+ if (!UniqueRV.hasValue() || !UniqueRV.getValue())
+ return Changed;
+
+ // Bookkeeping.
+ STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
+ "Number of function with unique return");
+
+ // Callback to replace the uses of CB with the constant C.
+ auto ReplaceCallSiteUsersWith = [&A](CallBase &CB, Constant &C) {
+ if (CB.use_empty())
+ return ChangeStatus::UNCHANGED;
+ if (A.changeValueAfterManifest(CB, C))
+ return ChangeStatus::CHANGED;
+ return ChangeStatus::UNCHANGED;
+ };
+
+ // If the assumed unique return value is an argument, annotate it.
+ if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
+ if (UniqueRVArg->getType()->canLosslesslyBitCastTo(
+ getAssociatedFunction()->getReturnType())) {
+ getIRPosition() = IRPosition::argument(*UniqueRVArg);
+ Changed = IRAttribute::manifest(A);
+ }
+ } else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) {
+ // We can replace the returned value with the unique returned constant.
+ Value &AnchorValue = getAnchorValue();
+ if (Function *F = dyn_cast<Function>(&AnchorValue)) {
+ for (const Use &U : F->uses())
+ if (CallBase *CB = dyn_cast<CallBase>(U.getUser()))
+ if (CB->isCallee(&U)) {
+ Constant *RVCCast =
+ CB->getType() == RVC->getType()
+ ? RVC
+ : ConstantExpr::getTruncOrBitCast(RVC, CB->getType());
+ Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed;
+ }
+ } else {
+ assert(isa<CallBase>(AnchorValue) &&
+ "Expcected a function or call base anchor!");
+ Constant *RVCCast =
+ AnchorValue.getType() == RVC->getType()
+ ? RVC
+ : ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType());
+ Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast);
+ }
+ if (Changed == ChangeStatus::CHANGED)
+ STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn,
+ "Number of function returns replaced by constant return");
+ }
+
+ return Changed;
+}
+
+const std::string AAReturnedValuesImpl::getAsStr() const {
+ return (isAtFixpoint() ? "returns(#" : "may-return(#") +
+ (isValidState() ? std::to_string(getNumReturnValues()) : "?") +
+ ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]";
+}
+
+Optional<Value *>
+AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const {
+ // If checkForAllReturnedValues provides a unique value, ignoring potential
+ // undef values that can also be present, it is assumed to be the actual
+ // return value and forwarded to the caller of this method. If there are
+ // multiple, a nullptr is returned indicating there cannot be a unique
+ // returned value.
+ Optional<Value *> UniqueRV;
+
+ auto Pred = [&](Value &RV) -> bool {
+ // If we found a second returned value and neither the current nor the saved
+ // one is an undef, there is no unique returned value. Undefs are special
+ // since we can pretend they have any value.
+ if (UniqueRV.hasValue() && UniqueRV != &RV &&
+ !(isa<UndefValue>(RV) || isa<UndefValue>(UniqueRV.getValue()))) {
+ UniqueRV = nullptr;
+ return false;
+ }
+
+ // Do not overwrite a value with an undef.
+ if (!UniqueRV.hasValue() || !isa<UndefValue>(RV))
+ UniqueRV = &RV;
+
+ return true;
+ };
+
+ if (!A.checkForAllReturnedValues(Pred, *this))
+ UniqueRV = nullptr;
+
+ return UniqueRV;
+}
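+
+// For illustration (hand-checked against the predicate above): a function
+// with the two returns `ret i32 %a` and `ret i32 undef` has the assumed
+// unique return value %a, since undef can be pretended to equal %a; two
+// distinct non-undef return values yield a nullptr result instead.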
+
+bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
+ function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
+ const {
+ if (!isValidState())
+ return false;
+
+ // Check all returned values but ignore call sites as long as we have not
+ // encountered an overdefined one during an update.
+ for (auto &It : ReturnedValues) {
+ Value *RV = It.first;
+
+ CallBase *CB = dyn_cast<CallBase>(RV);
+ if (CB && !UnresolvedCalls.count(CB))
+ continue;
+
+ if (!Pred(*RV, It.second))
+ return false;
+ }
+
+ return true;
+}
+
+ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
+ size_t NumUnresolvedCalls = UnresolvedCalls.size();
+ bool Changed = false;
+
+ // State used in the value traversals starting in returned values.
+ struct RVState {
+ // The map in which we collect return values -> return instrs.
+ decltype(ReturnedValues) &RetValsMap;
+ // The flag to indicate a change.
+ bool &Changed;
+ // The return instrs we come from.
+ SmallSetVector<ReturnInst *, 4> RetInsts;
+ };
+
+ // Callback for a leaf value returned by the associated function.
+ auto VisitValueCB = [](Value &Val, const Instruction *, RVState &RVS,
+ bool) -> bool {
+ auto Size = RVS.RetValsMap[&Val].size();
+ RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end());
+ bool Inserted = RVS.RetValsMap[&Val].size() != Size;
+ RVS.Changed |= Inserted;
+ LLVM_DEBUG({
+ if (Inserted)
+ dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val
+ << " => " << RVS.RetInsts.size() << "\n";
+ });
+ return true;
+ };
+
+ // Helper method to invoke the generic value traversal.
+ auto VisitReturnedValue = [&](Value &RV, RVState &RVS,
+ const Instruction *CtxI) {
+ IRPosition RetValPos = IRPosition::value(RV);
+ return genericValueTraversal<AAReturnedValues, RVState>(
+ A, RetValPos, *this, RVS, VisitValueCB, CtxI,
+ /* UseValueSimplify */ false);
+ };
+
+  // Callback for all "return instructions" live in the associated function.
+ auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) {
+ ReturnInst &Ret = cast<ReturnInst>(I);
+ RVState RVS({ReturnedValues, Changed, {}});
+ RVS.RetInsts.insert(&Ret);
+ return VisitReturnedValue(*Ret.getReturnValue(), RVS, &I);
+ };
+
+  // Start by discovering returned values from all live return instructions in
+  // the associated function.
+ if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}))
+ return indicatePessimisticFixpoint();
+
+ // Once returned values "directly" present in the code are handled we try to
+ // resolve returned calls. To avoid modifications to the ReturnedValues map
+  // while we iterate over it, we keep a record of potential new entries in a
+  // separate map, NewRVsMap.
+ decltype(ReturnedValues) NewRVsMap;
+
+ auto HandleReturnValue = [&](Value *RV, SmallSetVector<ReturnInst *, 4> &RIs) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *RV
+ << " by #" << RIs.size() << " RIs\n");
+ CallBase *CB = dyn_cast<CallBase>(RV);
+ if (!CB || UnresolvedCalls.count(CB))
+ return;
+
+ if (!CB->getCalledFunction()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
+ << "\n");
+ UnresolvedCalls.insert(CB);
+ return;
+ }
+
+ // TODO: use the function scope once we have call site AAReturnedValues.
+ const auto &RetValAA = A.getAAFor<AAReturnedValues>(
+ *this, IRPosition::function(*CB->getCalledFunction()));
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: "
+ << RetValAA << "\n");
+
+    // Skip dead ends: if we do not know anything about the returned call we
+    // mark it as unresolved and it will stay that way.
+ if (!RetValAA.getState().isValidState()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
+ << "\n");
+ UnresolvedCalls.insert(CB);
+ return;
+ }
+
+ // Do not try to learn partial information. If the callee has unresolved
+ // return values we will treat the call as unresolved/opaque.
+ auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls();
+ if (!RetValAAUnresolvedCalls.empty()) {
+ UnresolvedCalls.insert(CB);
+ return;
+ }
+
+    // Now check if we can track transitively returned values. If possible,
+    // that is, if all returned values can be represented in the current
+    // scope, do so.
+ bool Unresolved = false;
+ for (auto &RetValAAIt : RetValAA.returned_values()) {
+ Value *RetVal = RetValAAIt.first;
+ if (isa<Argument>(RetVal) || isa<CallBase>(RetVal) ||
+ isa<Constant>(RetVal))
+ continue;
+      // Anything that did not fit in the above categories cannot be resolved;
+      // mark the call as unresolved.
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value "
+ "cannot be translated: "
+ << *RetVal << "\n");
+ UnresolvedCalls.insert(CB);
+ Unresolved = true;
+ break;
+ }
+
+ if (Unresolved)
+ return;
+
+ // Now track transitively returned values.
+ unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB];
+ if (NumRetAA == RetValAA.getNumReturnValues()) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not "
+ "changed since it was seen last\n");
+ return;
+ }
+ NumRetAA = RetValAA.getNumReturnValues();
+
+ for (auto &RetValAAIt : RetValAA.returned_values()) {
+ Value *RetVal = RetValAAIt.first;
+ if (Argument *Arg = dyn_cast<Argument>(RetVal)) {
+ // Arguments are mapped to call site operands and we begin the traversal
+ // again.
+ bool Unused = false;
+ RVState RVS({NewRVsMap, Unused, RetValAAIt.second});
+ VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS, CB);
+ continue;
+ } else if (isa<CallBase>(RetVal)) {
+        // Call sites are resolved by the callee attribute over time; there is
+        // nothing for us to do here.
+ continue;
+ } else if (isa<Constant>(RetVal)) {
+ // Constants are valid everywhere, we can simply take them.
+ NewRVsMap[RetVal].insert(RIs.begin(), RIs.end());
+ continue;
+ }
+ }
+ };
+
+ for (auto &It : ReturnedValues)
+ HandleReturnValue(It.first, It.second);
+
+  // Because processing the new information can again lead to new return
+  // values we have to be careful and keep going until no new entries remain.
+  // The idea is that we reach a stable state at the end of an update: all
+  // return values have been handled and properly categorized. We might not
+  // update again if we have not requested a non-fix attribute, so we cannot
+  // "wait" for the next update to analyze a newly found return value.
+ while (!NewRVsMap.empty()) {
+ auto It = std::move(NewRVsMap.back());
+ NewRVsMap.pop_back();
+
+ assert(!It.second.empty() && "Entry does not add anything.");
+ auto &ReturnInsts = ReturnedValues[It.first];
+ for (ReturnInst *RI : It.second)
+ if (ReturnInsts.insert(RI)) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value "
+ << *It.first << " => " << *RI << "\n");
+ HandleReturnValue(It.first, ReturnInsts);
+ Changed = true;
+ }
+ }
+
+ Changed |= (NumUnresolvedCalls != UnresolvedCalls.size());
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+}
+
+struct AAReturnedValuesFunction final : public AAReturnedValuesImpl {
+ AAReturnedValuesFunction(const IRPosition &IRP, Attributor &A)
+ : AAReturnedValuesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) }
+};
+
+/// Returned values information for a call site.
+struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
+ AAReturnedValuesCallSite(const IRPosition &IRP, Attributor &A)
+ : AAReturnedValuesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites instead of
+ // redirecting requests to the callee.
+ llvm_unreachable("Abstract attributes for returned values are not "
+ "supported for call sites yet!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// ------------------------ NoSync Function Attribute -------------------------
+
+struct AANoSyncImpl : AANoSync {
+ AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {}
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nosync" : "may-sync";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+  /// Helper function used to determine whether an instruction is a
+  /// non-relaxed atomic, i.e., an atomic instruction whose ordering is
+  /// neither unordered nor monotonic.
+ static bool isNonRelaxedAtomic(Instruction *I);
+
+ /// Helper function used to determine whether an instruction is volatile.
+ static bool isVolatile(Instruction *I);
+
+  /// Helper function used to check whether an intrinsic is nosync; currently
+  /// only the mem* intrinsics (memcpy, memmove, memset) are handled.
+ static bool isNoSyncIntrinsic(Instruction *I);
+};
+
+bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
+ if (!I->isAtomic())
+ return false;
+
+ AtomicOrdering Ordering;
+ switch (I->getOpcode()) {
+ case Instruction::AtomicRMW:
+ Ordering = cast<AtomicRMWInst>(I)->getOrdering();
+ break;
+ case Instruction::Store:
+ Ordering = cast<StoreInst>(I)->getOrdering();
+ break;
+ case Instruction::Load:
+ Ordering = cast<LoadInst>(I)->getOrdering();
+ break;
+ case Instruction::Fence: {
+ auto *FI = cast<FenceInst>(I);
+ if (FI->getSyncScopeID() == SyncScope::SingleThread)
+ return false;
+ Ordering = FI->getOrdering();
+ break;
+ }
+ case Instruction::AtomicCmpXchg: {
+ AtomicOrdering Success = cast<AtomicCmpXchgInst>(I)->getSuccessOrdering();
+ AtomicOrdering Failure = cast<AtomicCmpXchgInst>(I)->getFailureOrdering();
+    // Only if both orderings are relaxed can the instruction be treated as
+    // relaxed; otherwise it is non-relaxed.
+ if (Success != AtomicOrdering::Unordered &&
+ Success != AtomicOrdering::Monotonic)
+ return true;
+ if (Failure != AtomicOrdering::Unordered &&
+ Failure != AtomicOrdering::Monotonic)
+ return true;
+ return false;
+ }
+ default:
+ llvm_unreachable(
+ "New atomic operations need to be known in the attributor.");
+ }
+
+ // Relaxed.
+ if (Ordering == AtomicOrdering::Unordered ||
+ Ordering == AtomicOrdering::Monotonic)
+ return false;
+ return true;
+}
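+
+// For illustration (hand-checked against the switch above):
+//   load atomic i32, i32* %p monotonic        -> relaxed, returns false
+//   load atomic i32, i32* %p acquire          -> non-relaxed, returns true
+//   fence syncscope("singlethread") seq_cst   -> single-thread, returns false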
+
+/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
+/// FIXME: We should improve the handling of intrinsics.
+bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+    /// Element wise atomic memory intrinsics can only be unordered,
+    /// therefore nosync.
+ case Intrinsic::memset_element_unordered_atomic:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ return true;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ if (!cast<MemIntrinsic>(II)->isVolatile())
+ return true;
+ return false;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+bool AANoSyncImpl::isVolatile(Instruction *I) {
+ assert(!isa<CallBase>(I) && "Calls should not be checked here");
+
+ switch (I->getOpcode()) {
+ case Instruction::AtomicRMW:
+ return cast<AtomicRMWInst>(I)->isVolatile();
+ case Instruction::Store:
+ return cast<StoreInst>(I)->isVolatile();
+ case Instruction::Load:
+ return cast<LoadInst>(I)->isVolatile();
+ case Instruction::AtomicCmpXchg:
+ return cast<AtomicCmpXchgInst>(I)->isVolatile();
+ default:
+ return false;
+ }
+}
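+
+// For illustration (hand-checked against the switch above): a
+// `store volatile i32 0, i32* %p` is volatile while a plain store is not;
+// call instructions are deliberately excluded by the assertion.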
+
+ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
+
+ auto CheckRWInstForNoSync = [&](Instruction &I) {
+    /// We are looking for volatile instructions or non-relaxed atomics.
+ /// FIXME: We should improve the handling of intrinsics.
+
+ if (isa<IntrinsicInst>(&I) && isNoSyncIntrinsic(&I))
+ return true;
+
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->hasFnAttr(Attribute::NoSync))
+ return true;
+
+ const auto &NoSyncAA =
+ A.getAAFor<AANoSync>(*this, IRPosition::callsite_function(*CB));
+ if (NoSyncAA.isAssumedNoSync())
+ return true;
+ return false;
+ }
+
+ if (!isVolatile(&I) && !isNonRelaxedAtomic(&I))
+ return true;
+
+ return false;
+ };
+
+ auto CheckForNoSync = [&](Instruction &I) {
+ // At this point we handled all read/write effects and they are all
+ // nosync, so they can be skipped.
+ if (I.mayReadOrWriteMemory())
+ return true;
+
+ // non-convergent and readnone imply nosync.
+ return !cast<CallBase>(I).isConvergent();
+ };
+
+ if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) ||
+ !A.checkForAllCallLikeInstructions(CheckForNoSync, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+}
+
+struct AANoSyncFunction final : public AANoSyncImpl {
+ AANoSyncFunction(const IRPosition &IRP, Attributor &A)
+ : AANoSyncImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) }
+};
+
+/// NoSync attribute deduction for a call site.
+struct AANoSyncCallSite final : AANoSyncImpl {
+ AANoSyncCallSite(const IRPosition &IRP, Attributor &A)
+ : AANoSyncImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoSyncImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoSync::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); }
+};
+
+/// ------------------------ No-Free Attributes ----------------------------
+
+struct AANoFreeImpl : public AANoFree {
+ AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto CheckForNoFree = [&](Instruction &I) {
+ const auto &CB = cast<CallBase>(I);
+ if (CB.hasFnAttr(Attribute::NoFree))
+ return true;
+
+ const auto &NoFreeAA =
+ A.getAAFor<AANoFree>(*this, IRPosition::callsite_function(CB));
+ return NoFreeAA.isAssumedNoFree();
+ };
+
+ if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nofree" : "may-free";
+ }
+};
+
+struct AANoFreeFunction final : public AANoFreeImpl {
+ AANoFreeFunction(const IRPosition &IRP, Attributor &A)
+ : AANoFreeImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) }
+};
+
+/// NoFree attribute deduction for a call site.
+struct AANoFreeCallSite final : AANoFreeImpl {
+ AANoFreeCallSite(const IRPosition &IRP, Attributor &A)
+ : AANoFreeImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoFreeImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoFree::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); }
+};
+
+/// NoFree attribute for floating values.
+struct AANoFreeFloating : AANoFreeImpl {
+ AANoFreeFloating(const IRPosition &IRP, Attributor &A)
+ : AANoFreeImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override{STATS_DECLTRACK_FLOATING_ATTR(nofree)}
+
+ /// See Abstract Attribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ const IRPosition &IRP = getIRPosition();
+
+ const auto &NoFreeAA =
+ A.getAAFor<AANoFree>(*this, IRPosition::function_scope(IRP));
+ if (NoFreeAA.isAssumedNoFree())
+ return ChangeStatus::UNCHANGED;
+
+ Value &AssociatedValue = getIRPosition().getAssociatedValue();
+ auto Pred = [&](const Use &U, bool &Follow) -> bool {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+ if (CB->isBundleOperand(&U))
+ return false;
+ if (!CB->isArgOperand(&U))
+ return true;
+ unsigned ArgNo = CB->getArgOperandNo(&U);
+
+ const auto &NoFreeArg = A.getAAFor<AANoFree>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo));
+ return NoFreeArg.isAssumedNoFree();
+ }
+
+ if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
+ isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
+ Follow = true;
+ return true;
+ }
+ if (isa<ReturnInst>(UserI))
+ return true;
+
+ // Unknown user.
+ return false;
+ };
+ if (!A.checkForAllUses(Pred, *this, AssociatedValue))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+/// NoFree attribute for an argument.
+struct AANoFreeArgument final : AANoFreeFloating {
+ AANoFreeArgument(const IRPosition &IRP, Attributor &A)
+ : AANoFreeFloating(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nofree) }
+};
+
+/// NoFree attribute for call site arguments.
+struct AANoFreeCallSiteArgument final : AANoFreeFloating {
+ AANoFreeCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AANoFreeFloating(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoFree::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nofree)};
+};
+
+/// NoFree attribute for function return value.
+struct AANoFreeReturned final : AANoFreeFloating {
+ AANoFreeReturned(const IRPosition &IRP, Attributor &A)
+ : AANoFreeFloating(IRP, A) {
+ llvm_unreachable("NoFree is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ llvm_unreachable("NoFree is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("NoFree is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// NoFree attribute deduction for a call site return value.
+struct AANoFreeCallSiteReturned final : AANoFreeFloating {
+ AANoFreeCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AANoFreeFloating(IRP, A) {}
+
+ ChangeStatus manifest(Attributor &A) override {
+ return ChangeStatus::UNCHANGED;
+ }
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) }
+};
+
+/// ------------------------ NonNull Argument Attribute ------------------------
+static int64_t getKnownNonNullAndDerefBytesForUse(
+ Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue,
+ const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) {
+ TrackUse = false;
+
+ const Value *UseV = U->get();
+ if (!UseV->getType()->isPointerTy())
+ return 0;
+
+ Type *PtrTy = UseV->getType();
+ const Function *F = I->getFunction();
+ bool NullPointerIsDefined =
+ F ? llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true;
+ const DataLayout &DL = A.getInfoCache().getDL();
+ if (const auto *CB = dyn_cast<CallBase>(I)) {
+ if (CB->isBundleOperand(U)) {
+ if (RetainedKnowledge RK = getKnowledgeFromUse(
+ U, {Attribute::NonNull, Attribute::Dereferenceable})) {
+ IsNonNull |=
+ (RK.AttrKind == Attribute::NonNull || !NullPointerIsDefined);
+ return RK.ArgValue;
+ }
+ return 0;
+ }
+
+ if (CB->isCallee(U)) {
+ IsNonNull |= !NullPointerIsDefined;
+ return 0;
+ }
+
+ unsigned ArgNo = CB->getArgOperandNo(U);
+ IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo);
+ // As long as we only use known information there is no need to track
+ // dependences here.
+ auto &DerefAA = A.getAAFor<AADereferenceable>(QueryingAA, IRP,
+ /* TrackDependence */ false);
+ IsNonNull |= DerefAA.isKnownNonNull();
+ return DerefAA.getKnownDereferenceableBytes();
+ }
+
+ // We need to follow common pointer manipulation uses to the accesses they
+ // feed into. We can try to be smart to avoid looking through things we do not
+ // like for now, e.g., non-inbounds GEPs.
+ if (isa<CastInst>(I)) {
+ TrackUse = true;
+ return 0;
+ }
+
+ if (isa<GetElementPtrInst>(I)) {
+ TrackUse = true;
+ return 0;
+ }
+
+ int64_t Offset;
+ const Value *Base =
+ getMinimalBaseOfAccsesPointerOperand(A, QueryingAA, I, Offset, DL);
+ if (Base) {
+ if (Base == &AssociatedValue &&
+ getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
+ int64_t DerefBytes =
+ (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()) + Offset;
+
+ IsNonNull |= !NullPointerIsDefined;
+ return std::max(int64_t(0), DerefBytes);
+ }
+ }
+
+  // Corner case when the offset is 0.
+ Base = getBasePointerOfAccessPointerOperand(I, Offset, DL,
+ /*AllowNonInbounds*/ true);
+ if (Base) {
+ if (Offset == 0 && Base == &AssociatedValue &&
+ getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
+ int64_t DerefBytes =
+ (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType());
+ IsNonNull |= !NullPointerIsDefined;
+ return std::max(int64_t(0), DerefBytes);
+ }
+ }
+
+ return 0;
+}
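+
+// A minimal IR sketch for illustration (not part of the original patch,
+// hypothetical names): a must-be-executed load from %p lets the deduction
+// above conclude %p is dereferenceable for the store size of the access (4
+// bytes here) and, where null is not a defined pointer, nonnull:
+//
+//   define void @f(i32* %p) {
+//     %v = load i32, i32* %p   ; implies dereferenceable(4) and nonnull on %p
+//     ret void
+//   }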
+
+struct AANonNullImpl : AANonNull {
+ AANonNullImpl(const IRPosition &IRP, Attributor &A)
+ : AANonNull(IRP, A),
+ NullIsDefined(NullPointerIsDefined(
+ getAnchorScope(),
+ getAssociatedValue().getType()->getPointerAddressSpace())) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ if (!NullIsDefined &&
+ hasAttr({Attribute::NonNull, Attribute::Dereferenceable},
+ /* IgnoreSubsumingPositions */ false, &A))
+ indicateOptimisticFixpoint();
+ else if (isa<ConstantPointerNull>(V))
+ indicatePessimisticFixpoint();
+ else
+ AANonNull::initialize(A);
+
+ bool CanBeNull = true;
+ if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull))
+ if (!CanBeNull)
+ indicateOptimisticFixpoint();
+
+ if (!getState().isAtFixpoint())
+ if (Instruction *CtxI = getCtxI())
+ followUsesInMBEC(*this, A, getState(), *CtxI);
+ }
+
+ /// See followUsesInMBEC
+ bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+ AANonNull::StateType &State) {
+ bool IsNonNull = false;
+ bool TrackUse = false;
+ getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I,
+ IsNonNull, TrackUse);
+ State.setKnown(IsNonNull);
+ return TrackUse;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nonnull" : "may-null";
+ }
+
+ /// Flag to determine if the underlying value can be null and still allow
+ /// valid accesses.
+ const bool NullIsDefined;
+};
+
+/// NonNull attribute for a floating value.
+struct AANonNullFloating : public AANonNullImpl {
+ AANonNullFloating(const IRPosition &IRP, Attributor &A)
+ : AANonNullImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ if (!NullIsDefined) {
+ const auto &DerefAA =
+ A.getAAFor<AADereferenceable>(*this, getIRPosition());
+ if (DerefAA.getAssumedDereferenceableBytes())
+ return ChangeStatus::UNCHANGED;
+ }
+
+ const DataLayout &DL = A.getDataLayout();
+
+ DominatorTree *DT = nullptr;
+ AssumptionCache *AC = nullptr;
+ InformationCache &InfoCache = A.getInfoCache();
+ if (const Function *Fn = getAnchorScope()) {
+ DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Fn);
+ AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn);
+ }
+
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
+ AANonNull::StateType &T, bool Stripped) -> bool {
+ const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ if (!isKnownNonZero(&V, DL, 0, AC, CtxI, DT))
+ T.indicatePessimisticFixpoint();
+ } else {
+ // Use abstract attribute information.
+ const AANonNull::StateType &NS =
+ static_cast<const AANonNull::StateType &>(AA.getState());
+ T ^= NS;
+ }
+ return T.isValidState();
+ };
+
+ StateType T;
+ if (!genericValueTraversal<AANonNull, StateType>(
+ A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
+};
+
+/// NonNull attribute for function return value.
+struct AANonNullReturned final
+ : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> {
+ AANonNullReturned(const IRPosition &IRP, Attributor &A)
+ : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
+};
+
+/// NonNull attribute for function argument.
+struct AANonNullArgument final
+ : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl> {
+ AANonNullArgument(const IRPosition &IRP, Attributor &A)
+ : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) }
+};
+
+struct AANonNullCallSiteArgument final : AANonNullFloating {
+ AANonNullCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AANonNullFloating(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) }
+};
+
+/// NonNull attribute for a call site return position.
+struct AANonNullCallSiteReturned final
+ : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl> {
+ AANonNullCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
+};
+
+/// ------------------------ No-Recurse Attributes ----------------------------
+
+struct AANoRecurseImpl : public AANoRecurse {
+ AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {}
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "norecurse" : "may-recurse";
+ }
+};
+
+struct AANoRecurseFunction final : AANoRecurseImpl {
+ AANoRecurseFunction(const IRPosition &IRP, Attributor &A)
+ : AANoRecurseImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoRecurseImpl::initialize(A);
+ if (const Function *F = getAnchorScope())
+ if (A.getInfoCache().getSccSize(*F) != 1)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+
+ // If all live call sites are known to be no-recurse, we are as well.
+ auto CallSitePred = [&](AbstractCallSite ACS) {
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
+ /* TrackDependence */ false, DepClassTy::OPTIONAL);
+ return NoRecurseAA.isKnownNoRecurse();
+ };
+ bool AllCallSitesKnown;
+ if (A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) {
+ // If we know all call sites and all are known no-recurse, we are done.
+ // If all known call sites, which might not be all that exist, are known
+ // to be no-recurse, we are not done but we can continue to assume
+ // no-recurse. If one of the call sites we have not visited will become
+ // live, another update is triggered.
+ if (AllCallSitesKnown)
+ indicateOptimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // If the above check does not hold anymore we look at the calls.
+ auto CheckForNoRecurse = [&](Instruction &I) {
+ const auto &CB = cast<CallBase>(I);
+ if (CB.hasFnAttr(Attribute::NoRecurse))
+ return true;
+
+ const auto &NoRecurseAA =
+ A.getAAFor<AANoRecurse>(*this, IRPosition::callsite_function(CB));
+ if (!NoRecurseAA.isAssumedNoRecurse())
+ return false;
+
+ // Recursion to the same function
+ if (CB.getCalledFunction() == getAnchorScope())
+ return false;
+
+ return true;
+ };
+
+ if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) }
+};
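+
+// A minimal IR sketch for illustration (not part of the original patch,
+// hypothetical names): @leaf contains no call-like instructions, so the
+// CheckForNoRecurse predicate above trivially succeeds and @leaf can be
+// marked norecurse; @caller then follows because its only callee is
+// norecurse and is not @caller itself:
+//
+//   define internal void @leaf() { ret void }
+//   define void @caller() {
+//     call void @leaf()
+//     ret void
+//   }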
+
+/// NoRecurse attribute deduction for a call site.
+struct AANoRecurseCallSite final : AANoRecurseImpl {
+ AANoRecurseCallSite(const IRPosition &IRP, Attributor &A)
+ : AANoRecurseImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoRecurseImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+    //       sense to specialize attributes for call sites instead of
+    //       redirecting requests to the callee.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoRecurse::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
+};
+
+/// -------------------- Undefined-Behavior Attributes ------------------------
+
+struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
+ AAUndefinedBehaviorImpl(const IRPosition &IRP, Attributor &A)
+ : AAUndefinedBehavior(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+  // We inspect instructions that may cause UB, i.e., memory accesses
+  // through a pointer, but also branches etc.
+ ChangeStatus updateImpl(Attributor &A) override {
+ const size_t UBPrevSize = KnownUBInsts.size();
+ const size_t NoUBPrevSize = AssumedNoUBInsts.size();
+
+ auto InspectMemAccessInstForUB = [&](Instruction &I) {
+ // Skip instructions that are already saved.
+ if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
+ return true;
+
+ // If we reach here, we know we have an instruction
+ // that accesses memory through a pointer operand,
+ // for which getPointerOperand() should give it to us.
+ const Value *PtrOp = getPointerOperand(&I, /* AllowVolatile */ true);
+ assert(PtrOp &&
+ "Expected pointer operand of memory accessing instruction");
+
+ // Either we stopped and the appropriate action was taken,
+ // or we got back a simplified value to continue.
+ Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I);
+ if (!SimplifiedPtrOp.hasValue())
+ return true;
+ const Value *PtrOpVal = SimplifiedPtrOp.getValue();
+
+ // A memory access through a pointer is considered UB
+ // only if the pointer has constant null value.
+ // TODO: Expand it to not only check constant values.
+ if (!isa<ConstantPointerNull>(PtrOpVal)) {
+ AssumedNoUBInsts.insert(&I);
+ return true;
+ }
+ const Type *PtrTy = PtrOpVal->getType();
+
+ // Because we only consider instructions inside functions,
+ // assume that a parent function exists.
+ const Function *F = I.getFunction();
+
+      // A memory access using a constant null pointer is only considered UB
+      // if the null pointer is _not_ defined for the target platform.
+ if (llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()))
+ AssumedNoUBInsts.insert(&I);
+ else
+ KnownUBInsts.insert(&I);
+ return true;
+ };
+
+ auto InspectBrInstForUB = [&](Instruction &I) {
+ // A conditional branch instruction is considered UB if it has `undef`
+ // condition.
+
+ // Skip instructions that are already saved.
+ if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
+ return true;
+
+ // We know we have a branch instruction.
+      auto *BrInst = cast<BranchInst>(&I);
+
+ // Unconditional branches are never considered UB.
+ if (BrInst->isUnconditional())
+ return true;
+
+ // Either we stopped and the appropriate action was taken,
+ // or we got back a simplified value to continue.
+ Optional<Value *> SimplifiedCond =
+ stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst);
+ if (!SimplifiedCond.hasValue())
+ return true;
+ AssumedNoUBInsts.insert(&I);
+ return true;
+ };
+
+ A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
+ {Instruction::Load, Instruction::Store,
+ Instruction::AtomicCmpXchg,
+ Instruction::AtomicRMW},
+ /* CheckBBLivenessOnly */ true);
+ A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br},
+ /* CheckBBLivenessOnly */ true);
+ if (NoUBPrevSize != AssumedNoUBInsts.size() ||
+ UBPrevSize != KnownUBInsts.size())
+ return ChangeStatus::CHANGED;
+ return ChangeStatus::UNCHANGED;
+ }
+
+ bool isKnownToCauseUB(Instruction *I) const override {
+ return KnownUBInsts.count(I);
+ }
+
+ bool isAssumedToCauseUB(Instruction *I) const override {
+    // In simple words: if an instruction is not in the set of instructions
+    // assumed to _not_ cause UB, then it is assumed to cause UB (that
+    // includes those in the KnownUBInsts set). The rest of the boilerplate
+    // is to ensure that it is one of the instructions we test for UB.
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
+ return !AssumedNoUBInsts.count(I);
+    case Instruction::Br: {
+      auto *BrInst = cast<BranchInst>(I);
+      if (BrInst->isUnconditional())
+        return false;
+      return !AssumedNoUBInsts.count(I);
+    }
+ default:
+ return false;
+ }
+ return false;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ if (KnownUBInsts.empty())
+ return ChangeStatus::UNCHANGED;
+ for (Instruction *I : KnownUBInsts)
+ A.changeToUnreachableAfterManifest(I);
+ return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "undefined-behavior" : "no-ub";
+ }
+
+ /// Note: The correctness of this analysis depends on the fact that the
+ /// following 2 sets will stop changing after some point.
+ /// "Change" here means that their size changes.
+ /// The size of each set is monotonically increasing
+ /// (we only add items to them) and it is upper bounded by the number of
+ /// instructions in the processed function (we can never save more
+ /// elements in either set than this number). Hence, at some point,
+ /// they will stop increasing.
+ /// Consequently, at some point, both sets will have stopped
+ /// changing, effectively making the analysis reach a fixpoint.
+
+ /// Note: These 2 sets are disjoint and an instruction can be considered
+ /// one of 3 things:
+ /// 1) Known to cause UB (AAUndefinedBehavior could prove it) and put it in
+ /// the KnownUBInsts set.
+ /// 2) Assumed to cause UB (in every updateImpl, AAUndefinedBehavior
+ /// has a reason to assume it).
+  /// 3) Assumed to not cause UB. Every other instruction - AAUndefinedBehavior
+ /// could not find a reason to assume or prove that it can cause UB,
+ /// hence it assumes it doesn't. We have a set for these instructions
+ /// so that we don't reprocess them in every update.
+ /// Note however that instructions in this set may cause UB.
+
+protected:
+ /// A set of all live instructions _known_ to cause UB.
+ SmallPtrSet<Instruction *, 8> KnownUBInsts;
+
+private:
+ /// A set of all the (live) instructions that are assumed to _not_ cause UB.
+ SmallPtrSet<Instruction *, 8> AssumedNoUBInsts;
+
+  // Should be called during updates: if we're processing an instruction
+  // \p I that depends on a value \p V, one of the following has to happen:
+ // - If the value is assumed, then stop.
+ // - If the value is known but undef, then consider it UB.
+ // - Otherwise, do specific processing with the simplified value.
+ // We return None in the first 2 cases to signify that an appropriate
+ // action was taken and the caller should stop.
+ // Otherwise, we return the simplified value that the caller should
+ // use for specific processing.
+ Optional<Value *> stopOnUndefOrAssumed(Attributor &A, const Value *V,
+ Instruction *I) {
+ const auto &ValueSimplifyAA =
+ A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*V));
+ Optional<Value *> SimplifiedV =
+ ValueSimplifyAA.getAssumedSimplifiedValue(A);
+ if (!ValueSimplifyAA.isKnown()) {
+ // Don't depend on assumed values.
+ return llvm::None;
+ }
+ if (!SimplifiedV.hasValue()) {
+ // If it is known (which we tested above) but it doesn't have a value,
+ // then we can assume `undef` and hence the instruction is UB.
+ KnownUBInsts.insert(I);
+ return llvm::None;
+ }
+ Value *Val = SimplifiedV.getValue();
+ if (isa<UndefValue>(Val)) {
+ KnownUBInsts.insert(I);
+ return llvm::None;
+ }
+ return Val;
+ }
+};
+
+struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
+ AAUndefinedBehaviorFunction(const IRPosition &IRP, Attributor &A)
+ : AAUndefinedBehaviorImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECL(UndefinedBehaviorInstruction, Instruction,
+ "Number of instructions known to have UB");
+ BUILD_STAT_NAME(UndefinedBehaviorInstruction, Instruction) +=
+ KnownUBInsts.size();
+ }
+};
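+
+// A minimal IR sketch for illustration (not part of the original patch): in
+// a function where null is not a defined pointer, the store below is known
+// UB, so manifest() above turns the code from the store onwards into an
+// `unreachable` terminator:
+//
+//   define void @f() {
+//     store i32 0, i32* null   ; known UB access through a null pointer
+//     ret void
+//   }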
+
+/// ------------------------ Will-Return Attributes ----------------------------
+
+// Helper function that checks whether a function contains any cycle that is
+// not known to be bounded. Loops with a computable maximum trip count are
+// considered bounded; any other cycle is not.
+static bool mayContainUnboundedCycle(Function &F, Attributor &A) {
+ ScalarEvolution *SE =
+ A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(F);
+ LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(F);
+  // If either SCEV or LoopInfo is not available for the function, we assume
+  // any cycle to be unbounded.
+  // We use scc_iterator, which implements Tarjan's algorithm, to find all the
+  // maximal SCCs. To detect a cycle, we only need to find the maximal ones.
+ if (!SE || !LI) {
+ for (scc_iterator<Function *> SCCI = scc_begin(&F); !SCCI.isAtEnd(); ++SCCI)
+ if (SCCI.hasCycle())
+ return true;
+ return false;
+ }
+
+ // If there's irreducible control, the function may contain non-loop cycles.
+ if (mayContainIrreducibleControl(F, LI))
+ return true;
+
+  // Any loop that does not have a max trip count is considered an unbounded
+  // cycle.
+ for (auto *L : LI->getLoopsInPreorder()) {
+ if (!SE->getSmallConstantMaxTripCount(L))
+ return true;
+ }
+ return false;
+}
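+
+// For illustration (not part of the original patch, hypothetical code): the
+// first loop below has a constant maximum trip count computable by SCEV and
+// is bounded; the second has no computable max trip count, so a function
+// containing it makes mayContainUnboundedCycle() return true:
+//
+//   for (int i = 0; i < 8; ++i) { /* ... */ }  // bounded
+//   while (keep_going()) { /* ... */ }         // potentially unbounded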
+
+struct AAWillReturnImpl : public AAWillReturn {
+ AAWillReturnImpl(const IRPosition &IRP, Attributor &A)
+ : AAWillReturn(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAWillReturn::initialize(A);
+
+ Function *F = getAnchorScope();
+ if (!F || !A.isFunctionIPOAmendable(*F) || mayContainUnboundedCycle(*F, A))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto CheckForWillReturn = [&](Instruction &I) {
+ IRPosition IPos = IRPosition::callsite_function(cast<CallBase>(I));
+ const auto &WillReturnAA = A.getAAFor<AAWillReturn>(*this, IPos);
+ if (WillReturnAA.isKnownWillReturn())
+ return true;
+ if (!WillReturnAA.isAssumedWillReturn())
+ return false;
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(*this, IPos);
+ return NoRecurseAA.isAssumedNoRecurse();
+ };
+
+ if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "willreturn" : "may-noreturn";
+ }
+};
+
+struct AAWillReturnFunction final : AAWillReturnImpl {
+ AAWillReturnFunction(const IRPosition &IRP, Attributor &A)
+ : AAWillReturnImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) }
+};
+
+/// WillReturn attribute deduction for a call site.
+struct AAWillReturnCallSite final : AAWillReturnImpl {
+ AAWillReturnCallSite(const IRPosition &IRP, Attributor &A)
+ : AAWillReturnImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAWillReturnImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+    //       sense to specialize attributes for call sites instead of
+    //       redirecting requests to the callee.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AAWillReturn::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); }
+};
+
+/// ------------------- AAReachability Attribute --------------------------
+
+struct AAReachabilityImpl : AAReachability {
+ AAReachabilityImpl(const IRPosition &IRP, Attributor &A)
+ : AAReachability(IRP, A) {}
+
+ const std::string getAsStr() const override {
+ // TODO: Return the number of reachable queries.
+ return "reachable";
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override { indicatePessimisticFixpoint(); }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+};
+
+struct AAReachabilityFunction final : public AAReachabilityImpl {
+ AAReachabilityFunction(const IRPosition &IRP, Attributor &A)
+ : AAReachabilityImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); }
+};
+
+/// ------------------------ NoAlias Argument Attribute ------------------------
+
+struct AANoAliasImpl : AANoAlias {
+ AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) {
+ assert(getAssociatedType()->isPointerTy() &&
+ "Noalias is a pointer attribute");
+ }
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "noalias" : "may-alias";
+ }
+};
+
+/// NoAlias attribute for a floating value.
+struct AANoAliasFloating final : AANoAliasImpl {
+ AANoAliasFloating(const IRPosition &IRP, Attributor &A)
+ : AANoAliasImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoAliasImpl::initialize(A);
+ Value *Val = &getAssociatedValue();
+ do {
+ CastInst *CI = dyn_cast<CastInst>(Val);
+ if (!CI)
+ break;
+ Value *Base = CI->getOperand(0);
+ if (!Base->hasOneUse())
+ break;
+ Val = Base;
+ } while (true);
+
+ if (!Val->getType()->isPointerTy()) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ if (isa<AllocaInst>(Val))
+ indicateOptimisticFixpoint();
+ else if (isa<ConstantPointerNull>(Val) &&
+ !NullPointerIsDefined(getAnchorScope(),
+ Val->getType()->getPointerAddressSpace()))
+ indicateOptimisticFixpoint();
+ else if (Val != &getAssociatedValue()) {
+ const auto &ValNoAliasAA =
+ A.getAAFor<AANoAlias>(*this, IRPosition::value(*Val));
+ if (ValNoAliasAA.isKnownNoAlias())
+ indicateOptimisticFixpoint();
+ }
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Implement this.
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(noalias)
+ }
+};
+
+/// NoAlias attribute for an argument.
+struct AANoAliasArgument final
+ : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
+ using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>;
+ AANoAliasArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+ // See callsite argument attribute and callee argument attribute.
+ if (hasAttr({Attribute::ByVal}))
+ indicateOptimisticFixpoint();
+ }
+
+  /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // We have to make sure no-alias on the argument does not break
+ // synchronization when this is a callback argument, see also [1] below.
+ // If synchronization cannot be affected, we delegate to the base updateImpl
+ // function, otherwise we give up for now.
+
+ // If the function is no-sync, no-alias cannot break synchronization.
+ const auto &NoSyncAA = A.getAAFor<AANoSync>(
+ *this, IRPosition::function_scope(getIRPosition()));
+ if (NoSyncAA.isAssumedNoSync())
+ return Base::updateImpl(A);
+
+ // If the argument is read-only, no-alias cannot break synchronization.
+ const auto &MemBehaviorAA =
+ A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
+ if (MemBehaviorAA.isAssumedReadOnly())
+ return Base::updateImpl(A);
+
+ // If the argument is never passed through callbacks, no-alias cannot break
+ // synchronization.
+ bool AllCallSitesKnown;
+ if (A.checkForAllCallSites(
+ [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this,
+ true, AllCallSitesKnown))
+ return Base::updateImpl(A);
+
+ // TODO: add no-alias but make sure it doesn't break synchronization by
+ // introducing fake uses. See:
+ // [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel,
+ // International Workshop on OpenMP 2018,
+ // http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf
+
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) }
+};
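+
+// Note for illustration (assumed scenario, not part of the original patch):
+// if an argument is also forwarded through a callback, the callback and the
+// direct use may observe the "same" pointer concurrently, and adding noalias
+// could license reorderings that break the synchronization the callback
+// provides. Hence the update above only delegates to the base class when the
+// function is nosync, the argument is readonly, or no call site is a
+// callback call.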
+
+struct AANoAliasCallSiteArgument final : AANoAliasImpl {
+ AANoAliasCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AANoAliasImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // See callsite argument attribute and callee argument attribute.
+ const auto &CB = cast<CallBase>(getAnchorValue());
+ if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias))
+ indicateOptimisticFixpoint();
+ Value &Val = getAssociatedValue();
+ if (isa<ConstantPointerNull>(Val) &&
+ !NullPointerIsDefined(getAnchorScope(),
+ Val.getType()->getPointerAddressSpace()))
+ indicateOptimisticFixpoint();
+ }
+
+  /// Determine if the underlying value may alias with the call site argument
+  /// \p OtherArgNo of \p CB (= the underlying call site).
+ bool mayAliasWithArgument(Attributor &A, AAResults *&AAR,
+ const AAMemoryBehavior &MemBehaviorAA,
+ const CallBase &CB, unsigned OtherArgNo) {
+ // We do not need to worry about aliasing with the underlying IRP.
+ if (this->getArgNo() == (int)OtherArgNo)
+ return false;
+
+ // If it is not a pointer or pointer vector we do not alias.
+ const Value *ArgOp = CB.getArgOperand(OtherArgNo);
+ if (!ArgOp->getType()->isPtrOrPtrVectorTy())
+ return false;
+
+ auto &CBArgMemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+ *this, IRPosition::callsite_argument(CB, OtherArgNo),
+ /* TrackDependence */ false);
+
+ // If the argument is readnone, there is no read-write aliasing.
+ if (CBArgMemBehaviorAA.isAssumedReadNone()) {
+ A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ return false;
+ }
+
+ // If the argument is readonly and the underlying value is readonly, there
+ // is no read-write aliasing.
+ bool IsReadOnly = MemBehaviorAA.isAssumedReadOnly();
+ if (CBArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) {
+ A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ return false;
+ }
+
+ // We have to utilize actual alias analysis queries so we need the object.
+ if (!AAR)
+ AAR = A.getInfoCache().getAAResultsForFunction(*getAnchorScope());
+
+ // Try to rule it out at the call site.
+ bool IsAliasing = !AAR || !AAR->isNoAlias(&getAssociatedValue(), ArgOp);
+ LLVM_DEBUG(dbgs() << "[NoAliasCSArg] Check alias between "
+ "callsite arguments: "
+ << getAssociatedValue() << " " << *ArgOp << " => "
+ << (IsAliasing ? "" : "no-") << "alias \n");
+
+ return IsAliasing;
+ }
+
+ bool
+ isKnownNoAliasDueToNoAliasPreservation(Attributor &A, AAResults *&AAR,
+ const AAMemoryBehavior &MemBehaviorAA,
+ const AANoAlias &NoAliasAA) {
+ // We can deduce "noalias" if the following conditions hold.
+ // (i) Associated value is assumed to be noalias in the definition.
+ // (ii) Associated value is assumed to be no-capture in all the uses
+ // possibly executed before this callsite.
+ // (iii) There is no other pointer argument which could alias with the
+ // value.
+
+ bool AssociatedValueIsNoAliasAtDef = NoAliasAA.isAssumedNoAlias();
+ if (!AssociatedValueIsNoAliasAtDef) {
+ LLVM_DEBUG(dbgs() << "[AANoAlias] " << getAssociatedValue()
+ << " is not no-alias at the definition\n");
+ return false;
+ }
+
+ A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL);
+
+ const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
+ auto &NoCaptureAA =
+ A.getAAFor<AANoCapture>(*this, VIRP, /* TrackDependence */ false);
+ // Check whether the value is captured in the scope using AANoCapture.
+ // Look at CFG and check only uses possibly executed before this
+ // callsite.
+ auto UsePred = [&](const Use &U, bool &Follow) -> bool {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+
+      // If the user is the current instruction and this is its only use.
+ if (UserI == getCtxI() && UserI->hasOneUse())
+ return true;
+
+ const Function *ScopeFn = VIRP.getAnchorScope();
+ if (ScopeFn) {
+ const auto &ReachabilityAA =
+ A.getAAFor<AAReachability>(*this, IRPosition::function(*ScopeFn));
+
+ if (!ReachabilityAA.isAssumedReachable(UserI, getCtxI()))
+ return true;
+
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+ if (CB->isArgOperand(&U)) {
+
+ unsigned ArgNo = CB->getArgOperandNo(&U);
+
+ const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo));
+
+ if (NoCaptureAA.isAssumedNoCapture())
+ return true;
+ }
+ }
+ }
+
+      // For cases that can potentially expose more users, follow the uses.
+ if (isa<GetElementPtrInst>(U) || isa<BitCastInst>(U) || isa<PHINode>(U) ||
+ isa<SelectInst>(U)) {
+ Follow = true;
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *U << "\n");
+ return false;
+ };
+
+ if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ if (!A.checkForAllUses(UsePred, *this, getAssociatedValue())) {
+ LLVM_DEBUG(
+ dbgs() << "[AANoAliasCSArg] " << getAssociatedValue()
+ << " cannot be noalias as it is potentially captured\n");
+ return false;
+ }
+ }
+ A.recordDependence(NoCaptureAA, *this, DepClassTy::OPTIONAL);
+
+ // Check there is no other pointer argument which could alias with the
+ // value passed at this call site.
+ // TODO: AbstractCallSite
+ const auto &CB = cast<CallBase>(getAnchorValue());
+ for (unsigned OtherArgNo = 0; OtherArgNo < CB.getNumArgOperands();
+ OtherArgNo++)
+ if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo))
+ return false;
+
+ return true;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // If the argument is readnone we are done as there are no accesses via the
+ // argument.
+ auto &MemBehaviorAA =
+ A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(),
+ /* TrackDependence */ false);
+ if (MemBehaviorAA.isAssumedReadNone()) {
+ A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
+ const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, VIRP,
+ /* TrackDependence */ false);
+
+ AAResults *AAR = nullptr;
+ if (isKnownNoAliasDueToNoAliasPreservation(A, AAR, MemBehaviorAA,
+ NoAliasAA)) {
+ LLVM_DEBUG(
+ dbgs() << "[AANoAlias] No-Alias deduced via no-alias preservation\n");
+ return ChangeStatus::UNCHANGED;
+ }
+
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) }
+};
+
+/// NoAlias attribute for function return value.
+struct AANoAliasReturned final : AANoAliasImpl {
+ AANoAliasReturned(const IRPosition &IRP, Attributor &A)
+ : AANoAliasImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ virtual ChangeStatus updateImpl(Attributor &A) override {
+
+ auto CheckReturnValue = [&](Value &RV) -> bool {
+ if (Constant *C = dyn_cast<Constant>(&RV))
+ if (C->isNullValue() || isa<UndefValue>(C))
+ return true;
+
+      /// For now, we can only deduce noalias if the value is a call site.
+      /// FIXME: add more support.
+ if (!isa<CallBase>(&RV))
+ return false;
+
+ const IRPosition &RVPos = IRPosition::value(RV);
+ const auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, RVPos);
+ if (!NoAliasAA.isAssumedNoAlias())
+ return false;
+
+ const auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, RVPos);
+ return NoCaptureAA.isAssumedNoCaptureMaybeReturned();
+ };
+
+ if (!A.checkForAllReturnedValues(CheckReturnValue, *this))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) }
+};
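+
+// A minimal IR sketch for illustration (not part of the original patch,
+// hypothetical names): every returned value below is either null/undef or a
+// noalias, non-captured call result, so the return position of @wrapper can
+// itself be marked noalias:
+//
+//   declare noalias i8* @malloc(i64)
+//   define i8* @wrapper(i64 %n) {
+//     %m = call noalias i8* @malloc(i64 %n)
+//     ret i8* %m
+//   }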
+
+/// NoAlias attribute deduction for a call site return value.
+struct AANoAliasCallSiteReturned final : AANoAliasImpl {
+ AANoAliasCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AANoAliasImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoAliasImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site return values
+    //       instead of redirecting requests to the callee's return position.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::returned(*F);
+ auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
+};
+
+/// ------------------- AAIsDead Function Attribute -----------------------
+
+struct AAIsDeadValueImpl : public AAIsDead {
+ AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {}
+
+ /// See AAIsDead::isAssumedDead().
+ bool isAssumedDead() const override { return getAssumed(); }
+
+ /// See AAIsDead::isKnownDead().
+ bool isKnownDead() const override { return getKnown(); }
+
+ /// See AAIsDead::isAssumedDead(BasicBlock *).
+ bool isAssumedDead(const BasicBlock *BB) const override { return false; }
+
+ /// See AAIsDead::isKnownDead(BasicBlock *).
+ bool isKnownDead(const BasicBlock *BB) const override { return false; }
+
+ /// See AAIsDead::isAssumedDead(Instruction *I).
+ bool isAssumedDead(const Instruction *I) const override {
+ return I == getCtxI() && isAssumedDead();
+ }
+
+ /// See AAIsDead::isKnownDead(Instruction *I).
+ bool isKnownDead(const Instruction *I) const override {
+ return isAssumedDead(I) && getKnown();
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return isAssumedDead() ? "assumed-dead" : "assumed-live";
+ }
+
+ /// Check if all uses are assumed dead.
+ bool areAllUsesAssumedDead(Attributor &A, Value &V) {
+ auto UsePred = [&](const Use &U, bool &Follow) { return false; };
+    // Explicitly set the dependence class to required because we want a long
+    // chain of N dependent instructions to be considered live as soon as one
+    // of them is, without going through N update cycles. This is not required
+    // for correctness.
+ return A.checkForAllUses(UsePred, *this, V, DepClassTy::REQUIRED);
+ }
+
+ /// Determine if \p I is assumed to be side-effect free.
+ bool isAssumedSideEffectFree(Attributor &A, Instruction *I) {
+ if (!I || wouldInstructionBeTriviallyDead(I))
+ return true;
+
+ auto *CB = dyn_cast<CallBase>(I);
+ if (!CB || isa<IntrinsicInst>(CB))
+ return false;
+
+ const IRPosition &CallIRP = IRPosition::callsite_function(*CB);
+ const auto &NoUnwindAA = A.getAndUpdateAAFor<AANoUnwind>(
+ *this, CallIRP, /* TrackDependence */ false);
+ if (!NoUnwindAA.isAssumedNoUnwind())
+ return false;
+ if (!NoUnwindAA.isKnownNoUnwind())
+ A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL);
+
+ const auto &MemBehaviorAA = A.getAndUpdateAAFor<AAMemoryBehavior>(
+ *this, CallIRP, /* TrackDependence */ false);
+ if (MemBehaviorAA.isAssumedReadOnly()) {
+ if (!MemBehaviorAA.isKnownReadOnly())
+ A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ return true;
+ }
+ return false;
+ }
+};
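+
+// A minimal IR sketch for illustration (not part of the original patch,
+// hypothetical callee name): a call that is assumed nounwind and readonly
+// qualifies as side-effect free above, so if its result has no live uses the
+// whole call is a candidate for deletion:
+//
+//   %unused = call i32 @pure_helper(i32 %x)   ; readonly + nounwind, dead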
+
+struct AAIsDeadFloating : public AAIsDeadValueImpl {
+ AAIsDeadFloating(const IRPosition &IRP, Attributor &A)
+ : AAIsDeadValueImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (isa<UndefValue>(getAssociatedValue())) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
+ if (!isAssumedSideEffectFree(A, I))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
+ if (!isAssumedSideEffectFree(A, I))
+ return indicatePessimisticFixpoint();
+
+ if (!areAllUsesAssumedDead(A, getAssociatedValue()))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ if (auto *I = dyn_cast<Instruction>(&V)) {
+      // If we get here we basically know the users are all dead. We check
+      // isAssumedSideEffectFree again here because the users being dead does
+      // not imply the instruction (= call) itself is removable; it might
+      // still be needed for its side effects.
+ if (isAssumedSideEffectFree(A, I) && !isa<InvokeInst>(I)) {
+ A.deleteAfterManifest(*I);
+ return ChangeStatus::CHANGED;
+ }
+ }
+ if (V.use_empty())
+ return ChangeStatus::UNCHANGED;
+
+ bool UsedAssumedInformation = false;
+ Optional<Constant *> C =
+ A.getAssumedConstant(V, *this, UsedAssumedInformation);
+ if (C.hasValue() && C.getValue())
+ return ChangeStatus::UNCHANGED;
+
+ // Replace the value with undef as it is dead but keep droppable uses around
+ // as they provide information we don't want to give up on just yet.
+ UndefValue &UV = *UndefValue::get(V.getType());
+ bool AnyChange =
+        A.changeValueAfterManifest(V, UV, /* ChangeDroppable */ false);
+ return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(IsDead)
+ }
+};
+
+struct AAIsDeadArgument : public AAIsDeadFloating {
+ AAIsDeadArgument(const IRPosition &IRP, Attributor &A)
+ : AAIsDeadFloating(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (!A.isFunctionIPOAmendable(*getAnchorScope()))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = AAIsDeadFloating::manifest(A);
+ Argument &Arg = *getAssociatedArgument();
+ if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {}))
+ if (A.registerFunctionSignatureRewrite(
+ Arg, /* ReplacementTypes */ {},
+ Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{},
+ Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) {
+ Arg.dropDroppableUses();
+ return ChangeStatus::CHANGED;
+ }
+ return Changed;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(IsDead) }
+};
+
+struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
+ AAIsDeadCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAIsDeadValueImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (isa<UndefValue>(getAssociatedValue()))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(), static_cast<const AAIsDead::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ CallBase &CB = cast<CallBase>(getAnchorValue());
+ Use &U = CB.getArgOperandUse(getArgNo());
+ assert(!isa<UndefValue>(U.get()) &&
+ "Expected undef values to be filtered out!");
+ UndefValue &UV = *UndefValue::get(U->getType());
+ if (A.changeUseAfterManifest(U, UV))
+ return ChangeStatus::CHANGED;
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(IsDead) }
+};
+
+struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
+ AAIsDeadCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAIsDeadFloating(IRP, A), IsAssumedSideEffectFree(true) {}
+
+ /// See AAIsDead::isAssumedDead().
+ bool isAssumedDead() const override {
+ return AAIsDeadFloating::isAssumedDead() && IsAssumedSideEffectFree;
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (isa<UndefValue>(getAssociatedValue())) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ // We track this separately as a secondary state.
+ IsAssumedSideEffectFree = isAssumedSideEffectFree(A, getCtxI());
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ if (IsAssumedSideEffectFree && !isAssumedSideEffectFree(A, getCtxI())) {
+ IsAssumedSideEffectFree = false;
+ Changed = ChangeStatus::CHANGED;
+ }
+
+ if (!areAllUsesAssumedDead(A, getAssociatedValue()))
+ return indicatePessimisticFixpoint();
+ return Changed;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (IsAssumedSideEffectFree)
+ STATS_DECLTRACK_CSRET_ATTR(IsDead)
+ else
+ STATS_DECLTRACK_CSRET_ATTR(UnusedResult)
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return isAssumedDead()
+ ? "assumed-dead"
+ : (getAssumed() ? "assumed-dead-users" : "assumed-live");
+ }
+
+private:
+ bool IsAssumedSideEffectFree;
+};
+
+struct AAIsDeadReturned : public AAIsDeadValueImpl {
+ AAIsDeadReturned(const IRPosition &IRP, Attributor &A)
+ : AAIsDeadValueImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+
+ A.checkForAllInstructions([](Instruction &) { return true; }, *this,
+ {Instruction::Ret});
+
+ auto PredForCallSite = [&](AbstractCallSite ACS) {
+ if (ACS.isCallbackCall() || !ACS.getInstruction())
+ return false;
+ return areAllUsesAssumedDead(A, *ACS.getInstruction());
+ };
+
+ bool AllCallSitesKnown;
+ if (!A.checkForAllCallSites(PredForCallSite, *this, true,
+ AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // TODO: Rewrite the signature to return void?
+ bool AnyChange = false;
+ UndefValue &UV = *UndefValue::get(getAssociatedFunction()->getReturnType());
+ auto RetInstPred = [&](Instruction &I) {
+ ReturnInst &RI = cast<ReturnInst>(I);
+ if (!isa<UndefValue>(RI.getReturnValue()))
+ AnyChange |= A.changeUseAfterManifest(RI.getOperandUse(0), UV);
+ return true;
+ };
+ A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret});
+ return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(IsDead) }
+};
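+
+// For illustration (not part of the original patch, hypothetical names): if
+// no (known, non-callback) caller uses the result of @f, AAIsDeadReturned's
+// manifest() above rewrites each `ret i32 %v` in @f to `ret i32 undef`,
+// paving the way for a later signature rewrite to return void:
+//
+//   %ignored = call i32 @f()   ; result has no further uses in any caller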
+
+struct AAIsDeadFunction : public AAIsDead {
+ AAIsDeadFunction(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ const Function *F = getAnchorScope();
+ if (F && !F->isDeclaration()) {
+ ToBeExploredFrom.insert(&F->getEntryBlock().front());
+ assumeLive(A, F->getEntryBlock());
+ }
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" +
+ std::to_string(getAnchorScope()->size()) + "][#TBEP " +
+ std::to_string(ToBeExploredFrom.size()) + "][#KDE " +
+ std::to_string(KnownDeadEnds.size()) + "]";
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ assert(getState().isValidState() &&
+ "Attempted to manifest an invalid state!");
+
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ Function &F = *getAnchorScope();
+
+ if (AssumedLiveBlocks.empty()) {
+ A.deleteAfterManifest(F);
+ return ChangeStatus::CHANGED;
+ }
+
+    // Flag to determine if we can change an invoke to a call assuming the
+    // callee is nounwind. This is not possible if the personality of the
+    // function allows catching asynchronous exceptions.
+ bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
+
+ KnownDeadEnds.set_union(ToBeExploredFrom);
+ for (const Instruction *DeadEndI : KnownDeadEnds) {
+ auto *CB = dyn_cast<CallBase>(DeadEndI);
+ if (!CB)
+ continue;
+ const auto &NoReturnAA = A.getAndUpdateAAFor<AANoReturn>(
+ *this, IRPosition::callsite_function(*CB), /* TrackDependence */ true,
+ DepClassTy::OPTIONAL);
+ bool MayReturn = !NoReturnAA.isAssumedNoReturn();
+ if (MayReturn && (!Invoke2CallAllowed || !isa<InvokeInst>(CB)))
+ continue;
+
+ if (auto *II = dyn_cast<InvokeInst>(DeadEndI))
+ A.registerInvokeWithDeadSuccessor(const_cast<InvokeInst &>(*II));
+ else
+ A.changeToUnreachableAfterManifest(
+ const_cast<Instruction *>(DeadEndI->getNextNode()));
+ HasChanged = ChangeStatus::CHANGED;
+ }
+
+ STATS_DECL(AAIsDead, BasicBlock, "Number of dead basic blocks deleted.");
+ for (BasicBlock &BB : F)
+ if (!AssumedLiveBlocks.count(&BB)) {
+ A.deleteAfterManifest(BB);
+ ++BUILD_STAT_NAME(AAIsDead, BasicBlock);
+ }
+
+ return HasChanged;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+ /// Returns true if the function is assumed dead.
+ bool isAssumedDead() const override { return false; }
+
+ /// See AAIsDead::isKnownDead().
+ bool isKnownDead() const override { return false; }
+
+ /// See AAIsDead::isAssumedDead(BasicBlock *).
+ bool isAssumedDead(const BasicBlock *BB) const override {
+ assert(BB->getParent() == getAnchorScope() &&
+ "BB must be in the same anchor scope function.");
+
+ if (!getAssumed())
+ return false;
+ return !AssumedLiveBlocks.count(BB);
+ }
+
+ /// See AAIsDead::isKnownDead(BasicBlock *).
+ bool isKnownDead(const BasicBlock *BB) const override {
+ return getKnown() && isAssumedDead(BB);
+ }
+
+ /// See AAIsDead::isAssumed(Instruction *I).
+ bool isAssumedDead(const Instruction *I) const override {
+ assert(I->getParent()->getParent() == getAnchorScope() &&
+ "Instruction must be in the same anchor scope function.");
+
+ if (!getAssumed())
+ return false;
+
+    // If it is not in AssumedLiveBlocks, then it is for sure dead.
+    // Otherwise, it can still come after a noreturn call in a live block.
+ if (!AssumedLiveBlocks.count(I->getParent()))
+ return true;
+
+ // If it is not after a liveness barrier it is live.
+ const Instruction *PrevI = I->getPrevNode();
+ while (PrevI) {
+ if (KnownDeadEnds.count(PrevI) || ToBeExploredFrom.count(PrevI))
+ return true;
+ PrevI = PrevI->getPrevNode();
+ }
+ return false;
+ }
+
+ /// See AAIsDead::isKnownDead(Instruction *I).
+ bool isKnownDead(const Instruction *I) const override {
+ return getKnown() && isAssumedDead(I);
+ }
+
+  /// Assume \p BB is (partially) live now and indicate to the Attributor \p A
+  /// that internal functions called from \p BB should now be looked at.
+ bool assumeLive(Attributor &A, const BasicBlock &BB) {
+ if (!AssumedLiveBlocks.insert(&BB).second)
+ return false;
+
+ // We assume that all of BB is (probably) live now and if there are calls to
+ // internal functions we will assume that those are now live as well. This
+ // is a performance optimization for blocks with calls to a lot of internal
+ // functions. It can however cause dead functions to be treated as live.
+ for (const Instruction &I : BB)
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ if (const Function *F = CB->getCalledFunction())
+ if (F->hasLocalLinkage())
+ A.markLiveInternalFunction(*F);
+ return true;
+ }
+
+  /// Collection of instructions that need to be explored again, e.g., because
+  /// we assumed they do not transfer control to (one of their) successors.
+ SmallSetVector<const Instruction *, 8> ToBeExploredFrom;
+
+ /// Collection of instructions that are known to not transfer control.
+ SmallSetVector<const Instruction *, 8> KnownDeadEnds;
+
+ /// Collection of all assumed live BasicBlocks.
+ DenseSet<const BasicBlock *> AssumedLiveBlocks;
+};
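+
+// A minimal IR sketch for illustration (not part of the original patch,
+// hypothetical names): exploration starts at the entry block; if the call
+// below is assumed noreturn it becomes a (known) dead end, the branch after
+// it is never explored, and %cont is never added to AssumedLiveBlocks, so it
+// can be deleted on manifest:
+//
+//   entry:
+//     call void @abort_like()   ; assumed noreturn => dead end
+//     br label %cont            ; never reached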
+
+static bool
+identifyAliveSuccessors(Attributor &A, const CallBase &CB,
+ AbstractAttribute &AA,
+ SmallVectorImpl<const Instruction *> &AliveSuccessors) {
+ const IRPosition &IPos = IRPosition::callsite_function(CB);
+
+ const auto &NoReturnAA = A.getAndUpdateAAFor<AANoReturn>(
+ AA, IPos, /* TrackDependence */ true, DepClassTy::OPTIONAL);
+ if (NoReturnAA.isAssumedNoReturn())
+ return !NoReturnAA.isKnownNoReturn();
+ if (CB.isTerminator())
+ AliveSuccessors.push_back(&CB.getSuccessor(0)->front());
+ else
+ AliveSuccessors.push_back(CB.getNextNode());
+ return false;
+}
+
+static bool
+identifyAliveSuccessors(Attributor &A, const InvokeInst &II,
+ AbstractAttribute &AA,
+ SmallVectorImpl<const Instruction *> &AliveSuccessors) {
+ bool UsedAssumedInformation =
+ identifyAliveSuccessors(A, cast<CallBase>(II), AA, AliveSuccessors);
+
+  // First, determine if we can change an invoke to a call assuming the
+  // callee is nounwind. This is not possible if the personality of the
+  // function allows catching asynchronous exceptions.
+ if (AAIsDeadFunction::mayCatchAsynchronousExceptions(*II.getFunction())) {
+ AliveSuccessors.push_back(&II.getUnwindDest()->front());
+ } else {
+ const IRPosition &IPos = IRPosition::callsite_function(II);
+ const auto &AANoUnw = A.getAndUpdateAAFor<AANoUnwind>(
+ AA, IPos, /* TrackDependence */ true, DepClassTy::OPTIONAL);
+ if (AANoUnw.isAssumedNoUnwind()) {
+ UsedAssumedInformation |= !AANoUnw.isKnownNoUnwind();
+ } else {
+ AliveSuccessors.push_back(&II.getUnwindDest()->front());
+ }
+ }
+ return UsedAssumedInformation;
+}
+
+static bool
+identifyAliveSuccessors(Attributor &A, const BranchInst &BI,
+ AbstractAttribute &AA,
+ SmallVectorImpl<const Instruction *> &AliveSuccessors) {
+ bool UsedAssumedInformation = false;
+ if (BI.getNumSuccessors() == 1) {
+ AliveSuccessors.push_back(&BI.getSuccessor(0)->front());
+ } else {
+ Optional<ConstantInt *> CI = getAssumedConstantInt(
+ A, *BI.getCondition(), AA, UsedAssumedInformation);
+ if (!CI.hasValue()) {
+ // No value yet, assume both edges are dead.
+ } else if (CI.getValue()) {
+ const BasicBlock *SuccBB =
+ BI.getSuccessor(1 - CI.getValue()->getZExtValue());
+ AliveSuccessors.push_back(&SuccBB->front());
+ } else {
+ AliveSuccessors.push_back(&BI.getSuccessor(0)->front());
+ AliveSuccessors.push_back(&BI.getSuccessor(1)->front());
+ UsedAssumedInformation = false;
+ }
+ }
+ return UsedAssumedInformation;
+}
+
+static bool
+identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
+ AbstractAttribute &AA,
+ SmallVectorImpl<const Instruction *> &AliveSuccessors) {
+ bool UsedAssumedInformation = false;
+ Optional<ConstantInt *> CI =
+ getAssumedConstantInt(A, *SI.getCondition(), AA, UsedAssumedInformation);
+ if (!CI.hasValue()) {
+ // No value yet, assume all edges are dead.
+ } else if (CI.getValue()) {
+ for (auto &CaseIt : SI.cases()) {
+ if (CaseIt.getCaseValue() == CI.getValue()) {
+ AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front());
+ return UsedAssumedInformation;
+ }
+ }
+ AliveSuccessors.push_back(&SI.getDefaultDest()->front());
+ return UsedAssumedInformation;
+ } else {
+ for (const BasicBlock *SuccBB : successors(SI.getParent()))
+ AliveSuccessors.push_back(&SuccBB->front());
+ }
+ return UsedAssumedInformation;
+}
+
+ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ LLVM_DEBUG(dbgs() << "[AAIsDead] Live [" << AssumedLiveBlocks.size() << "/"
+ << getAnchorScope()->size() << "] BBs and "
+ << ToBeExploredFrom.size() << " exploration points and "
+ << KnownDeadEnds.size() << " known dead ends\n");
+
+ // Copy and clear the list of instructions we need to explore from. It is
+ // refilled with instructions the next update has to look at.
+ SmallVector<const Instruction *, 8> Worklist(ToBeExploredFrom.begin(),
+ ToBeExploredFrom.end());
+ decltype(ToBeExploredFrom) NewToBeExploredFrom;
+
+ SmallVector<const Instruction *, 8> AliveSuccessors;
+ while (!Worklist.empty()) {
+ const Instruction *I = Worklist.pop_back_val();
+ LLVM_DEBUG(dbgs() << "[AAIsDead] Exploration inst: " << *I << "\n");
+
+ AliveSuccessors.clear();
+
+ bool UsedAssumedInformation = false;
+ switch (I->getOpcode()) {
+ // TODO: look for (assumed) UB to backwards propagate "deadness".
+ default:
+ if (I->isTerminator()) {
+ for (const BasicBlock *SuccBB : successors(I->getParent()))
+ AliveSuccessors.push_back(&SuccBB->front());
+ } else {
+ AliveSuccessors.push_back(I->getNextNode());
+ }
+ break;
+ case Instruction::Call:
+ UsedAssumedInformation = identifyAliveSuccessors(A, cast<CallInst>(*I),
+ *this, AliveSuccessors);
+ break;
+ case Instruction::Invoke:
+ UsedAssumedInformation = identifyAliveSuccessors(A, cast<InvokeInst>(*I),
+ *this, AliveSuccessors);
+ break;
+ case Instruction::Br:
+ UsedAssumedInformation = identifyAliveSuccessors(A, cast<BranchInst>(*I),
+ *this, AliveSuccessors);
+ break;
+ case Instruction::Switch:
+ UsedAssumedInformation = identifyAliveSuccessors(A, cast<SwitchInst>(*I),
+ *this, AliveSuccessors);
+ break;
+ }
+
+ if (UsedAssumedInformation) {
+ NewToBeExploredFrom.insert(I);
+ } else {
+ Change = ChangeStatus::CHANGED;
+ if (AliveSuccessors.empty() ||
+ (I->isTerminator() && AliveSuccessors.size() < I->getNumSuccessors()))
+ KnownDeadEnds.insert(I);
+ }
+
+ LLVM_DEBUG(dbgs() << "[AAIsDead] #AliveSuccessors: "
+ << AliveSuccessors.size() << " UsedAssumedInformation: "
+ << UsedAssumedInformation << "\n");
+
+ for (const Instruction *AliveSuccessor : AliveSuccessors) {
+ if (!I->isTerminator()) {
+ assert(AliveSuccessors.size() == 1 &&
+ "Non-terminator expected to have a single successor!");
+ Worklist.push_back(AliveSuccessor);
+ } else {
+ if (assumeLive(A, *AliveSuccessor->getParent()))
+ Worklist.push_back(AliveSuccessor);
+ }
+ }
+ }
+
+ ToBeExploredFrom = std::move(NewToBeExploredFrom);
+
+ // If we know everything is live there is no need to query for liveness.
+ // Instead, indicating a pessimistic fixpoint will cause the state to be
+ // "invalid" and all queries to be answered conservatively without lookups.
+ // To be in this state we have to (1) finish the exploration, (2) not
+ // discover any non-trivial dead end, and (3) not rule unreachable code
+ // dead.
+ if (ToBeExploredFrom.empty() &&
+ getAnchorScope()->size() == AssumedLiveBlocks.size() &&
+ llvm::all_of(KnownDeadEnds, [](const Instruction *DeadEndI) {
+ return DeadEndI->isTerminator() && DeadEndI->getNumSuccessors() == 0;
+ }))
+ return indicatePessimisticFixpoint();
+ return Change;
+}
+
+/// Liveness information for a call site.
+struct AAIsDeadCallSite final : AAIsDeadFunction {
+ AAIsDeadCallSite(const IRPosition &IRP, Attributor &A)
+ : AAIsDeadFunction(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites instead of
+ // redirecting requests to the callee.
+ llvm_unreachable("Abstract attributes for liveness are not "
+ "supported for call sites yet!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// -------------------- Dereferenceable Argument Attribute --------------------
+
+template <>
+ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
+ const DerefState &R) {
+ ChangeStatus CS0 =
+ clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState);
+ ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState);
+ return CS0 | CS1;
+}
+
+struct AADereferenceableImpl : AADereferenceable {
+ AADereferenceableImpl(const IRPosition &IRP, Attributor &A)
+ : AADereferenceable(IRP, A) {}
+ using StateType = DerefState;
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ SmallVector<Attribute, 4> Attrs;
+ getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
+ Attrs, /* IgnoreSubsumingPositions */ false, &A);
+ for (const Attribute &Attr : Attrs)
+ takeKnownDerefBytesMaximum(Attr.getValueAsInt());
+
+ const IRPosition &IRP = this->getIRPosition();
+ NonNullAA = &A.getAAFor<AANonNull>(*this, IRP,
+ /* TrackDependence */ false);
+
+ bool CanBeNull;
+ takeKnownDerefBytesMaximum(
+ IRP.getAssociatedValue().getPointerDereferenceableBytes(
+ A.getDataLayout(), CanBeNull));
+
+ bool IsFnInterface = IRP.isFnInterfaceKind();
+ Function *FnScope = IRP.getAnchorScope();
+ if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ if (Instruction *CtxI = getCtxI())
+ followUsesInMBEC(*this, A, getState(), *CtxI);
+ }
+
+ /// See AbstractAttribute::getState()
+ /// {
+ StateType &getState() override { return *this; }
+ const StateType &getState() const override { return *this; }
+ /// }
+
+ /// Helper function for collecting accessed bytes in must-be-executed-context
+ void addAccessedBytesForUse(Attributor &A, const Use *U, const Instruction *I,
+ DerefState &State) {
+ const Value *UseV = U->get();
+ if (!UseV->getType()->isPointerTy())
+ return;
+
+ Type *PtrTy = UseV->getType();
+ const DataLayout &DL = A.getDataLayout();
+ int64_t Offset;
+ if (const Value *Base = getBasePointerOfAccessPointerOperand(
+ I, Offset, DL, /*AllowNonInbounds*/ true)) {
+ if (Base == &getAssociatedValue() &&
+ getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
+ uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType());
+ State.addAccessedBytes(Offset, Size);
+ }
+ }
+ return;
+ }
+
+ /// See followUsesInMBEC
+ bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+ AADereferenceable::StateType &State) {
+ bool IsNonNull = false;
+ bool TrackUse = false;
+ int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
+ A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
+ LLVM_DEBUG(dbgs() << "[AADereferenceable] Deref bytes: " << DerefBytes
+ << " for instruction " << *I << "\n");
+
+ addAccessedBytesForUse(A, U, I, State);
+ State.takeKnownDerefBytesMaximum(DerefBytes);
+ return TrackUse;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Change = AADereferenceable::manifest(A);
+ if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) {
+ removeAttrs({Attribute::DereferenceableOrNull});
+ return ChangeStatus::CHANGED;
+ }
+ return Change;
+ }
+
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ // TODO: Add *_globally support
+ if (isAssumedNonNull())
+ Attrs.emplace_back(Attribute::getWithDereferenceableBytes(
+ Ctx, getAssumedDereferenceableBytes()));
+ else
+ Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes(
+ Ctx, getAssumedDereferenceableBytes()));
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ if (!getAssumedDereferenceableBytes())
+ return "unknown-dereferenceable";
+ return std::string("dereferenceable") +
+ (isAssumedNonNull() ? "" : "_or_null") +
+ (isAssumedGlobal() ? "_globally" : "") + "<" +
+ std::to_string(getKnownDereferenceableBytes()) + "-" +
+ std::to_string(getAssumedDereferenceableBytes()) + ">";
+ }
+};
+
+/// Dereferenceable attribute for a floating value.
+struct AADereferenceableFloating : AADereferenceableImpl {
+ AADereferenceableFloating(const IRPosition &IRP, Attributor &A)
+ : AADereferenceableImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ const DataLayout &DL = A.getDataLayout();
+
+ auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T,
+ bool Stripped) -> bool {
+ unsigned IdxWidth =
+ DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
+ APInt Offset(IdxWidth, 0);
+ const Value *Base =
+ stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false);
+
+ const auto &AA =
+ A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
+ int64_t DerefBytes = 0;
+ if (!Stripped && this == &AA) {
+ // Use IR information if we did not strip anything.
+ // TODO: track globally.
+ bool CanBeNull;
+ DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull);
+ T.GlobalState.indicatePessimisticFixpoint();
+ } else {
+ const DerefState &DS = static_cast<const DerefState &>(AA.getState());
+ DerefBytes = DS.DerefBytesState.getAssumed();
+ T.GlobalState &= DS.GlobalState;
+ }
+
+ // For now we do not try to "increase" dereferenceability due to negative
+ // indices as we first have to come up with code to deal with loops and
+ // with overflows of the dereferenceable bytes.
+ int64_t OffsetSExt = Offset.getSExtValue();
+ if (OffsetSExt < 0)
+ OffsetSExt = 0;
+
+ T.takeAssumedDerefBytesMinimum(
+ std::max(int64_t(0), DerefBytes - OffsetSExt));
+
+ if (this == &AA) {
+ if (!Stripped) {
+ // If nothing was stripped IR information is all we got.
+ T.takeKnownDerefBytesMaximum(
+ std::max(int64_t(0), DerefBytes - OffsetSExt));
+ T.indicatePessimisticFixpoint();
+ } else if (OffsetSExt > 0) {
+ // If something was stripped but there is circular reasoning we look
+ // at the offset. If it is positive we basically decrease the
+ // dereferenceable bytes in a circular loop now, which will simply
+ // drive them down to the known value in a very slow way that we
+ // can accelerate.
+ T.indicatePessimisticFixpoint();
+ }
+ }
+
+ return T.isValidState();
+ };
+
+ DerefState T;
+ if (!genericValueTraversal<AADereferenceable, DerefState>(
+ A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(dereferenceable)
+ }
+};
+
+/// Dereferenceable attribute for a return value.
+struct AADereferenceableReturned final
+ : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl> {
+ AADereferenceableReturned(const IRPosition &IRP, Attributor &A)
+ : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl>(
+ IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(dereferenceable)
+ }
+};
+
+/// Dereferenceable attribute for an argument
+struct AADereferenceableArgument final
+ : AAArgumentFromCallSiteArguments<AADereferenceable,
+ AADereferenceableImpl> {
+ using Base =
+ AAArgumentFromCallSiteArguments<AADereferenceable, AADereferenceableImpl>;
+ AADereferenceableArgument(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(dereferenceable)
+ }
+};
+
+/// Dereferenceable attribute for a call site argument.
+struct AADereferenceableCallSiteArgument final : AADereferenceableFloating {
+ AADereferenceableCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AADereferenceableFloating(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(dereferenceable)
+ }
+};
+
+/// Dereferenceable attribute deduction for a call site return value.
+struct AADereferenceableCallSiteReturned final
+ : AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl> {
+ using Base =
+ AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl>;
+ AADereferenceableCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(dereferenceable);
+ }
+};
+
+// ------------------------ Align Argument Attribute ------------------------
+
+static unsigned getKnownAlignForUse(Attributor &A,
+ AbstractAttribute &QueryingAA,
+ Value &AssociatedValue, const Use *U,
+ const Instruction *I, bool &TrackUse) {
+ // We need to follow common pointer manipulation uses to the accesses they
+ // feed into.
+ if (isa<CastInst>(I)) {
+ // Follow all but ptr2int casts.
+ TrackUse = !isa<PtrToIntInst>(I);
+ return 0;
+ }
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEP->hasAllConstantIndices()) {
+ TrackUse = true;
+ return 0;
+ }
+ }
+
+ MaybeAlign MA;
+ if (const auto *CB = dyn_cast<CallBase>(I)) {
+ if (CB->isBundleOperand(U) || CB->isCallee(U))
+ return 0;
+
+ unsigned ArgNo = CB->getArgOperandNo(U);
+ IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo);
+ // As long as we only use known information there is no need to track
+ // dependences here.
+ auto &AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP,
+ /* TrackDependence */ false);
+ MA = MaybeAlign(AlignAA.getKnownAlign());
+ }
+
+ const DataLayout &DL = A.getDataLayout();
+ const Value *UseV = U->get();
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ if (SI->getPointerOperand() == UseV)
+ MA = SI->getAlign();
+ } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+ if (LI->getPointerOperand() == UseV)
+ MA = LI->getAlign();
+ }
+
+ if (!MA || *MA <= 1)
+ return 0;
+
+ unsigned Alignment = MA->value();
+ int64_t Offset;
+
+ if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) {
+ if (Base == &AssociatedValue) {
+ // BasePointerAddr + Offset = Alignment * Q for some integer Q.
+ // So we can say that the maximum power of two which is a divisor of
+ // gcd(Offset, Alignment) is an alignment.
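+ //
+ // Worked example (illustrative numbers only): with Alignment = 8 and
+ // Offset = 20, gcd(20, 8) = 4 and PowerOf2Floor(4) = 4, so this use only
+ // proves 4-byte alignment for the base pointer.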
+
+ uint32_t gcd =
+ greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), Alignment);
+ Alignment = llvm::PowerOf2Floor(gcd);
+ }
+ }
+
+ return Alignment;
+}
+
+struct AAAlignImpl : AAAlign {
+ AAAlignImpl(const IRPosition &IRP, Attributor &A) : AAAlign(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ SmallVector<Attribute, 4> Attrs;
+ getAttrs({Attribute::Alignment}, Attrs);
+ for (const Attribute &Attr : Attrs)
+ takeKnownMaximum(Attr.getValueAsInt());
+
+ Value &V = getAssociatedValue();
+ // TODO: This is a HACK to avoid getPointerAlignment introducing a ptr2int
+ // use of the function pointer. This was caused by D73131. We want to
+ // avoid this for function pointers especially because we iterate
+ // their uses and int2ptr is not handled. It is not a correctness
+ // problem though!
+ if (!V.getType()->getPointerElementType()->isFunctionTy())
+ takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value());
+
+ if (getIRPosition().isFnInterfaceKind() &&
+ (!getAnchorScope() ||
+ !A.isFunctionIPOAmendable(*getAssociatedFunction()))) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ if (Instruction *CtxI = getCtxI())
+ followUsesInMBEC(*this, A, getState(), *CtxI);
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus LoadStoreChanged = ChangeStatus::UNCHANGED;
+
+ // Check for users that allow alignment annotations.
+ Value &AssociatedValue = getAssociatedValue();
+ for (const Use &U : AssociatedValue.uses()) {
+ if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
+ if (SI->getPointerOperand() == &AssociatedValue)
+ if (SI->getAlignment() < getAssumedAlign()) {
+ STATS_DECLTRACK(AAAlign, Store,
+ "Number of times alignment added to a store");
+ SI->setAlignment(Align(getAssumedAlign()));
+ LoadStoreChanged = ChangeStatus::CHANGED;
+ }
+ } else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
+ if (LI->getPointerOperand() == &AssociatedValue)
+ if (LI->getAlignment() < getAssumedAlign()) {
+ LI->setAlignment(Align(getAssumedAlign()));
+ STATS_DECLTRACK(AAAlign, Load,
+ "Number of times alignment added to a load");
+ LoadStoreChanged = ChangeStatus::CHANGED;
+ }
+ }
+ }
+
+ ChangeStatus Changed = AAAlign::manifest(A);
+
+ Align InheritAlign =
+ getAssociatedValue().getPointerAlignment(A.getDataLayout());
+ if (InheritAlign >= getAssumedAlign())
+ return LoadStoreChanged;
+ return Changed | LoadStoreChanged;
+ }
+
+ // TODO: Provide a helper to determine the implied ABI alignment and check
+ // that value in the existing manifest method and in a new one for
+ // AAAlignImpl to avoid making the alignment explicit if it did not improve.
+
+ /// See AbstractAttribute::getDeducedAttributes
+ virtual void
+ getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ if (getAssumedAlign() > 1)
+ Attrs.emplace_back(
+ Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
+ }
+
+ /// See followUsesInMBEC
+ bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+ AAAlign::StateType &State) {
+ bool TrackUse = false;
+
+ unsigned int KnownAlign =
+ getKnownAlignForUse(A, *this, getAssociatedValue(), U, I, TrackUse);
+ State.takeKnownMaximum(KnownAlign);
+
+ return TrackUse;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) +
+ "-" + std::to_string(getAssumedAlign()) + ">")
+ : "unknown-align";
+ }
+};
+
+/// Align attribute for a floating value.
+struct AAAlignFloating : AAAlignImpl {
+ AAAlignFloating(const IRPosition &IRP, Attributor &A) : AAAlignImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ const DataLayout &DL = A.getDataLayout();
+
+ auto VisitValueCB = [&](Value &V, const Instruction *,
+ AAAlign::StateType &T, bool Stripped) -> bool {
+ const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ // Use only IR information if we did not strip anything.
+ Align PA = V.getPointerAlignment(DL);
+ T.takeKnownMaximum(PA.value());
+ T.indicatePessimisticFixpoint();
+ } else {
+ // Use abstract attribute information.
+ const AAAlign::StateType &DS =
+ static_cast<const AAAlign::StateType &>(AA.getState());
+ T ^= DS;
+ }
+ return T.isValidState();
+ };
+
+ StateType T;
+ if (!genericValueTraversal<AAAlign, StateType>(A, getIRPosition(), *this, T,
+ VisitValueCB, getCtxI()))
+ return indicatePessimisticFixpoint();
+
+ // TODO: If we know we visited all incoming values, and thus none are
+ // assumed dead, we can take the known information from the state T.
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) }
+};
+
+/// Align attribute for function return value.
+struct AAAlignReturned final
+ : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
+ AAAlignReturned(const IRPosition &IRP, Attributor &A)
+ : AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
+};
+
+/// Align attribute for function argument.
+struct AAAlignArgument final
+ : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl> {
+ using Base = AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl>;
+ AAAlignArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // If the associated argument is involved in a must-tail call we give up
+ // because we would need to keep the argument alignments of caller and
+ // callee in sync. It just does not seem worth the trouble right now.
+ if (A.getInfoCache().isInvolvedInMustTailCall(*getAssociatedArgument()))
+ return ChangeStatus::UNCHANGED;
+ return Base::manifest(A);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) }
+};
+
+struct AAAlignCallSiteArgument final : AAAlignFloating {
+ AAAlignCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAAlignFloating(IRP, A) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // If the associated argument is involved in a must-tail call we give up
+ // because we would need to keep the argument alignments of caller and
+ // callee in sync. It just does not seem worth the trouble right now.
+ if (Argument *Arg = getAssociatedArgument())
+ if (A.getInfoCache().isInvolvedInMustTailCall(*Arg))
+ return ChangeStatus::UNCHANGED;
+ ChangeStatus Changed = AAAlignImpl::manifest(A);
+ Align InheritAlign =
+ getAssociatedValue().getPointerAlignment(A.getDataLayout());
+ if (InheritAlign >= getAssumedAlign())
+ Changed = ChangeStatus::UNCHANGED;
+ return Changed;
+ }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = AAAlignFloating::updateImpl(A);
+ if (Argument *Arg = getAssociatedArgument()) {
+ // We only take known information from the argument
+ // so we do not need to track a dependence.
+ const auto &ArgAlignAA = A.getAAFor<AAAlign>(
+ *this, IRPosition::argument(*Arg), /* TrackDependence */ false);
+ takeKnownMaximum(ArgAlignAA.getKnownAlign());
+ }
+ return Changed;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) }
+};
+
+/// Align attribute deduction for a call site return value.
+struct AAAlignCallSiteReturned final
+ : AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl> {
+ using Base = AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl>;
+ AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Base::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
+};
+
+/// ------------------ Function No-Return Attribute ----------------------------
+struct AANoReturnImpl : public AANoReturn {
+ AANoReturnImpl(const IRPosition &IRP, Attributor &A) : AANoReturn(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AANoReturn::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F)
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "noreturn" : "may-return";
+ }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ virtual ChangeStatus updateImpl(Attributor &A) override {
+ auto CheckForNoReturn = [](Instruction &) { return false; };
+ if (!A.checkForAllInstructions(CheckForNoReturn, *this,
+ {(unsigned)Instruction::Ret}))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+};
+
+struct AANoReturnFunction final : AANoReturnImpl {
+ AANoReturnFunction(const IRPosition &IRP, Attributor &A)
+ : AANoReturnImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) }
+};
+
+/// NoReturn attribute deduction for a call site.
+struct AANoReturnCallSite final : AANoReturnImpl {
+ AANoReturnCallSite(const IRPosition &IRP, Attributor &A)
+ : AANoReturnImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites instead of
+ // redirecting requests to the callee.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoReturn::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
+};
+
+/// ----------------------- Variable Capturing ---------------------------------
+
+/// A class to hold the state for no-capture attributes.
+struct AANoCaptureImpl : public AANoCapture {
+ AANoCaptureImpl(const IRPosition &IRP, Attributor &A) : AANoCapture(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ true)) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+ Function *AnchorScope = getAnchorScope();
+ if (isFnInterfaceKind() &&
+ (!AnchorScope || !A.isFunctionIPOAmendable(*AnchorScope))) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ // You cannot "capture" null in the default address space.
+ if (isa<ConstantPointerNull>(getAssociatedValue()) &&
+ getAssociatedValue().getType()->getPointerAddressSpace() == 0) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope;
+
+ // Check what state the associated function can actually capture.
+ if (F)
+ determineFunctionCaptureCapabilities(getIRPosition(), *F, *this);
+ else
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...).
+ virtual void
+ getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ if (!isAssumedNoCaptureMaybeReturned())
+ return;
+
+ if (getArgNo() >= 0) {
+ if (isAssumedNoCapture())
+ Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
+ else if (ManifestInternal)
+ Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned"));
+ }
+ }
+
+ /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known
+ /// depending on the ability of the function associated with \p IRP to capture
+ /// state in memory and through "returning/throwing", respectively.
+ static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
+ const Function &F,
+ BitIntegerState &State) {
+ // TODO: Once we have memory behavior attributes we should use them here.
+
+ // If we know we cannot communicate or write to memory, we do not care about
+ // ptr2int anymore.
+ if (F.onlyReadsMemory() && F.doesNotThrow() &&
+ F.getReturnType()->isVoidTy()) {
+ State.addKnownBits(NO_CAPTURE);
+ return;
+ }
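+
+ // For illustration (assumed source, not from this patch): a function like
+ //   void observe(int *p) { (void)*p; }
+ // only reads memory, cannot throw, and returns nothing, so no copy of p
+ // can outlive the call and no-capture is known.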
+
+ // A function cannot capture state in memory if it only reads memory; it can
+ // however return/throw state and the state might be influenced by the
+ // pointer value, e.g., loading from a returned pointer might reveal a bit.
+ if (F.onlyReadsMemory())
+ State.addKnownBits(NOT_CAPTURED_IN_MEM);
+
+ // A function cannot communicate state back if it does not throw
+ // exceptions and does not return values.
+ if (F.doesNotThrow() && F.getReturnType()->isVoidTy())
+ State.addKnownBits(NOT_CAPTURED_IN_RET);
+
+ // Check existing "returned" attributes.
+ int ArgNo = IRP.getArgNo();
+ if (F.doesNotThrow() && ArgNo >= 0) {
+ for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
+ if (F.hasParamAttribute(u, Attribute::Returned)) {
+ if (u == unsigned(ArgNo))
+ State.removeAssumedBits(NOT_CAPTURED_IN_RET);
+ else if (F.onlyReadsMemory())
+ State.addKnownBits(NO_CAPTURE);
+ else
+ State.addKnownBits(NOT_CAPTURED_IN_RET);
+ break;
+ }
+ }
+ }
+
+ /// See AbstractState::getAsStr().
+ const std::string getAsStr() const override {
+ if (isKnownNoCapture())
+ return "known not-captured";
+ if (isAssumedNoCapture())
+ return "assumed not-captured";
+ if (isKnownNoCaptureMaybeReturned())
+ return "known not-captured-maybe-returned";
+ if (isAssumedNoCaptureMaybeReturned())
+ return "assumed not-captured-maybe-returned";
+ return "assumed-captured";
+ }
+};
+
+/// Attributor-aware capture tracker.
+struct AACaptureUseTracker final : public CaptureTracker {
+
+ /// Create a capture tracker that can lookup in-flight abstract attributes
+ /// through the Attributor \p A.
+ ///
+ /// If a use leads to a potential capture, \p CapturedInMemory is set and the
+ /// search is stopped. If a use leads to a return instruction,
+ /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed.
+ /// If a use leads to a ptr2int which may capture the value,
+ /// \p CapturedInInteger is set. If a use is found that is currently assumed
+ /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies
+ /// set. All values in \p PotentialCopies are later tracked as well. For every
+ /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0,
+ /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
+ /// conservatively set to true.
+ AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
+ const AAIsDead &IsDeadAA, AANoCapture::StateType &State,
+ SmallVectorImpl<const Value *> &PotentialCopies,
+ unsigned &RemainingUsesToExplore)
+ : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
+ PotentialCopies(PotentialCopies),
+ RemainingUsesToExplore(RemainingUsesToExplore) {}
+
+ /// Determine if \p V may be captured. *Also updates the state!*
+ bool valueMayBeCaptured(const Value *V) {
+ if (V->getType()->isPointerTy()) {
+ PointerMayBeCaptured(V, this);
+ } else {
+ State.indicatePessimisticFixpoint();
+ }
+ return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+ }
+
+ /// See CaptureTracker::tooManyUses().
+ void tooManyUses() override {
+ State.removeAssumedBits(AANoCapture::NO_CAPTURE);
+ }
+
+ bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
+ if (CaptureTracker::isDereferenceableOrNull(O, DL))
+ return true;
+ const auto &DerefAA = A.getAAFor<AADereferenceable>(
+ NoCaptureAA, IRPosition::value(*O), /* TrackDependence */ true,
+ DepClassTy::OPTIONAL);
+ return DerefAA.getAssumedDereferenceableBytes();
+ }
+
+ /// See CaptureTracker::captured(...).
+ bool captured(const Use *U) override {
+ Instruction *UInst = cast<Instruction>(U->getUser());
+ LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst
+ << "\n");
+
+ // Because we may reuse the tracker multiple times we keep track of the
+ // number of explored uses ourselves as well.
+ if (RemainingUsesToExplore-- == 0) {
+ LLVM_DEBUG(dbgs() << " - too many uses to explore!\n");
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+ }
+
+ // Deal with ptr2int by following uses.
+ if (isa<PtrToIntInst>(UInst)) {
+ LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n");
+ return valueMayBeCaptured(UInst);
+ }
+
+ // Explicitly catch return instructions.
+ if (isa<ReturnInst>(UInst))
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ true);
+
+ // For now we only use special logic for call sites. However, the tracker
+ // itself knows about a lot of other non-capturing cases already.
+ auto *CB = dyn_cast<CallBase>(UInst);
+ if (!CB || !CB->isArgOperand(U))
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+
+ unsigned ArgNo = CB->getArgOperandNo(U);
+ const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo);
+ // If we have an abstract no-capture attribute for the argument we can use
+ // it to justify a non-capture attribute here. This allows recursion!
+ auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos);
+ if (ArgNoCaptureAA.isAssumedNoCapture())
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ false);
+ if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ addPotentialCopy(*CB);
+ return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ /* Return */ false);
+ }
+
+ // Lastly, we could not find a reason no-capture can be assumed, so we don't.
+ return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ /* Return */ true);
+ }
+
+ /// Register \p CS as potential copy of the value we are checking.
+ void addPotentialCopy(CallBase &CB) { PotentialCopies.push_back(&CB); }
+
+ /// See CaptureTracker::shouldExplore(...).
+ bool shouldExplore(const Use *U) override {
+ // Check liveness and ignore droppable users.
+ return !U->getUser()->isDroppable() &&
+ !A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA);
+ }
+
+ /// Update the state according to \p CapturedInMem, \p CapturedInInt, and
+ /// \p CapturedInRet, then return the appropriate value for use in the
+ /// CaptureTracker::captured() interface.
+ bool isCapturedIn(bool CapturedInMem, bool CapturedInInt,
+ bool CapturedInRet) {
+ LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int "
+ << CapturedInInt << "|Ret " << CapturedInRet << "]\n");
+ if (CapturedInMem)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM);
+ if (CapturedInInt)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT);
+ if (CapturedInRet)
+ State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET);
+ return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+ }
+
+private:
+ /// The attributor providing in-flight abstract attributes.
+ Attributor &A;
+
+ /// The abstract attribute currently updated.
+ AANoCapture &NoCaptureAA;
+
+ /// The abstract liveness state.
+ const AAIsDead &IsDeadAA;
+
+ /// The state currently updated.
+ AANoCapture::StateType &State;
+
+ /// Set of potential copies of the tracked value.
+ SmallVectorImpl<const Value *> &PotentialCopies;
+
+ /// Global counter to limit the number of explored uses.
+ unsigned &RemainingUsesToExplore;
+};
+
+ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
+ const IRPosition &IRP = getIRPosition();
+ const Value *V =
+ getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue();
+ if (!V)
+ return indicatePessimisticFixpoint();
+
+ const Function *F =
+ getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
+ assert(F && "Expected a function!");
+ const IRPosition &FnPos = IRPosition::function(*F);
+ const auto &IsDeadAA =
+ A.getAAFor<AAIsDead>(*this, FnPos, /* TrackDependence */ false);
+
+ AANoCapture::StateType T;
+
+ // Readonly means we cannot capture through memory.
+ const auto &FnMemAA =
+ A.getAAFor<AAMemoryBehavior>(*this, FnPos, /* TrackDependence */ false);
+ if (FnMemAA.isAssumedReadOnly()) {
+ T.addKnownBits(NOT_CAPTURED_IN_MEM);
+ if (FnMemAA.isKnownReadOnly())
+ addKnownBits(NOT_CAPTURED_IN_MEM);
+ else
+ A.recordDependence(FnMemAA, *this, DepClassTy::OPTIONAL);
+ }
+
+ // Make sure all returned values are different from the underlying value.
+ // TODO: we could do this in a more sophisticated way inside
+ // AAReturnedValues, e.g., track all values that escape through returns
+ // directly somehow.
+ auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
+ bool SeenConstant = false;
+ for (auto &It : RVAA.returned_values()) {
+ if (isa<Constant>(It.first)) {
+ if (SeenConstant)
+ return false;
+ SeenConstant = true;
+ } else if (!isa<Argument>(It.first) ||
+ It.first == getAssociatedArgument())
+ return false;
+ }
+ return true;
+ };
+
+ const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(
+ *this, FnPos, /* TrackDependence */ true, DepClassTy::OPTIONAL);
+ if (NoUnwindAA.isAssumedNoUnwind()) {
+ bool IsVoidTy = F->getReturnType()->isVoidTy();
+ const AAReturnedValues *RVAA =
+ IsVoidTy ? nullptr
+ : &A.getAAFor<AAReturnedValues>(*this, FnPos,
+ /* TrackDependence */ true,
+ DepClassTy::OPTIONAL);
+ if (IsVoidTy || CheckReturnedArgs(*RVAA)) {
+ T.addKnownBits(NOT_CAPTURED_IN_RET);
+ if (T.isKnown(NOT_CAPTURED_IN_MEM))
+ return ChangeStatus::UNCHANGED;
+ if (NoUnwindAA.isKnownNoUnwind() &&
+ (IsVoidTy || RVAA->getState().isAtFixpoint())) {
+ addKnownBits(NOT_CAPTURED_IN_RET);
+ if (isKnown(NOT_CAPTURED_IN_MEM))
+ return indicateOptimisticFixpoint();
+ }
+ }
+ }
+
+ // Use the CaptureTracker interface and logic with the specialized tracker,
+ // defined in AACaptureUseTracker, that can look at in-flight abstract
+ // attributes and directly update the assumed state.
+ SmallVector<const Value *, 4> PotentialCopies;
+ unsigned RemainingUsesToExplore =
+ getDefaultMaxUsesToExploreForCaptureTracking();
+ AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies,
+ RemainingUsesToExplore);
+
+ // Check all potential copies of the associated value until we can assume
+ // none will be captured or we have to assume at least one might be.
+ unsigned Idx = 0;
+ PotentialCopies.push_back(V);
+ while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
+ Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
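+
+ // For illustration: if V is passed to a call site argument that is only
+ // "no-capture-maybe-returned", the call is registered as a potential copy
+ // (see addPotentialCopy) and its uses are explored in a later iteration of
+ // this loop. This makes the tracking transitive through returning callees.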
+
+ AANoCapture::StateType &S = getState();
+ auto Assumed = S.getAssumed();
+ S.intersectAssumedBits(T.getAssumed());
+ if (!isAssumedNoCaptureMaybeReturned())
+ return indicatePessimisticFixpoint();
+ return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+}
+
+/// NoCapture attribute for function arguments.
+struct AANoCaptureArgument final : AANoCaptureImpl {
+ AANoCaptureArgument(const IRPosition &IRP, Attributor &A)
+ : AANoCaptureImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) }
+};
+
+/// NoCapture attribute for call site arguments.
+struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
+ AANoCaptureCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AANoCaptureImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (Argument *Arg = getAssociatedArgument())
+ if (Arg->hasByValAttr())
+ indicateOptimisticFixpoint();
+ AANoCaptureImpl::initialize(A);
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nocapture) }
+};
+
+/// NoCapture attribute for floating values.
+struct AANoCaptureFloating final : AANoCaptureImpl {
+ AANoCaptureFloating(const IRPosition &IRP, Attributor &A)
+ : AANoCaptureImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(nocapture)
+ }
+};
+
+/// NoCapture attribute for function return value.
+struct AANoCaptureReturned final : AANoCaptureImpl {
+ AANoCaptureReturned(const IRPosition &IRP, Attributor &A)
+ : AANoCaptureImpl(IRP, A) {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("NoCapture is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// NoCapture attribute deduction for a call site return value.
+struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
+ AANoCaptureCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AANoCaptureImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(nocapture)
+ }
+};
+
+/// ------------------ Value Simplify Attribute ----------------------------
+struct AAValueSimplifyImpl : AAValueSimplify {
+ AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
+ : AAValueSimplify(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (getAssociatedValue().getType()->isVoidTy())
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? (getKnown() ? "simplified" : "maybe-simple")
+ : "not-simple";
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+ /// See AAValueSimplify::getAssumedSimplifiedValue()
+ Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
+ if (!getAssumed())
+ return const_cast<Value *>(&getAssociatedValue());
+ return SimplifiedAssociatedValue;
+ }
+
+ /// Helper function for querying AAValueSimplify and updating the candidate.
+ /// \param QueryingValue Value trying to unify with SimplifiedValue
+ /// \param AccumulatedSimplifiedValue Current simplification result.
+ static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA,
+ Value &QueryingValue,
+ Optional<Value *> &AccumulatedSimplifiedValue) {
+ // FIXME: Add a typecast support.
+
+ auto &ValueSimplifyAA = A.getAAFor<AAValueSimplify>(
+ QueryingAA, IRPosition::value(QueryingValue));
+
+ Optional<Value *> QueryingValueSimplified =
+ ValueSimplifyAA.getAssumedSimplifiedValue(A);
+
+ if (!QueryingValueSimplified.hasValue())
+ return true;
+
+ if (!QueryingValueSimplified.getValue())
+ return false;
+
+ Value &QueryingValueSimplifiedUnwrapped =
+ *QueryingValueSimplified.getValue();
+
+ if (AccumulatedSimplifiedValue.hasValue() &&
+ !isa<UndefValue>(AccumulatedSimplifiedValue.getValue()) &&
+ !isa<UndefValue>(QueryingValueSimplifiedUnwrapped))
+ return AccumulatedSimplifiedValue == QueryingValueSimplified;
+ if (AccumulatedSimplifiedValue.hasValue() &&
+ isa<UndefValue>(QueryingValueSimplifiedUnwrapped))
+ return true;
+
+ LLVM_DEBUG(dbgs() << "[ValueSimplify] " << QueryingValue
+ << " is assumed to be "
+ << QueryingValueSimplifiedUnwrapped << "\n");
+
+ AccumulatedSimplifiedValue = QueryingValueSimplified;
+ return true;
+ }
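+
+ // How the unification above behaves, on assumed inputs for illustration:
+ // two queried values that both simplify to i32 42 unify to 42; 42 against
+ // 43 fails the equality check and returns false; undef unifies with any
+ // value, since undef may be chosen to match it.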
+
+ bool askSimplifiedValueForAAValueConstantRange(Attributor &A) {
+ if (!getAssociatedValue().getType()->isIntegerTy())
+ return false;
+
+ const auto &ValueConstantRangeAA =
+ A.getAAFor<AAValueConstantRange>(*this, getIRPosition());
+
+ Optional<ConstantInt *> COpt =
+ ValueConstantRangeAA.getAssumedConstantInt(A);
+ if (COpt.hasValue()) {
+ if (auto *C = COpt.getValue())
+ SimplifiedAssociatedValue = C;
+ else
+ return false;
+ } else {
+ SimplifiedAssociatedValue = llvm::None;
+ }
+ return true;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ if (SimplifiedAssociatedValue.hasValue() &&
+ !SimplifiedAssociatedValue.getValue())
+ return Changed;
+
+ Value &V = getAssociatedValue();
+ auto *C = SimplifiedAssociatedValue.hasValue()
+ ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
+ : UndefValue::get(V.getType());
+ if (C) {
+ // We can replace the AssociatedValue with the constant.
+ if (!V.user_empty() && &V != C && V.getType() == C->getType()) {
+ LLVM_DEBUG(dbgs() << "[ValueSimplify] " << V << " -> " << *C
+ << " :: " << *this << "\n");
+ if (A.changeValueAfterManifest(V, *C))
+ Changed = ChangeStatus::CHANGED;
+ }
+ }
+
+ return Changed | AAValueSimplify::manifest(A);
+ }
+
+ /// See AbstractState::indicatePessimisticFixpoint(...).
+ ChangeStatus indicatePessimisticFixpoint() override {
+ // NOTE: Associated value will be returned in a pessimistic fixpoint and is
+ // regarded as known. That's why `indicateOptimisticFixpoint` is called.
+ SimplifiedAssociatedValue = &getAssociatedValue();
+ indicateOptimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+protected:
+ // An assumed simplified value. Initially, it is set to Optional::None, which
+ // means that the value is not clear under the current assumption. If in the
+ // pessimistic state, getAssumedSimplifiedValue doesn't return this value but
+ // returns the original associated value.
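+ //
+ // For illustration, the states are: None, i.e., no candidate yet (still
+ // optimistic); some concrete Value*, i.e., assumed equal to that value; and,
+ // after indicatePessimisticFixpoint, the associated value itself, which acts
+ // as the identity "simplification".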
+ Optional<Value *> SimplifiedAssociatedValue;
+};
+
+struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
+ AAValueSimplifyArgument(const IRPosition &IRP, Attributor &A)
+ : AAValueSimplifyImpl(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ AAValueSimplifyImpl::initialize(A);
+ if (!getAnchorScope() || getAnchorScope()->isDeclaration())
+ indicatePessimisticFixpoint();
+ if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
+ Attribute::StructRet, Attribute::Nest},
+ /* IgnoreSubsumingPositions */ true))
+ indicatePessimisticFixpoint();
+
+ // FIXME: This is a hack to prevent us from propagating function pointers in
+ // the new pass manager CGSCC pass as it creates call edges the
+ // CallGraphUpdater cannot handle yet.
+ Value &V = getAssociatedValue();
+ if (V.getType()->isPointerTy() &&
+ V.getType()->getPointerElementType()->isFunctionTy() &&
+ !A.isModulePass())
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // Byval is only replaceable if it is readonly; otherwise we would write into
+ // the replaced value and not the copy that byval creates implicitly.
+ Argument *Arg = getAssociatedArgument();
+ if (Arg->hasByValAttr()) {
+ // TODO: We probably need to verify synchronization is not an issue, e.g.,
+ // there is no race by not copying a constant byval.
+ const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition());
+ if (!MemAA.isAssumedReadOnly())
+ return indicatePessimisticFixpoint();
+ }
+
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto PredForCallSite = [&](AbstractCallSite ACS) {
+ const IRPosition &ACSArgPos =
+ IRPosition::callsite_argument(ACS, getArgNo());
+ // Check if a corresponding argument was found or if it is not
+ // associated (which can happen for callback calls).
+ if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+ return false;
+
+ // We can only propagate thread independent values through callbacks.
+ // This is different to direct/indirect call sites because for them we
+ // know the thread executing the caller and callee is the same. For
+ // callbacks this is not guaranteed, thus a thread dependent value could
+ // be different for the caller and callee, making it invalid to propagate.
+ Value &ArgOp = ACSArgPos.getAssociatedValue();
+ if (ACS.isCallbackCall())
+ if (auto *C = dyn_cast<Constant>(&ArgOp))
+ if (C->isThreadDependent())
+ return false;
+ return checkAndUpdate(A, *this, ArgOp, SimplifiedAssociatedValue);
+ };
+
+ bool AllCallSitesKnown;
+ if (!A.checkForAllCallSites(PredForCallSite, *this, true,
+ AllCallSitesKnown))
+ if (!askSimplifiedValueForAAValueConstantRange(A))
+ return indicatePessimisticFixpoint();
+
+ // If a candidate was found in this update, return CHANGED.
+ return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+ ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyReturned : AAValueSimplifyImpl {
+ AAValueSimplifyReturned(const IRPosition &IRP, Attributor &A)
+ : AAValueSimplifyImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto PredForReturned = [&](Value &V) {
+ return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
+ };
+
+ if (!A.checkForAllReturnedValues(PredForReturned, *this))
+ if (!askSimplifiedValueForAAValueConstantRange(A))
+ return indicatePessimisticFixpoint();
+
+ // If a candidate was found in this update, return CHANGED.
+ return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+ ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ if (SimplifiedAssociatedValue.hasValue() &&
+ !SimplifiedAssociatedValue.getValue())
+ return Changed;
+
+ Value &V = getAssociatedValue();
+ auto *C = SimplifiedAssociatedValue.hasValue()
+ ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
+ : UndefValue::get(V.getType());
+ if (C) {
+ auto PredForReturned =
+ [&](Value &V, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
+ // We can replace the AssociatedValue with the constant.
+ if (&V == C || V.getType() != C->getType() || isa<UndefValue>(V))
+ return true;
+
+ for (ReturnInst *RI : RetInsts) {
+ if (RI->getFunction() != getAnchorScope())
+ continue;
+ auto *RC = C;
+ if (RC->getType() != RI->getReturnValue()->getType())
+ RC = ConstantExpr::getBitCast(RC,
+ RI->getReturnValue()->getType());
+ LLVM_DEBUG(dbgs() << "[ValueSimplify] " << V << " -> " << *RC
+ << " in " << *RI << " :: " << *this << "\n");
+ if (A.changeUseAfterManifest(RI->getOperandUse(0), *RC))
+ Changed = ChangeStatus::CHANGED;
+ }
+ return true;
+ };
+ A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this);
+ }
+
+ return Changed | AAValueSimplify::manifest(A);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyFloating : AAValueSimplifyImpl {
+ AAValueSimplifyFloating(const IRPosition &IRP, Attributor &A)
+ : AAValueSimplifyImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // FIXME: This might have exposed an SCC iterator update bug in the old PM.
+ // Needs investigation.
+ // AAValueSimplifyImpl::initialize(A);
+ Value &V = getAnchorValue();
+
+ // TODO: add other cases
+ if (isa<Constant>(V))
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool HasValueBefore = SimplifiedAssociatedValue.hasValue();
+
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
+ bool Stripped) -> bool {
+ auto &AA = A.getAAFor<AAValueSimplify>(*this, IRPosition::value(V));
+ if (!Stripped && this == &AA) {
+ // TODO: Look at the instruction and check recursively.
+
+ LLVM_DEBUG(dbgs() << "[ValueSimplify] Can't be stripped more : " << V
+ << "\n");
+ return false;
+ }
+ return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue);
+ };
+
+ bool Dummy = false;
+ if (!genericValueTraversal<AAValueSimplify, bool>(
+ A, getIRPosition(), *this, Dummy, VisitValueCB, getCtxI(),
+ /* UseValueSimplify */ false))
+ if (!askSimplifiedValueForAAValueConstantRange(A))
+ return indicatePessimisticFixpoint();
+
+ // If a candidate was found in this update, return CHANGED.
+ return HasValueBefore == SimplifiedAssociatedValue.hasValue()
+ ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyFunction : AAValueSimplifyImpl {
+ AAValueSimplifyFunction(const IRPosition &IRP, Attributor &A)
+ : AAValueSimplifyImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ SimplifiedAssociatedValue = &getAnchorValue();
+ indicateOptimisticFixpoint();
+ }
+ /// See AbstractAttribute::initialize(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable(
+ "AAValueSimplify(Function|CallSite)::updateImpl will not be called");
+ }
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FN_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyCallSite : AAValueSimplifyFunction {
+ AAValueSimplifyCallSite(const IRPosition &IRP, Attributor &A)
+ : AAValueSimplifyFunction(IRP, A) {}
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(value_simplify)
+ }
+};
+
+struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned {
+ AAValueSimplifyCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAValueSimplifyReturned(IRP, A) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ return AAValueSimplifyImpl::manifest(A);
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(value_simplify)
+ }
+};
+struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
+ AAValueSimplifyCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAValueSimplifyFloating(IRP, A) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ if (SimplifiedAssociatedValue.hasValue() &&
+ !SimplifiedAssociatedValue.getValue())
+ return Changed;
+
+ Value &V = getAssociatedValue();
+ auto *C = SimplifiedAssociatedValue.hasValue()
+ ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
+ : UndefValue::get(V.getType());
+ if (C) {
+ Use &U = cast<CallBase>(&getAnchorValue())->getArgOperandUse(getArgNo());
+ // We can replace the AssociatedValue with the constant.
+ if (&V != C && V.getType() == C->getType()) {
+ if (A.changeUseAfterManifest(U, *C))
+ Changed = ChangeStatus::CHANGED;
+ }
+ }
+
+ return Changed | AAValueSimplify::manifest(A);
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(value_simplify)
+ }
+};
+
+/// ----------------------- Heap-To-Stack Conversion ---------------------------
+struct AAHeapToStackImpl : public AAHeapToStack {
+ AAHeapToStackImpl(const IRPosition &IRP, Attributor &A)
+ : AAHeapToStack(IRP, A) {}
+
+ const std::string getAsStr() const override {
+ return "[H2S] Mallocs: " + std::to_string(MallocCalls.size());
+ }
+
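+ // The rewrite performed by manifest below, sketched on assumed IR:
+ //
+ //   %p = call i8* @malloc(i64 32)   ; becomes %p = alloca i8, i64 32
+ //   ...
+ //   call void @free(i8* %p)         ; erased
+ //
+ // calloc-like calls additionally get a memset to zero the new alloca, and
+ // aligned_alloc-like calls carry their constant alignment over.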
+ ChangeStatus manifest(Attributor &A) override {
+ assert(getState().isValidState() &&
+ "Attempted to manifest an invalid state!");
+
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ Function *F = getAnchorScope();
+ const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+ for (Instruction *MallocCall : MallocCalls) {
+ // This malloc cannot be replaced.
+ if (BadMallocCalls.count(MallocCall))
+ continue;
+
+ for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
+ LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
+ A.deleteAfterManifest(*FreeCall);
+ HasChanged = ChangeStatus::CHANGED;
+ }
+
+ LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
+ << "\n");
+
+ Align Alignment;
+ Constant *Size;
+ if (isCallocLikeFn(MallocCall, TLI)) {
+ auto *Num = cast<ConstantInt>(MallocCall->getOperand(0));
+ auto *SizeT = cast<ConstantInt>(MallocCall->getOperand(1));
+ APInt TotalSize = SizeT->getValue() * Num->getValue();
+ Size =
+ ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize);
+ } else if (isAlignedAllocLikeFn(MallocCall, TLI)) {
+ Size = cast<ConstantInt>(MallocCall->getOperand(1));
+ Alignment = MaybeAlign(cast<ConstantInt>(MallocCall->getOperand(0))
+ ->getValue()
+ .getZExtValue())
+ .valueOrOne();
+ } else {
+ Size = cast<ConstantInt>(MallocCall->getOperand(0));
+ }
+
+ unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
+ Instruction *AI =
+ new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
+ "", MallocCall->getNextNode());
+
+ if (AI->getType() != MallocCall->getType())
+ AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
+ AI->getNextNode());
+
+ A.changeValueAfterManifest(*MallocCall, *AI);
+
+ if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
+ auto *NBB = II->getNormalDest();
+ BranchInst::Create(NBB, MallocCall->getParent());
+ A.deleteAfterManifest(*MallocCall);
+ } else {
+ A.deleteAfterManifest(*MallocCall);
+ }
+
+ // Zero out the allocated memory if it was a calloc.
+ if (isCallocLikeFn(MallocCall, TLI)) {
+ auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
+ AI->getNextNode());
+ Value *Ops[] = {
+ BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
+ ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
+
+ Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
+ Module *M = F->getParent();
+ Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+ CallInst::Create(Fn, Ops, "", BI->getNextNode());
+ }
+ HasChanged = ChangeStatus::CHANGED;
+ }
+
+ return HasChanged;
+ }
+
+ /// Collection of all malloc calls in a function.
+ SmallSetVector<Instruction *, 4> MallocCalls;
+
+ /// Collection of malloc calls that cannot be converted.
+ DenseSet<const Instruction *> BadMallocCalls;
+
+ /// A map for each malloc call to the set of associated free calls.
+ DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
+
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
+ const Function *F = getAnchorScope();
+ const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+ MustBeExecutedContextExplorer &Explorer =
+ A.getInfoCache().getMustBeExecutedContextExplorer();
+
+ auto FreeCheck = [&](Instruction &I) {
+ const auto &Frees = FreesForMalloc.lookup(&I);
+ if (Frees.size() != 1)
+ return false;
+ Instruction *UniqueFree = *Frees.begin();
+ return Explorer.findInContextOf(UniqueFree, I.getNextNode());
+ };
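+
+ // For illustration: FreeCheck only accepts an allocation whose unique free
+ // call must execute whenever the allocation does, e.g., a malloc followed
+ // on every path by the matching free. A free guarded by a branch is not
+ // found in the must-be-executed context and the allocation is rejected
+ // here (though UsesCheck may still accept it).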
+
+ auto UsesCheck = [&](Instruction &I) {
+ bool ValidUsesOnly = true;
+ bool MustUse = true;
+ auto Pred = [&](const Use &U, bool &Follow) -> bool {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ if (isa<LoadInst>(UserI))
+ return true;
+ if (auto *SI = dyn_cast<StoreInst>(UserI)) {
+ if (SI->getValueOperand() == U.get()) {
+ LLVM_DEBUG(dbgs()
+ << "[H2S] escaping store to memory: " << *UserI << "\n");
+ ValidUsesOnly = false;
+ } else {
+ // A store into the malloc'ed memory is fine.
+ }
+ return true;
+ }
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+ if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd())
+ return true;
+ // Record free calls for this allocation.
+ if (isFreeCall(UserI, TLI)) {
+ if (MustUse) {
+ FreesForMalloc[&I].insert(UserI);
+ } else {
+ LLVM_DEBUG(dbgs() << "[H2S] free potentially on different mallocs: "
+ << *UserI << "\n");
+ ValidUsesOnly = false;
+ }
+ return true;
+ }
+
+ unsigned ArgNo = CB->getArgOperandNo(&U);
+
+ const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo));
+
+ // If a callsite argument use is nofree, we are fine.
+ const auto &ArgNoFreeAA = A.getAAFor<AANoFree>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo));
+
+ if (!NoCaptureAA.isAssumedNoCapture() ||
+ !ArgNoFreeAA.isAssumedNoFree()) {
+ LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
+ ValidUsesOnly = false;
+ }
+ return true;
+ }
+
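+      // Follow pointer-preserving operations. PHIs and selects may mix in
+      // other pointers, so beyond them a free is no longer known to act on
+      // this allocation; MustUse tracks that.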
+ if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
+ isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
+ MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI));
+ Follow = true;
+ return true;
+ }
+      // Unknown user for which we cannot track uses further (in a way that
+      // makes sense).
+ LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n");
+ ValidUsesOnly = false;
+ return true;
+ };
+ A.checkForAllUses(Pred, *this, I);
+ return ValidUsesOnly;
+ };
+
+ auto MallocCallocCheck = [&](Instruction &I) {
+ if (BadMallocCalls.count(&I))
+ return true;
+
+ bool IsMalloc = isMallocLikeFn(&I, TLI);
+ bool IsAlignedAllocLike = isAlignedAllocLikeFn(&I, TLI);
+ bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
+ if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc) {
+ BadMallocCalls.insert(&I);
+ return true;
+ }
+
+ if (IsMalloc) {
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
+ if (Size->getValue().ule(MaxHeapToStackSize))
+ if (UsesCheck(I) || FreeCheck(I)) {
+ MallocCalls.insert(&I);
+ return true;
+ }
+ } else if (IsAlignedAllocLike && isa<ConstantInt>(I.getOperand(0))) {
+ // Only if the alignment and sizes are constant.
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
+ if (Size->getValue().ule(MaxHeapToStackSize))
+ if (UsesCheck(I) || FreeCheck(I)) {
+ MallocCalls.insert(&I);
+ return true;
+ }
+ } else if (IsCalloc) {
+ bool Overflow = false;
+ if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
+ if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
+ .ule(MaxHeapToStackSize))
+ if (!Overflow && (UsesCheck(I) || FreeCheck(I))) {
+ MallocCalls.insert(&I);
+ return true;
+ }
+ }
+
+ BadMallocCalls.insert(&I);
+ return true;
+ };
+
+ size_t NumBadMallocs = BadMallocCalls.size();
+
+ A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);
+
+ if (NumBadMallocs != BadMallocCalls.size())
+ return ChangeStatus::CHANGED;
+
+ return ChangeStatus::UNCHANGED;
+}
+
+struct AAHeapToStackFunction final : public AAHeapToStackImpl {
+ AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
+ : AAHeapToStackImpl(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics().
+ void trackStatistics() const override {
+ STATS_DECL(
+ MallocCalls, Function,
+ "Number of malloc/calloc/aligned_alloc calls converted to allocas");
+ for (auto *C : MallocCalls)
+ if (!BadMallocCalls.count(C))
+ ++BUILD_STAT_NAME(MallocCalls, Function);
+ }
+};
+
+/// ----------------------- Privatizable Pointers ------------------------------
+struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
+ AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A)
+ : AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {}
+
+ ChangeStatus indicatePessimisticFixpoint() override {
+ AAPrivatizablePtr::indicatePessimisticFixpoint();
+ PrivatizableType = nullptr;
+ return ChangeStatus::CHANGED;
+ }
+
+  /// Identify the type we can choose for a private copy of the underlying
+ /// argument. None means it is not clear yet, nullptr means there is none.
+ virtual Optional<Type *> identifyPrivatizableType(Attributor &A) = 0;
+
+ /// Return a privatizable type that encloses both T0 and T1.
+ /// TODO: This is merely a stub for now as we should manage a mapping as well.
+ Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) {
+ if (!T0.hasValue())
+ return T1;
+ if (!T1.hasValue())
+ return T0;
+ if (T0 == T1)
+ return T0;
+ return nullptr;
+ }
+
+ Optional<Type *> getPrivatizableType() const override {
+ return PrivatizableType;
+ }
+
+ const std::string getAsStr() const override {
+ return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]";
+ }
+
+protected:
+ Optional<Type *> PrivatizableType;
+};
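+
+// For illustration, privatizing a byval struct argument, e.g.,
+//   define void @fn(%pair* byval %p)   ; %pair = type { i32, i32 }
+// rewrites the signature so the members are passed directly,
+//   define void @fn(i32 %p.0, i32 %p.1)
+// and materializes a private alloca of %pair in the callee's entry block (the
+// argument names here are made up for the example).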
+
+// TODO: Do this for call site arguments (probably also other values) as well.
+
+struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
+ AAPrivatizablePtrArgument(const IRPosition &IRP, Attributor &A)
+ : AAPrivatizablePtrImpl(IRP, A) {}
+
+ /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
+ Optional<Type *> identifyPrivatizableType(Attributor &A) override {
+ // If this is a byval argument and we know all the call sites (so we can
+ // rewrite them), there is no need to check them explicitly.
+ bool AllCallSitesKnown;
+ if (getIRPosition().hasAttr(Attribute::ByVal) &&
+ A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
+ true, AllCallSitesKnown))
+ return getAssociatedValue().getType()->getPointerElementType();
+
+ Optional<Type *> Ty;
+ unsigned ArgNo = getIRPosition().getArgNo();
+
+    // Make sure the associated call site argument has the same type at all
+    // call sites and that it is an allocation we know is safe to privatize;
+    // for now that means we only allow alloca instructions.
+ // TODO: We can additionally analyze the accesses in the callee to create
+ // the type from that information instead. That is a little more
+ // involved and will be done in a follow up patch.
+ auto CallSiteCheck = [&](AbstractCallSite ACS) {
+ IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
+      // Check if a corresponding argument was found or if it is one that is
+      // not associated (which can happen for callback calls).
+ if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+ return false;
+
+ // Check that all call sites agree on a type.
+ auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos);
+ Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
+
+ LLVM_DEBUG({
+ dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
+ if (CSTy.hasValue() && CSTy.getValue())
+ CSTy.getValue()->print(dbgs());
+ else if (CSTy.hasValue())
+ dbgs() << "<nullptr>";
+ else
+ dbgs() << "<none>";
+ });
+
+ Ty = combineTypes(Ty, CSTy);
+
+ LLVM_DEBUG({
+ dbgs() << " : New Type: ";
+ if (Ty.hasValue() && Ty.getValue())
+ Ty.getValue()->print(dbgs());
+ else if (Ty.hasValue())
+ dbgs() << "<nullptr>";
+ else
+ dbgs() << "<none>";
+ dbgs() << "\n";
+ });
+
+ return !Ty.hasValue() || Ty.getValue();
+ };
+
+ if (!A.checkForAllCallSites(CallSiteCheck, *this, true, AllCallSitesKnown))
+ return nullptr;
+ return Ty;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ PrivatizableType = identifyPrivatizableType(A);
+ if (!PrivatizableType.hasValue())
+ return ChangeStatus::UNCHANGED;
+ if (!PrivatizableType.getValue())
+ return indicatePessimisticFixpoint();
+
+    // The dependence is optional so we don't give up on this attribute just
+    // because we cannot derive the alignment.
+ A.getAAFor<AAAlign>(*this, IRPosition::value(getAssociatedValue()),
+ /* TrackDependence */ true, DepClassTy::OPTIONAL);
+
+ // Avoid arguments with padding for now.
+ if (!getIRPosition().hasAttr(Attribute::ByVal) &&
+ !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
+ A.getInfoCache().getDL())) {
+ LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ // Verify callee and caller agree on how the promoted argument would be
+ // passed.
+    // TODO: The use of the ArgumentPromotion interface here is ugly; we need a
+ // specialized form of TargetTransformInfo::areFunctionArgsABICompatible
+ // which doesn't require the arguments ArgumentPromotion wanted to pass.
+ Function &Fn = *getIRPosition().getAnchorScope();
+ SmallPtrSet<Argument *, 1> ArgsToPromote, Dummy;
+ ArgsToPromote.insert(getAssociatedArgument());
+ const auto *TTI =
+ A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(Fn);
+ if (!TTI ||
+ !ArgumentPromotionPass::areFunctionArgsABICompatible(
+ Fn, *TTI, ArgsToPromote, Dummy) ||
+ ArgsToPromote.empty()) {
+ LLVM_DEBUG(
+ dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for "
+ << Fn.getName() << "\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ // Collect the types that will replace the privatizable type in the function
+ // signature.
+ SmallVector<Type *, 16> ReplacementTypes;
+ identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+
+ // Register a rewrite of the argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!A.isValidFunctionSignatureRewrite(*Arg, ReplacementTypes)) {
+ LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Rewrite not valid\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ unsigned ArgNo = Arg->getArgNo();
+
+ // Helper to check if for the given call site the associated argument is
+ // passed to a callback where the privatization would be different.
+ auto IsCompatiblePrivArgOfCallback = [&](CallBase &CB) {
+ SmallVector<const Use *, 4> CallbackUses;
+ AbstractCallSite::getCallbackUses(CB, CallbackUses);
+ for (const Use *U : CallbackUses) {
+ AbstractCallSite CBACS(U);
+ assert(CBACS && CBACS.isCallbackCall());
+ for (Argument &CBArg : CBACS.getCalledFunction()->args()) {
+ int CBArgNo = CBACS.getCallArgOperandNo(CBArg);
+
+ LLVM_DEBUG({
+ dbgs()
+ << "[AAPrivatizablePtr] Argument " << *Arg
+ << "check if can be privatized in the context of its parent ("
+ << Arg->getParent()->getName()
+ << ")\n[AAPrivatizablePtr] because it is an argument in a "
+ "callback ("
+ << CBArgNo << "@" << CBACS.getCalledFunction()->getName()
+ << ")\n[AAPrivatizablePtr] " << CBArg << " : "
+ << CBACS.getCallArgOperand(CBArg) << " vs "
+ << CB.getArgOperand(ArgNo) << "\n"
+ << "[AAPrivatizablePtr] " << CBArg << " : "
+ << CBACS.getCallArgOperandNo(CBArg) << " vs " << ArgNo << "\n";
+ });
+
+ if (CBArgNo != int(ArgNo))
+ continue;
+ const auto &CBArgPrivAA =
+ A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(CBArg));
+ if (CBArgPrivAA.isValidState()) {
+ auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
+ if (!CBArgPrivTy.hasValue())
+ continue;
+ if (CBArgPrivTy.getValue() == PrivatizableType)
+ continue;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
+ << " cannot be privatized in the context of its parent ("
+ << Arg->getParent()->getName()
+ << ")\n[AAPrivatizablePtr] because it is an argument in a "
+ "callback ("
+ << CBArgNo << "@" << CBACS.getCalledFunction()->getName()
+ << ").\n[AAPrivatizablePtr] for which the argument "
+ "privatization is not compatible.\n";
+ });
+ return false;
+ }
+ }
+ return true;
+ };
+
+ // Helper to check if for the given call site the associated argument is
+ // passed to a direct call where the privatization would be different.
+ auto IsCompatiblePrivArgOfDirectCS = [&](AbstractCallSite ACS) {
+ CallBase *DC = cast<CallBase>(ACS.getInstruction());
+ int DCArgNo = ACS.getCallArgOperandNo(ArgNo);
+ assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->getNumArgOperands() &&
+ "Expected a direct call operand for callback call operand");
+
+ LLVM_DEBUG({
+ dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
+ << " check if be privatized in the context of its parent ("
+ << Arg->getParent()->getName()
+ << ")\n[AAPrivatizablePtr] because it is an argument in a "
+ "direct call of ("
+ << DCArgNo << "@" << DC->getCalledFunction()->getName()
+ << ").\n";
+ });
+
+ Function *DCCallee = DC->getCalledFunction();
+ if (unsigned(DCArgNo) < DCCallee->arg_size()) {
+ const auto &DCArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
+ *this, IRPosition::argument(*DCCallee->getArg(DCArgNo)));
+ if (DCArgPrivAA.isValidState()) {
+ auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
+ if (!DCArgPrivTy.hasValue())
+ return true;
+ if (DCArgPrivTy.getValue() == PrivatizableType)
+ return true;
+ }
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
+ << " cannot be privatized in the context of its parent ("
+ << Arg->getParent()->getName()
+ << ")\n[AAPrivatizablePtr] because it is an argument in a "
+ "direct call of ("
+ << ACS.getInstruction()->getCalledFunction()->getName()
+ << ").\n[AAPrivatizablePtr] for which the argument "
+ "privatization is not compatible.\n";
+ });
+ return false;
+ };
+
+ // Helper to check if the associated argument is used at the given abstract
+ // call site in a way that is incompatible with the privatization assumed
+ // here.
+ auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) {
+ if (ACS.isDirectCall())
+ return IsCompatiblePrivArgOfCallback(*ACS.getInstruction());
+ if (ACS.isCallbackCall())
+ return IsCompatiblePrivArgOfDirectCS(ACS);
+ return false;
+ };
+
+ bool AllCallSitesKnown;
+ if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true,
+ AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+  /// Given a type to privatize, \p PrivType, collect the constituent types
+  /// (which are used) in \p ReplacementTypes.
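+  /// E.g., a struct { i32, i64 } contributes i32 and i64, an array
+  /// [4 x float] contributes four float entries, and any other type is used
+  /// as-is.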
+ static void
+ identifyReplacementTypes(Type *PrivType,
+ SmallVectorImpl<Type *> &ReplacementTypes) {
+ // TODO: For now we expand the privatization type to the fullest which can
+ // lead to dead arguments that need to be removed later.
+ assert(PrivType && "Expected privatizable type!");
+
+    // Traverse the type, extract constituent types on the outermost level.
+ if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+ for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++)
+ ReplacementTypes.push_back(PrivStructType->getElementType(u));
+ } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+ ReplacementTypes.append(PrivArrayType->getNumElements(),
+ PrivArrayType->getElementType());
+ } else {
+ ReplacementTypes.push_back(PrivType);
+ }
+ }
+
+ /// Initialize \p Base according to the type \p PrivType at position \p IP.
+ /// The values needed are taken from the arguments of \p F starting at
+ /// position \p ArgNo.
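+  /// E.g., for \p PrivType = { i32, i64 }, a pointer to each member offset,
+  /// as given by the DataLayout, is constructed and the corresponding new
+  /// argument is stored through it.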
+ static void createInitialization(Type *PrivType, Value &Base, Function &F,
+ unsigned ArgNo, Instruction &IP) {
+ assert(PrivType && "Expected privatizable type!");
+
+ IRBuilder<NoFolder> IRB(&IP);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ // Traverse the type, build GEPs and stores.
+ if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+ const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+ for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+ Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo();
+ Value *Ptr = constructPointer(
+ PointeeTy, &Base, PrivStructLayout->getElementOffset(u), IRB, DL);
+ new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+ }
+ } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      Type *PointeeTy = PrivArrayType->getElementType();
+      Type *PointeePtrTy = PointeeTy->getPointerTo();
+      // Step by the element's store size, not the size of the pointer type.
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
+ for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+ Value *Ptr =
+ constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL);
+ new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+ }
+ } else {
+ new StoreInst(F.getArg(ArgNo), &Base, &IP);
+ }
+ }
+
+ /// Extract values from \p Base according to the type \p PrivType at the
+ /// call position \p ACS. The values are appended to \p ReplacementValues.
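+  /// E.g., for \p PrivType = [2 x i32], two i32 loads at byte offsets 0 and 4
+  /// are emitted right before the call and appended as replacement values.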
+ void createReplacementValues(Align Alignment, Type *PrivType,
+ AbstractCallSite ACS, Value *Base,
+ SmallVectorImpl<Value *> &ReplacementValues) {
+ assert(Base && "Expected base value!");
+ assert(PrivType && "Expected privatizable type!");
+ Instruction *IP = ACS.getInstruction();
+
+ IRBuilder<NoFolder> IRB(IP);
+ const DataLayout &DL = IP->getModule()->getDataLayout();
+
+ if (Base->getType()->getPointerElementType() != PrivType)
+ Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
+ "", ACS.getInstruction());
+
+ // Traverse the type, build GEPs and loads.
+ if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+ const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+ for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+ Type *PointeeTy = PrivStructType->getElementType(u);
+ Value *Ptr =
+ constructPointer(PointeeTy->getPointerTo(), Base,
+ PrivStructLayout->getElementOffset(u), IRB, DL);
+ LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
+ L->setAlignment(Alignment);
+ ReplacementValues.push_back(L);
+ }
+ } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+ Type *PointeeTy = PrivArrayType->getElementType();
+ uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
+ Type *PointeePtrTy = PointeeTy->getPointerTo();
+ for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+ Value *Ptr =
+ constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
+        LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
+ L->setAlignment(Alignment);
+ ReplacementValues.push_back(L);
+ }
+ } else {
+ LoadInst *L = new LoadInst(PrivType, Base, "", IP);
+ L->setAlignment(Alignment);
+ ReplacementValues.push_back(L);
+ }
+ }
+
+ /// See AbstractAttribute::manifest(...)
+ ChangeStatus manifest(Attributor &A) override {
+ if (!PrivatizableType.hasValue())
+ return ChangeStatus::UNCHANGED;
+ assert(PrivatizableType.getValue() && "Expected privatizable type!");
+
+    // Collect all tail calls in the function as we cannot allow new allocas
+    // to escape into tail calls.
+ // TODO: Be smarter about new allocas escaping into tail calls.
+ SmallVector<CallInst *, 16> TailCalls;
+ if (!A.checkForAllInstructions(
+ [&](Instruction &I) {
+ CallInst &CI = cast<CallInst>(I);
+ if (CI.isTailCall())
+ TailCalls.push_back(&CI);
+ return true;
+ },
+ *this, {Instruction::Call}))
+ return ChangeStatus::UNCHANGED;
+
+ Argument *Arg = getAssociatedArgument();
+ // Query AAAlign attribute for alignment of associated argument to
+ // determine the best alignment of loads.
+ const auto &AlignAA = A.getAAFor<AAAlign>(*this, IRPosition::value(*Arg));
+
+ // Callback to repair the associated function. A new alloca is placed at the
+ // beginning and initialized with the values passed through arguments. The
+ // new alloca replaces the use of the old pointer argument.
+ Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB =
+ [=](const Attributor::ArgumentReplacementInfo &ARI,
+ Function &ReplacementFn, Function::arg_iterator ArgIt) {
+ BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
+ Instruction *IP = &*EntryBB.getFirstInsertionPt();
+ auto *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+ Arg->getName() + ".priv", IP);
+ createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
+ ArgIt->getArgNo(), *IP);
+ Arg->replaceAllUsesWith(AI);
+
+ for (CallInst *CI : TailCalls)
+ CI->setTailCall(false);
+ };
+
+ // Callback to repair a call site of the associated function. The elements
+ // of the privatizable type are loaded prior to the call and passed to the
+ // new function version.
+ Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
+ [=, &AlignAA](const Attributor::ArgumentReplacementInfo &ARI,
+ AbstractCallSite ACS,
+ SmallVectorImpl<Value *> &NewArgOperands) {
+ // When no alignment is specified for the load instruction,
+ // natural alignment is assumed.
+ createReplacementValues(
+ assumeAligned(AlignAA.getAssumedAlign()),
+ PrivatizableType.getValue(), ACS,
+ ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
+ NewArgOperands);
+ };
+
+ // Collect the types that will replace the privatizable type in the function
+ // signature.
+ SmallVector<Type *, 16> ReplacementTypes;
+ identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+
+ // Register a rewrite of the argument.
+ if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes,
+ std::move(FnRepairCB),
+ std::move(ACSRepairCB)))
+ return ChangeStatus::CHANGED;
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(privatizable_ptr);
+ }
+};
+
+struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
+ AAPrivatizablePtrFloating(const IRPosition &IRP, Attributor &A)
+ : AAPrivatizablePtrImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+ // TODO: We can privatize more than arguments.
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::"
+ "updateImpl will not be called");
+ }
+
+ /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
+ Optional<Type *> identifyPrivatizableType(Attributor &A) override {
+ Value *Obj =
+ GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL());
+ if (!Obj) {
+ LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n");
+ return nullptr;
+ }
+
+ if (auto *AI = dyn_cast<AllocaInst>(Obj))
+ if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+ if (CI->isOne())
+ return Obj->getType()->getPointerElementType();
+ if (auto *Arg = dyn_cast<Argument>(Obj)) {
+ auto &PrivArgAA =
+ A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(*Arg));
+ if (PrivArgAA.isAssumedPrivatizablePtr())
+ return Obj->getType()->getPointerElementType();
+ }
+
+ LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
+ "alloca nor privatizable argument: "
+ << *Obj << "!\n");
+ return nullptr;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr);
+ }
+};
+
+struct AAPrivatizablePtrCallSiteArgument final
+ : public AAPrivatizablePtrFloating {
+ AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAPrivatizablePtrFloating(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (getIRPosition().hasAttr(Attribute::ByVal))
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ PrivatizableType = identifyPrivatizableType(A);
+ if (!PrivatizableType.hasValue())
+ return ChangeStatus::UNCHANGED;
+ if (!PrivatizableType.getValue())
+ return indicatePessimisticFixpoint();
+
+ const IRPosition &IRP = getIRPosition();
+ auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+ if (!NoCaptureAA.isAssumedNoCapture()) {
+ LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
+ if (!NoAliasAA.isAssumedNoAlias()) {
+ LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, IRP);
+ if (!MemBehaviorAA.isAssumedReadOnly()) {
+ LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
+ return indicatePessimisticFixpoint();
+ }
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr);
+ }
+};
+
+struct AAPrivatizablePtrCallSiteReturned final
+ : public AAPrivatizablePtrFloating {
+ AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAPrivatizablePtrFloating(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: We can privatize more than arguments.
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr);
+ }
+};
+
+struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating {
+ AAPrivatizablePtrReturned(const IRPosition &IRP, Attributor &A)
+ : AAPrivatizablePtrFloating(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: We can privatize more than arguments.
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr);
+ }
+};
+
+/// -------------------- Memory Behavior Attributes ----------------------------
+/// Includes read-none, read-only, and write-only.
+/// ----------------------------------------------------------------------------
+struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
+ AAMemoryBehaviorImpl(const IRPosition &IRP, Attributor &A)
+ : AAMemoryBehavior(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ intersectAssumedBits(BEST_STATE);
+ getKnownStateFromValue(getIRPosition(), getState());
+ IRAttribute::initialize(A);
+ }
+
+ /// Return the memory behavior information encoded in the IR for \p IRP.
+ static void getKnownStateFromValue(const IRPosition &IRP,
+ BitIntegerState &State,
+ bool IgnoreSubsumingPositions = false) {
+ SmallVector<Attribute, 2> Attrs;
+ IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions);
+ for (const Attribute &Attr : Attrs) {
+ switch (Attr.getKindAsEnum()) {
+ case Attribute::ReadNone:
+ State.addKnownBits(NO_ACCESSES);
+ break;
+ case Attribute::ReadOnly:
+ State.addKnownBits(NO_WRITES);
+ break;
+ case Attribute::WriteOnly:
+ State.addKnownBits(NO_READS);
+ break;
+ default:
+ llvm_unreachable("Unexpected attribute!");
+ }
+ }
+
+ if (auto *I = dyn_cast<Instruction>(&IRP.getAnchorValue())) {
+ if (!I->mayReadFromMemory())
+ State.addKnownBits(NO_READS);
+ if (!I->mayWriteToMemory())
+ State.addKnownBits(NO_WRITES);
+ }
+ }
+
+ /// See AbstractAttribute::getDeducedAttributes(...).
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ assert(Attrs.size() == 0);
+ if (isAssumedReadNone())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone));
+ else if (isAssumedReadOnly())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly));
+ else if (isAssumedWriteOnly())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly));
+ assert(Attrs.size() <= 1);
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ if (hasAttr(Attribute::ReadNone, /* IgnoreSubsumingPositions */ true))
+ return ChangeStatus::UNCHANGED;
+
+ const IRPosition &IRP = getIRPosition();
+
+ // Check if we would improve the existing attributes first.
+ SmallVector<Attribute, 4> DeducedAttrs;
+ getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
+ if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
+ return IRP.hasAttr(Attr.getKindAsEnum(),
+ /* IgnoreSubsumingPositions */ true);
+ }))
+ return ChangeStatus::UNCHANGED;
+
+ // Clear existing attributes.
+ IRP.removeAttrs(AttrKinds);
+
+ // Use the generic manifest method.
+ return IRAttribute::manifest(A);
+ }
+
+ /// See AbstractState::getAsStr().
+ const std::string getAsStr() const override {
+ if (isAssumedReadNone())
+ return "readnone";
+ if (isAssumedReadOnly())
+ return "readonly";
+ if (isAssumedWriteOnly())
+ return "writeonly";
+ return "may-read/write";
+ }
+
+ /// The set of IR attributes AAMemoryBehavior deals with.
+ static const Attribute::AttrKind AttrKinds[3];
+};
+
+const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = {
+ Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly};
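+
+// Note that readnone corresponds to NO_ACCESSES (NO_READS | NO_WRITES),
+// readonly to NO_WRITES, and writeonly to NO_READS; deduction only ever
+// removes assumed bits from this state.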
+
+/// Memory behavior attribute for a floating value.
+struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl {
+ AAMemoryBehaviorFloating(const IRPosition &IRP, Attributor &A)
+ : AAMemoryBehaviorImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorImpl::initialize(A);
+ // Initialize the use vector with all direct uses of the associated value.
+ for (const Use &U : getAssociatedValue().uses())
+ Uses.insert(&U);
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_FLOATING_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_FLOATING_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_FLOATING_ATTR(writeonly)
+ }
+
+private:
+ /// Return true if users of \p UserI might access the underlying
+ /// variable/location described by \p U and should therefore be analyzed.
+ bool followUsersOfUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI);
+
+ /// Update the state according to the effect of use \p U in \p UserI.
+ void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI);
+
+protected:
+ /// Container for (transitive) uses of the associated argument.
+ SetVector<const Use *> Uses;
+};
+
+/// Memory behavior attribute for function argument.
+struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
+ AAMemoryBehaviorArgument(const IRPosition &IRP, Attributor &A)
+ : AAMemoryBehaviorFloating(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ intersectAssumedBits(BEST_STATE);
+ const IRPosition &IRP = getIRPosition();
+ // TODO: Make IgnoreSubsumingPositions a property of an IRAttribute so we
+ // can query it when we use has/getAttr. That would allow us to reuse the
+ // initialize of the base class here.
+ bool HasByVal =
+ IRP.hasAttr({Attribute::ByVal}, /* IgnoreSubsumingPositions */ true);
+ getKnownStateFromValue(IRP, getState(),
+ /* IgnoreSubsumingPositions */ HasByVal);
+
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg || !A.isFunctionIPOAmendable(*(Arg->getParent()))) {
+ indicatePessimisticFixpoint();
+ } else {
+ // Initialize the use vector with all direct uses of the associated value.
+ for (const Use &U : Arg->uses())
+ Uses.insert(&U);
+ }
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+    // TODO: Pointer arguments are not supported for vectors of pointers yet.
+ if (!getAssociatedValue().getType()->isPointerTy())
+ return ChangeStatus::UNCHANGED;
+
+ // TODO: From readattrs.ll: "inalloca parameters are always
+ // considered written"
+ if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
+ removeKnownBits(NO_WRITES);
+ removeAssumedBits(NO_WRITES);
+ }
+ return AAMemoryBehaviorFloating::manifest(A);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_ARG_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_ARG_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_ARG_ATTR(writeonly)
+ }
+};
+
+struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
+ AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAMemoryBehaviorArgument(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (Argument *Arg = getAssociatedArgument()) {
+ if (Arg->hasByValAttr()) {
+ addKnownBits(NO_WRITES);
+ removeKnownBits(NO_READS);
+ removeAssumedBits(NO_READS);
+ }
+ }
+ AAMemoryBehaviorArgument::initialize(A);
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AAMemoryBehavior::StateType &>(ArgAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_CSARG_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_CSARG_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_CSARG_ATTR(writeonly)
+ }
+};
+
+/// Memory behavior attribute for a call site return position.
+struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
+ AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAMemoryBehaviorFloating(IRP, A) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // We do not annotate returned values.
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// An AA to represent the memory behavior function attributes.
+struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
+ AAMemoryBehaviorFunction(const IRPosition &IRP, Attributor &A)
+ : AAMemoryBehaviorImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ Function &F = cast<Function>(getAnchorValue());
+ if (isAssumedReadNone()) {
+ F.removeFnAttr(Attribute::ArgMemOnly);
+ F.removeFnAttr(Attribute::InaccessibleMemOnly);
+ F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+ }
+ return AAMemoryBehaviorImpl::manifest(A);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_FN_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_FN_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_FN_ATTR(writeonly)
+ }
+};
+
+/// AAMemoryBehavior attribute for call sites.
+struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
+ AAMemoryBehaviorCallSite(const IRPosition &IRP, Attributor &A)
+ : AAMemoryBehaviorImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryBehaviorImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || !A.isFunctionIPOAmendable(*F)) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
+ return clampStateAndIndicateChange(
+ getState(),
+ static_cast<const AAMemoryBehavior::StateType &>(FnAA.getState()));
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_CS_ATTR(readnone)
+ else if (isAssumedReadOnly())
+ STATS_DECLTRACK_CS_ATTR(readonly)
+ else if (isAssumedWriteOnly())
+ STATS_DECLTRACK_CS_ATTR(writeonly)
+ }
+};
+
+ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
+
+ // The current assumed state used to determine a change.
+ auto AssumedState = getAssumed();
+
+ auto CheckRWInst = [&](Instruction &I) {
+ // If the instruction has an own memory behavior state, use it to restrict
+ // the local state. No further analysis is required as the other memory
+ // state is as optimistic as it gets.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+ *this, IRPosition::callsite_function(*CB));
+ intersectAssumedBits(MemBehaviorAA.getAssumed());
+ return !isAtFixpoint();
+ }
+
+ // Remove access kind modifiers if necessary.
+ if (I.mayReadFromMemory())
+ removeAssumedBits(NO_READS);
+ if (I.mayWriteToMemory())
+ removeAssumedBits(NO_WRITES);
+ return !isAtFixpoint();
+ };
+
+ if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
+ return indicatePessimisticFixpoint();
+
+ return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
+ : ChangeStatus::UNCHANGED;
+}
+
+ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
+
+ const IRPosition &IRP = getIRPosition();
+ const IRPosition &FnPos = IRPosition::function_scope(IRP);
+ AAMemoryBehavior::StateType &S = getState();
+
+  // First, check the function scope. We take the known information and we
+  // avoid work if the assumed information implies the current assumed
+  // information for this attribute. This is valid for all but byval arguments.
+ Argument *Arg = IRP.getAssociatedArgument();
+ AAMemoryBehavior::base_t FnMemAssumedState =
+ AAMemoryBehavior::StateType::getWorstState();
+ if (!Arg || !Arg->hasByValAttr()) {
+ const auto &FnMemAA = A.getAAFor<AAMemoryBehavior>(
+ *this, FnPos, /* TrackDependence */ true, DepClassTy::OPTIONAL);
+ FnMemAssumedState = FnMemAA.getAssumed();
+ S.addKnownBits(FnMemAA.getKnown());
+ if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
+ return ChangeStatus::UNCHANGED;
+ }
+
+  // Make sure the value is not captured (except through "return"); if it is,
+  // any information derived would be irrelevant anyway as we cannot check
+  // the potential aliases introduced by the capture. However, no need to
+  // fall back to anything less optimistic than the function state.
+ const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(
+ *this, IRP, /* TrackDependence */ true, DepClassTy::OPTIONAL);
+ if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ S.intersectAssumedBits(FnMemAssumedState);
+ return ChangeStatus::CHANGED;
+ }
+
+ // The current assumed state used to determine a change.
+ auto AssumedState = S.getAssumed();
+
+ // Liveness information to exclude dead users.
+ // TODO: Take the FnPos once we have call site specific liveness information.
+ const auto &LivenessAA = A.getAAFor<AAIsDead>(
+ *this, IRPosition::function(*IRP.getAssociatedFunction()),
+ /* TrackDependence */ false);
+
+ // Visit and expand uses until all are analyzed or a fixpoint is reached.
+ for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) {
+ const Use *U = Uses[i];
+ Instruction *UserI = cast<Instruction>(U->getUser());
+ LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI
+ << " [Dead: " << (A.isAssumedDead(*U, this, &LivenessAA))
+ << "]\n");
+ if (A.isAssumedDead(*U, this, &LivenessAA))
+ continue;
+
+    // Droppable users, e.g., llvm.assume, do not actually perform any action.
+ if (UserI->isDroppable())
+ continue;
+
+ // Check if the users of UserI should also be visited.
+ if (followUsersOfUseIn(A, U, UserI))
+ for (const Use &UserIUse : UserI->uses())
+ Uses.insert(&UserIUse);
+
+ // If UserI might touch memory we analyze the use in detail.
+ if (UserI->mayReadOrWriteMemory())
+ analyzeUseIn(A, U, UserI);
+ }
+
+ return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
+ : ChangeStatus::UNCHANGED;
+}
+
+bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI) {
+  // The loaded value is unrelated to the pointer argument; no need to
+ // follow the users of the load.
+ if (isa<LoadInst>(UserI))
+ return false;
+
+  // By default we follow all uses assuming UserI might leak information on U;
+  // we have special handling for call site operands though.
+ const auto *CB = dyn_cast<CallBase>(UserI);
+ if (!CB || !CB->isArgOperand(U))
+ return true;
+
+ // If the use is a call argument known not to be captured, the users of
+ // the call do not need to be visited because they have to be unrelated to
+ // the input. Note that this check is not trivial even though we disallow
+  // general capturing of the underlying argument. The reason is that the
+  // call might capture the argument "through return", which we allow and for
+  // which we need to check call users.
+ if (U->get()->getType()->isPointerTy()) {
+ unsigned ArgNo = CB->getArgOperandNo(U);
+ const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(
+ *this, IRPosition::callsite_argument(*CB, ArgNo),
+ /* TrackDependence */ true, DepClassTy::OPTIONAL);
+ return !ArgNoCaptureAA.isAssumedNoCapture();
+ }
+
+ return true;
+}
+
+void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U,
+ const Instruction *UserI) {
+ assert(UserI->mayReadOrWriteMemory());
+
+ switch (UserI->getOpcode()) {
+ default:
+ // TODO: Handle all atomics and other side-effect operations we know of.
+ break;
+ case Instruction::Load:
+ // Loads cause the NO_READS property to disappear.
+ removeAssumedBits(NO_READS);
+ return;
+
+ case Instruction::Store:
+ // Stores cause the NO_WRITES property to disappear if the use is the
+ // pointer operand. Note that we do assume that capturing was taken care of
+ // somewhere else.
+ if (cast<StoreInst>(UserI)->getPointerOperand() == U->get())
+ removeAssumedBits(NO_WRITES);
+ return;
+
+ case Instruction::Call:
+ case Instruction::CallBr:
+ case Instruction::Invoke: {
+ // For call sites we look at the argument memory behavior attribute (this
+ // could be recursive!) in order to restrict our own state.
+ const auto *CB = cast<CallBase>(UserI);
+
+ // Give up on operand bundles.
+ if (CB->isBundleOperand(U)) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+    // Calling a function does read the function pointer, and may even write
+    // it if the function is self-modifying.
+ if (CB->isCallee(U)) {
+ removeAssumedBits(NO_READS);
+ break;
+ }
+
+ // Adjust the possible access behavior based on the information on the
+ // argument.
+ IRPosition Pos;
+ if (U->get()->getType()->isPointerTy())
+ Pos = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(U));
+ else
+ Pos = IRPosition::callsite_function(*CB);
+ const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+ *this, Pos,
+ /* TrackDependence */ true, DepClassTy::OPTIONAL);
+ // "assumed" has at most the same bits as the MemBehaviorAA assumed
+ // and at least "known".
+ intersectAssumedBits(MemBehaviorAA.getAssumed());
+ return;
+ }
+ };
+
+ // Generally, look at the "may-properties" and adjust the assumed state if we
+ // did not trigger special handling before.
+ if (UserI->mayReadFromMemory())
+ removeAssumedBits(NO_READS);
+ if (UserI->mayWriteToMemory())
+ removeAssumedBits(NO_WRITES);
+}
+
+} // namespace
+
+/// -------------------- Memory Locations Attributes ---------------------------
+/// Includes read-none, argmemonly, inaccessiblememonly,
+/// inaccessiblemem_or_argmemonly.
+/// ----------------------------------------------------------------------------
+
+std::string AAMemoryLocation::getMemoryLocationsAsStr(
+ AAMemoryLocation::MemoryLocationsKind MLK) {
+ if (0 == (MLK & AAMemoryLocation::NO_LOCATIONS))
+ return "all memory";
+ if (MLK == AAMemoryLocation::NO_LOCATIONS)
+ return "no memory";
+ std::string S = "memory:";
+ if (0 == (MLK & AAMemoryLocation::NO_LOCAL_MEM))
+ S += "stack,";
+ if (0 == (MLK & AAMemoryLocation::NO_CONST_MEM))
+ S += "constant,";
+ if (0 == (MLK & AAMemoryLocation::NO_GLOBAL_INTERNAL_MEM))
+ S += "internal global,";
+ if (0 == (MLK & AAMemoryLocation::NO_GLOBAL_EXTERNAL_MEM))
+ S += "external global,";
+ if (0 == (MLK & AAMemoryLocation::NO_ARGUMENT_MEM))
+ S += "argument,";
+ if (0 == (MLK & AAMemoryLocation::NO_INACCESSIBLE_MEM))
+ S += "inaccessible,";
+ if (0 == (MLK & AAMemoryLocation::NO_MALLOCED_MEM))
+ S += "malloced,";
+ if (0 == (MLK & AAMemoryLocation::NO_UNKOWN_MEM))
+ S += "unknown,";
+ S.pop_back();
+ return S;
+}
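+// For example, a function assumed to access only an internal global and an
+// argument prints as "memory:internal global,argument"; the trailing comma is
+// stripped above.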
+
+namespace {
+struct AAMemoryLocationImpl : public AAMemoryLocation {
+
+ AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
+ : AAMemoryLocation(IRP, A), Allocator(A.Allocator) {
+ for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u)
+ AccessKind2Accesses[u] = nullptr;
+ }
+
+ ~AAMemoryLocationImpl() {
+    // The AccessSets are allocated via a BumpPtrAllocator, so we call
+    // their destructors manually.
+ for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u)
+ if (AccessKind2Accesses[u])
+ AccessKind2Accesses[u]->~AccessSet();
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ intersectAssumedBits(BEST_STATE);
+ getKnownStateFromValue(A, getIRPosition(), getState());
+ IRAttribute::initialize(A);
+ }
+
+  /// Return the memory location information encoded in the IR for \p IRP.
+ static void getKnownStateFromValue(Attributor &A, const IRPosition &IRP,
+ BitIntegerState &State,
+ bool IgnoreSubsumingPositions = false) {
+    // For internal functions we ignore `argmemonly` and
+    // `inaccessiblemem_or_argmemonly` as we might break them via
+    // interprocedural constant propagation. It is unclear if this is the best
+    // way but it is unlikely this will cause real performance problems. If we
+    // are deriving attributes for the anchor function we even remove the
+    // attribute in addition to ignoring it.
+ bool UseArgMemOnly = true;
+ Function *AnchorFn = IRP.getAnchorScope();
+ if (AnchorFn && A.isRunOn(*AnchorFn))
+ UseArgMemOnly = !AnchorFn->hasLocalLinkage();
+
+ SmallVector<Attribute, 2> Attrs;
+ IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions);
+ for (const Attribute &Attr : Attrs) {
+ switch (Attr.getKindAsEnum()) {
+ case Attribute::ReadNone:
+ State.addKnownBits(NO_LOCAL_MEM | NO_CONST_MEM);
+ break;
+ case Attribute::InaccessibleMemOnly:
+ State.addKnownBits(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
+ break;
+ case Attribute::ArgMemOnly:
+ if (UseArgMemOnly)
+ State.addKnownBits(inverseLocation(NO_ARGUMENT_MEM, true, true));
+ else
+ IRP.removeAttrs({Attribute::ArgMemOnly});
+ break;
+ case Attribute::InaccessibleMemOrArgMemOnly:
+ if (UseArgMemOnly)
+ State.addKnownBits(inverseLocation(
+ NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
+ else
+ IRP.removeAttrs({Attribute::InaccessibleMemOrArgMemOnly});
+ break;
+ default:
+ llvm_unreachable("Unexpected attribute!");
+ }
+ }
+ }
+
+ /// See AbstractAttribute::getDeducedAttributes(...).
+ void getDeducedAttributes(LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ assert(Attrs.size() == 0);
+ if (isAssumedReadNone()) {
+ Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone));
+ } else if (getIRPosition().getPositionKind() == IRPosition::IRP_FUNCTION) {
+ if (isAssumedInaccessibleMemOnly())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::InaccessibleMemOnly));
+ else if (isAssumedArgMemOnly())
+ Attrs.push_back(Attribute::get(Ctx, Attribute::ArgMemOnly));
+ else if (isAssumedInaccessibleOrArgMemOnly())
+ Attrs.push_back(
+ Attribute::get(Ctx, Attribute::InaccessibleMemOrArgMemOnly));
+ }
+ assert(Attrs.size() <= 1);
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ const IRPosition &IRP = getIRPosition();
+
+ // Check if we would improve the existing attributes first.
+ SmallVector<Attribute, 4> DeducedAttrs;
+ getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
+ if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
+ return IRP.hasAttr(Attr.getKindAsEnum(),
+ /* IgnoreSubsumingPositions */ true);
+ }))
+ return ChangeStatus::UNCHANGED;
+
+ // Clear existing attributes.
+ IRP.removeAttrs(AttrKinds);
+ if (isAssumedReadNone())
+ IRP.removeAttrs(AAMemoryBehaviorImpl::AttrKinds);
+
+ // Use the generic manifest method.
+ return IRAttribute::manifest(A);
+ }
+
+ /// See AAMemoryLocation::checkForAllAccessesToMemoryKind(...).
+ bool checkForAllAccessesToMemoryKind(
+ function_ref<bool(const Instruction *, const Value *, AccessKind,
+ MemoryLocationsKind)>
+ Pred,
+ MemoryLocationsKind RequestedMLK) const override {
+ if (!isValidState())
+ return false;
+
+ MemoryLocationsKind AssumedMLK = getAssumedNotAccessedLocation();
+ if (AssumedMLK == NO_LOCATIONS)
+ return true;
+
+ unsigned Idx = 0;
+ for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS;
+ CurMLK *= 2, ++Idx) {
+ if (CurMLK & RequestedMLK)
+ continue;
+
+ if (const AccessSet *Accesses = AccessKind2Accesses[Idx])
+ for (const AccessInfo &AI : *Accesses)
+ if (!Pred(AI.I, AI.Ptr, AI.Kind, CurMLK))
+ return false;
+ }
+
+ return true;
+ }
+
+ ChangeStatus indicatePessimisticFixpoint() override {
+ // If we give up and indicate a pessimistic fixpoint this instruction will
+ // become an access for all potential access kinds:
+ // TODO: Add pointers for argmemonly and globals to improve the results of
+ // checkForAllAccessesToMemoryKind.
+ bool Changed = false;
+ MemoryLocationsKind KnownMLK = getKnown();
+ Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
+ for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2)
+ if (!(CurMLK & KnownMLK))
+ updateStateAndAccessesMap(getState(), CurMLK, I, nullptr, Changed,
+ getAccessKindFromInst(I));
+ return AAMemoryLocation::indicatePessimisticFixpoint();
+ }
+
+protected:
+ /// Helper struct to tie together an instruction that has a read or write
+ /// effect with the pointer it accesses (if any).
+ struct AccessInfo {
+
+ /// The instruction that caused the access.
+ const Instruction *I;
+
+ /// The base pointer that is accessed, or null if unknown.
+ const Value *Ptr;
+
+ /// The kind of access (read/write/read+write).
+ AccessKind Kind;
+
+ bool operator==(const AccessInfo &RHS) const {
+ return I == RHS.I && Ptr == RHS.Ptr && Kind == RHS.Kind;
+ }
+ bool operator()(const AccessInfo &LHS, const AccessInfo &RHS) const {
+ if (LHS.I != RHS.I)
+ return LHS.I < RHS.I;
+ if (LHS.Ptr != RHS.Ptr)
+ return LHS.Ptr < RHS.Ptr;
+ if (LHS.Kind != RHS.Kind)
+ return LHS.Kind < RHS.Kind;
+ return false;
+ }
+ };
+
+ /// Mapping from *single* memory location kinds, e.g., LOCAL_MEM with the
+ /// value of NO_LOCAL_MEM, to the accesses encountered for this memory kind.
+ using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>;
+ AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()];
+
+ /// Return the kind(s) of location that may be accessed by \p V.
+ AAMemoryLocation::MemoryLocationsKind
+ categorizeAccessedLocations(Attributor &A, Instruction &I, bool &Changed);
+
+ /// Return the access kind as determined by \p I.
+ AccessKind getAccessKindFromInst(const Instruction *I) {
+ AccessKind AK = READ_WRITE;
+ if (I) {
+ AK = I->mayReadFromMemory() ? READ : NONE;
+ AK = AccessKind(AK | (I->mayWriteToMemory() ? WRITE : NONE));
+ }
+ return AK;
+ }
+
+ /// Update the state \p State and the AccessKind2Accesses given that \p I is
+ /// an access of kind \p AK to a \p MLK memory location with the access
+ /// pointer \p Ptr.
+ void updateStateAndAccessesMap(AAMemoryLocation::StateType &State,
+ MemoryLocationsKind MLK, const Instruction *I,
+ const Value *Ptr, bool &Changed,
+ AccessKind AK = READ_WRITE) {
+
+ assert(isPowerOf2_32(MLK) && "Expected a single location set!");
+ auto *&Accesses = AccessKind2Accesses[llvm::Log2_32(MLK)];
+ if (!Accesses)
+ Accesses = new (Allocator) AccessSet();
+ Changed |= Accesses->insert(AccessInfo{I, Ptr, AK}).second;
+ State.removeAssumedBits(MLK);
+ }
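+  // For example, recording a store to an internal global remembers the
+  // (instruction, pointer, WRITE) access and clears the assumed
+  // NO_GLOBAL_INTERNAL_MEM bit, i.e., the location counts as accessed.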
+
+  /// Determine the underlying location kinds for \p Ptr, e.g., globals or
+ /// arguments, and update the state and access map accordingly.
+ void categorizePtrValue(Attributor &A, const Instruction &I, const Value &Ptr,
+ AAMemoryLocation::StateType &State, bool &Changed);
+
+ /// Used to allocate access sets.
+ BumpPtrAllocator &Allocator;
+
+ /// The set of IR attributes AAMemoryLocation deals with.
+ static const Attribute::AttrKind AttrKinds[4];
+};
+
+const Attribute::AttrKind AAMemoryLocationImpl::AttrKinds[] = {
+ Attribute::ReadNone, Attribute::InaccessibleMemOnly, Attribute::ArgMemOnly,
+ Attribute::InaccessibleMemOrArgMemOnly};
+
+void AAMemoryLocationImpl::categorizePtrValue(
+ Attributor &A, const Instruction &I, const Value &Ptr,
+ AAMemoryLocation::StateType &State, bool &Changed) {
+ LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize pointer locations for "
+ << Ptr << " ["
+ << getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
+
+ auto StripGEPCB = [](Value *V) -> Value * {
+ auto *GEP = dyn_cast<GEPOperator>(V);
+ while (GEP) {
+ V = GEP->getPointerOperand();
+ GEP = dyn_cast<GEPOperator>(V);
+ }
+ return V;
+ };
+
+ auto VisitValueCB = [&](Value &V, const Instruction *,
+ AAMemoryLocation::StateType &T,
+ bool Stripped) -> bool {
+ MemoryLocationsKind MLK = NO_LOCATIONS;
+ assert(!isa<GEPOperator>(V) && "GEPs should have been stripped.");
+ if (isa<UndefValue>(V))
+ return true;
+ if (auto *Arg = dyn_cast<Argument>(&V)) {
+ if (Arg->hasByValAttr())
+ MLK = NO_LOCAL_MEM;
+ else
+ MLK = NO_ARGUMENT_MEM;
+ } else if (auto *GV = dyn_cast<GlobalValue>(&V)) {
+ if (GV->hasLocalLinkage())
+ MLK = NO_GLOBAL_INTERNAL_MEM;
+ else
+ MLK = NO_GLOBAL_EXTERNAL_MEM;
+ } else if (isa<ConstantPointerNull>(V) &&
+ !NullPointerIsDefined(getAssociatedFunction(),
+ V.getType()->getPointerAddressSpace())) {
+ return true;
+ } else if (isa<AllocaInst>(V)) {
+ MLK = NO_LOCAL_MEM;
+ } else if (const auto *CB = dyn_cast<CallBase>(&V)) {
+ const auto &NoAliasAA =
+ A.getAAFor<AANoAlias>(*this, IRPosition::callsite_returned(*CB));
+ if (NoAliasAA.isAssumedNoAlias())
+ MLK = NO_MALLOCED_MEM;
+ else
+ MLK = NO_UNKOWN_MEM;
+ } else {
+ MLK = NO_UNKOWN_MEM;
+ }
+
+ assert(MLK != NO_LOCATIONS && "No location specified!");
+ updateStateAndAccessesMap(T, MLK, &I, &V, Changed,
+ getAccessKindFromInst(&I));
+ LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Ptr value cannot be categorized: "
+ << V << " -> " << getMemoryLocationsAsStr(T.getAssumed())
+ << "\n");
+ return true;
+ };
+
+ if (!genericValueTraversal<AAMemoryLocation, AAMemoryLocation::StateType>(
+ A, IRPosition::value(Ptr), *this, State, VisitValueCB, getCtxI(),
+ /* UseValueSimplify */ true,
+ /* MaxValues */ 32, StripGEPCB)) {
+ LLVM_DEBUG(
+ dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
+ updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
+ getAccessKindFromInst(&I));
+ } else {
+ LLVM_DEBUG(
+ dbgs()
+ << "[AAMemoryLocation] Accessed locations with pointer locations: "
+ << getMemoryLocationsAsStr(State.getAssumed()) << "\n");
+ }
+}
+
+AAMemoryLocation::MemoryLocationsKind
+AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
+ bool &Changed) {
+ LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize accessed locations for "
+ << I << "\n");
+
+ AAMemoryLocation::StateType AccessedLocs;
+ AccessedLocs.intersectAssumedBits(NO_LOCATIONS);
+
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+
+    // First check if we assume any memory access is visible at all.
+ const auto &CBMemLocationAA =
+ A.getAAFor<AAMemoryLocation>(*this, IRPosition::callsite_function(*CB));
+ LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize call site: " << I
+ << " [" << CBMemLocationAA << "]\n");
+
+ if (CBMemLocationAA.isAssumedReadNone())
+ return NO_LOCATIONS;
+
+ if (CBMemLocationAA.isAssumedInaccessibleMemOnly()) {
+ updateStateAndAccessesMap(AccessedLocs, NO_INACCESSIBLE_MEM, &I, nullptr,
+ Changed, getAccessKindFromInst(&I));
+ return AccessedLocs.getAssumed();
+ }
+
+ uint32_t CBAssumedNotAccessedLocs =
+ CBMemLocationAA.getAssumedNotAccessedLocation();
+
+    // Set the argmemonly and global bits as we handle them separately below.
+ uint32_t CBAssumedNotAccessedLocsNoArgMem =
+ CBAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM;
+
+ for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) {
+ if (CBAssumedNotAccessedLocsNoArgMem & CurMLK)
+ continue;
+ updateStateAndAccessesMap(AccessedLocs, CurMLK, &I, nullptr, Changed,
+ getAccessKindFromInst(&I));
+ }
+
+ // Now handle global memory if it might be accessed. This is slightly tricky
+ // as NO_GLOBAL_MEM has multiple bits set.
+ bool HasGlobalAccesses = ((~CBAssumedNotAccessedLocs) & NO_GLOBAL_MEM);
+ if (HasGlobalAccesses) {
+ auto AccessPred = [&](const Instruction *, const Value *Ptr,
+ AccessKind Kind, MemoryLocationsKind MLK) {
+ updateStateAndAccessesMap(AccessedLocs, MLK, &I, Ptr, Changed,
+ getAccessKindFromInst(&I));
+ return true;
+ };
+ if (!CBMemLocationAA.checkForAllAccessesToMemoryKind(
+ AccessPred, inverseLocation(NO_GLOBAL_MEM, false, false)))
+ return AccessedLocs.getWorstState();
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "[AAMemoryLocation] Accessed state before argument handling: "
+ << getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n");
+
+ // Now handle argument memory if it might be accessed.
+ bool HasArgAccesses = ((~CBAssumedNotAccessedLocs) & NO_ARGUMENT_MEM);
+ if (HasArgAccesses) {
+ for (unsigned ArgNo = 0, E = CB->getNumArgOperands(); ArgNo < E;
+ ++ArgNo) {
+
+ // Skip non-pointer arguments.
+ const Value *ArgOp = CB->getArgOperand(ArgNo);
+ if (!ArgOp->getType()->isPtrOrPtrVectorTy())
+ continue;
+
+ // Skip readnone arguments.
+ const IRPosition &ArgOpIRP = IRPosition::callsite_argument(*CB, ArgNo);
+ const auto &ArgOpMemLocationAA = A.getAAFor<AAMemoryBehavior>(
+ *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL);
+
+ if (ArgOpMemLocationAA.isAssumedReadNone())
+ continue;
+
+ // Categorize potentially accessed pointer arguments as if there was an
+ // access instruction with them as pointer.
+ categorizePtrValue(A, I, *ArgOp, AccessedLocs, Changed);
+ }
+ }
+
+ LLVM_DEBUG(
+ dbgs() << "[AAMemoryLocation] Accessed state after argument handling: "
+ << getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n");
+
+ return AccessedLocs.getAssumed();
+ }
+
+ if (const Value *Ptr = getPointerOperand(&I, /* AllowVolatile */ true)) {
+ LLVM_DEBUG(
+ dbgs() << "[AAMemoryLocation] Categorize memory access with pointer: "
+ << I << " [" << *Ptr << "]\n");
+ categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed);
+ return AccessedLocs.getAssumed();
+ }
+
+ LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Failed to categorize instruction: "
+ << I << "\n");
+ updateStateAndAccessesMap(AccessedLocs, NO_UNKOWN_MEM, &I, nullptr, Changed,
+ getAccessKindFromInst(&I));
+ return AccessedLocs.getAssumed();
+}
+
+/// An AA to represent the memory location function attributes.
+struct AAMemoryLocationFunction final : public AAMemoryLocationImpl {
+ AAMemoryLocationFunction(const IRPosition &IRP, Attributor &A)
+ : AAMemoryLocationImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+  ChangeStatus updateImpl(Attributor &A) override {
+
+ const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+ *this, getIRPosition(), /* TrackDependence */ false);
+ if (MemBehaviorAA.isAssumedReadNone()) {
+ if (MemBehaviorAA.isKnownReadNone())
+ return indicateOptimisticFixpoint();
+ assert(isAssumedReadNone() &&
+ "AAMemoryLocation was not read-none but AAMemoryBehavior was!");
+ A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // The current assumed state used to determine a change.
+ auto AssumedState = getAssumed();
+ bool Changed = false;
+
+ auto CheckRWInst = [&](Instruction &I) {
+ MemoryLocationsKind MLK = categorizeAccessedLocations(A, I, Changed);
+ LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Accessed locations for " << I
+ << ": " << getMemoryLocationsAsStr(MLK) << "\n");
+ removeAssumedBits(inverseLocation(MLK, false, false));
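+      // This clears the NO_* bits of every location kind the instruction
+      // may access; the bits that remain assumed still denote location
+      // kinds that are not accessed.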
+ return true;
+ };
+
+ if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
+ return indicatePessimisticFixpoint();
+
+ Changed |= AssumedState != getAssumed();
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_FN_ATTR(readnone)
+ else if (isAssumedArgMemOnly())
+ STATS_DECLTRACK_FN_ATTR(argmemonly)
+ else if (isAssumedInaccessibleMemOnly())
+ STATS_DECLTRACK_FN_ATTR(inaccessiblememonly)
+ else if (isAssumedInaccessibleOrArgMemOnly())
+ STATS_DECLTRACK_FN_ATTR(inaccessiblememorargmemonly)
+ }
+};
+
+/// AAMemoryLocation attribute for call sites.
+struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
+ AAMemoryLocationCallSite(const IRPosition &IRP, Attributor &A)
+ : AAMemoryLocationImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAMemoryLocationImpl::initialize(A);
+ Function *F = getAssociatedFunction();
+ if (!F || !A.isFunctionIPOAmendable(*F)) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call site arguments instead
+    //       of redirecting requests to the callee argument.
+ Function *F = getAssociatedFunction();
+ const IRPosition &FnPos = IRPosition::function(*F);
+ auto &FnAA = A.getAAFor<AAMemoryLocation>(*this, FnPos);
+ bool Changed = false;
+ auto AccessPred = [&](const Instruction *I, const Value *Ptr,
+ AccessKind Kind, MemoryLocationsKind MLK) {
+ updateStateAndAccessesMap(getState(), MLK, I, Ptr, Changed,
+ getAccessKindFromInst(I));
+ return true;
+ };
+ if (!FnAA.checkForAllAccessesToMemoryKind(AccessPred, ALL_LOCATIONS))
+ return indicatePessimisticFixpoint();
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ if (isAssumedReadNone())
+ STATS_DECLTRACK_CS_ATTR(readnone)
+ }
+};
+
+/// ------------------ Value Constant Range Attribute -------------------------
+
+struct AAValueConstantRangeImpl : AAValueConstantRange {
+ using StateType = IntegerRangeState;
+ AAValueConstantRangeImpl(const IRPosition &IRP, Attributor &A)
+ : AAValueConstantRange(IRP, A) {}
+
+ /// See AbstractAttribute::getAsStr().
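+  /// The result looks like, e.g., "range(32)<full-set / [0,10)>", i.e., the
+  /// known range followed by the assumed range.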
+ const std::string getAsStr() const override {
+ std::string Str;
+ llvm::raw_string_ostream OS(Str);
+ OS << "range(" << getBitWidth() << ")<";
+ getKnown().print(OS);
+ OS << " / ";
+ getAssumed().print(OS);
+ OS << ">";
+ return OS.str();
+ }
+
+ /// Helper function to get a SCEV expr for the associated value at program
+ /// point \p I.
+ const SCEV *getSCEV(Attributor &A, const Instruction *I = nullptr) const {
+ if (!getAnchorScope())
+ return nullptr;
+
+ ScalarEvolution *SE =
+ A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(
+ *getAnchorScope());
+
+ LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(
+ *getAnchorScope());
+
+ if (!SE || !LI)
+ return nullptr;
+
+ const SCEV *S = SE->getSCEV(&getAssociatedValue());
+ if (!I)
+ return S;
+
+ return SE->getSCEVAtScope(S, LI->getLoopFor(I->getParent()));
+ }
+
+ /// Helper function to get a range from SCEV for the associated value at
+ /// program point \p I.
+ ConstantRange getConstantRangeFromSCEV(Attributor &A,
+ const Instruction *I = nullptr) const {
+ if (!getAnchorScope())
+ return getWorstState(getBitWidth());
+
+ ScalarEvolution *SE =
+ A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(
+ *getAnchorScope());
+
+ const SCEV *S = getSCEV(A, I);
+ if (!SE || !S)
+ return getWorstState(getBitWidth());
+
+ return SE->getUnsignedRange(S);
+ }
+
+ /// Helper function to get a range from LVI for the associated value at
+ /// program point \p I.
+ ConstantRange
+ getConstantRangeFromLVI(Attributor &A,
+ const Instruction *CtxI = nullptr) const {
+ if (!getAnchorScope())
+ return getWorstState(getBitWidth());
+
+ LazyValueInfo *LVI =
+ A.getInfoCache().getAnalysisResultForFunction<LazyValueAnalysis>(
+ *getAnchorScope());
+
+ if (!LVI || !CtxI)
+ return getWorstState(getBitWidth());
+ return LVI->getConstantRange(&getAssociatedValue(),
+ const_cast<BasicBlock *>(CtxI->getParent()),
+ const_cast<Instruction *>(CtxI));
+ }
+
+ /// See AAValueConstantRange::getKnownConstantRange(..).
+ ConstantRange
+ getKnownConstantRange(Attributor &A,
+ const Instruction *CtxI = nullptr) const override {
+ if (!CtxI || CtxI == getCtxI())
+ return getKnown();
+
+ ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
+ ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI);
+ return getKnown().intersectWith(SCEVR).intersectWith(LVIR);
+ }
+
+ /// See AAValueConstantRange::getAssumedConstantRange(..).
+ ConstantRange
+ getAssumedConstantRange(Attributor &A,
+ const Instruction *CtxI = nullptr) const override {
+ // TODO: Make SCEV use Attributor assumption.
+ // We may be able to bound a variable range via assumptions in
+ // Attributor. ex.) If x is assumed to be in [1, 3] and y is known to
+ // evolve to x^2 + x, then we can say that y is in [2, 12].
+
+ if (!CtxI || CtxI == getCtxI())
+ return getAssumed();
+
+ ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
+ ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI);
+ return getAssumed().intersectWith(SCEVR).intersectWith(LVIR);
+ }
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ // Intersect a range given by SCEV.
+ intersectKnown(getConstantRangeFromSCEV(A, getCtxI()));
+
+ // Intersect a range given by LVI.
+ intersectKnown(getConstantRangeFromLVI(A, getCtxI()));
+ }
+
+ /// Helper function to create MDNode for range metadata.
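+  /// For example, an assumed i32 range [0, 10) becomes the metadata tuple
+  /// !{i32 0, i32 10}; note that range metadata bounds are half-open.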
+ static MDNode *
+ getMDNodeForConstantRange(Type *Ty, LLVMContext &Ctx,
+ const ConstantRange &AssumedConstantRange) {
+ Metadata *LowAndHigh[] = {ConstantAsMetadata::get(ConstantInt::get(
+ Ty, AssumedConstantRange.getLower())),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Ty, AssumedConstantRange.getUpper()))};
+ return MDNode::get(Ctx, LowAndHigh);
+ }
+
+  /// Return true if \p Assumed is a strictly better range than the one
+  /// annotated in \p KnownRanges.
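+  /// For example, [10, 20) is better than an annotated [0, 100), but not
+  /// better than an identical annotation of [10, 20).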
+ static bool isBetterRange(const ConstantRange &Assumed, MDNode *KnownRanges) {
+
+ if (Assumed.isFullSet())
+ return false;
+
+ if (!KnownRanges)
+ return true;
+
+    // If multiple ranges are annotated in the IR, we give up annotating the
+    // assumed range for now.
+
+    // TODO: If there exists a known range which contains the assumed range,
+    // we can say the assumed range is better.
+ if (KnownRanges->getNumOperands() > 2)
+ return false;
+
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(KnownRanges->getOperand(0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(KnownRanges->getOperand(1));
+
+ ConstantRange Known(Lower->getValue(), Upper->getValue());
+ return Known.contains(Assumed) && Known != Assumed;
+ }
+
+ /// Helper function to set range metadata.
+ static bool
+ setRangeMetadataIfisBetterRange(Instruction *I,
+ const ConstantRange &AssumedConstantRange) {
+ auto *OldRangeMD = I->getMetadata(LLVMContext::MD_range);
+ if (isBetterRange(AssumedConstantRange, OldRangeMD)) {
+ if (!AssumedConstantRange.isEmptySet()) {
+ I->setMetadata(LLVMContext::MD_range,
+ getMDNodeForConstantRange(I->getType(), I->getContext(),
+ AssumedConstantRange));
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// See AbstractAttribute::manifest()
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ ConstantRange AssumedConstantRange = getAssumedConstantRange(A);
+ assert(!AssumedConstantRange.isFullSet() && "Invalid state");
+
+ auto &V = getAssociatedValue();
+ if (!AssumedConstantRange.isEmptySet() &&
+ !AssumedConstantRange.isSingleElement()) {
+ if (Instruction *I = dyn_cast<Instruction>(&V))
+ if (isa<CallInst>(I) || isa<LoadInst>(I))
+ if (setRangeMetadataIfisBetterRange(I, AssumedConstantRange))
+ Changed = ChangeStatus::CHANGED;
+ }
+
+ return Changed;
+ }
+};
+
+struct AAValueConstantRangeArgument final
+ : AAArgumentFromCallSiteArguments<
+ AAValueConstantRange, AAValueConstantRangeImpl, IntegerRangeState> {
+ using Base = AAArgumentFromCallSiteArguments<
+ AAValueConstantRange, AAValueConstantRangeImpl, IntegerRangeState>;
+ AAValueConstantRangeArgument(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
+ indicatePessimisticFixpoint();
+ } else {
+ Base::initialize(A);
+ }
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(value_range)
+ }
+};
+
+struct AAValueConstantRangeReturned
+ : AAReturnedFromReturnedValues<AAValueConstantRange,
+ AAValueConstantRangeImpl> {
+ using Base = AAReturnedFromReturnedValues<AAValueConstantRange,
+ AAValueConstantRangeImpl>;
+ AAValueConstantRangeReturned(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(value_range)
+ }
+};
+
+struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
+ AAValueConstantRangeFloating(const IRPosition &IRP, Attributor &A)
+ : AAValueConstantRangeImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ AAValueConstantRangeImpl::initialize(A);
+ Value &V = getAssociatedValue();
+
+ if (auto *C = dyn_cast<ConstantInt>(&V)) {
+ unionAssumed(ConstantRange(C->getValue()));
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<UndefValue>(&V)) {
+ // Collapse the undef state to 0.
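+      // (Any concrete value is a sound choice for undef; 0 keeps the
+      // assumed range a singleton.)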
+ unionAssumed(ConstantRange(APInt(getBitWidth(), 0)));
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ if (isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<CastInst>(&V))
+ return;
+ // If it is a load instruction with range metadata, use it.
+ if (LoadInst *LI = dyn_cast<LoadInst>(&V))
+ if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range)) {
+ intersectKnown(getConstantRangeFromMetadata(*RangeMD));
+ return;
+ }
+
+    // We can work with PHI and select instructions as we traverse their
+    // operands during update.
+ if (isa<SelectInst>(V) || isa<PHINode>(V))
+ return;
+
+ // Otherwise we give up.
+ indicatePessimisticFixpoint();
+
+ LLVM_DEBUG(dbgs() << "[AAValueConstantRange] We give up: "
+ << getAssociatedValue() << "\n");
+ }
+
+ bool calculateBinaryOperator(
+ Attributor &A, BinaryOperator *BinOp, IntegerRangeState &T,
+ const Instruction *CtxI,
+ SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) {
+ Value *LHS = BinOp->getOperand(0);
+ Value *RHS = BinOp->getOperand(1);
+ // TODO: Allow non integers as well.
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return false;
+
+ auto &LHSAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS));
+ QuerriedAAs.push_back(&LHSAA);
+ auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
+
+ auto &RHSAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS));
+ QuerriedAAs.push_back(&RHSAA);
+ auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
+
+ auto AssumedRange = LHSAARange.binaryOp(BinOp->getOpcode(), RHSAARange);
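+    // For example, for an 'add' on the ranges [0, 4) and [10, 12), and
+    // assuming no wrapping, the assumed range becomes [10, 15).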
+
+ T.unionAssumed(AssumedRange);
+
+ // TODO: Track a known state too.
+
+ return T.isValidState();
+ }
+
+ bool calculateCastInst(
+ Attributor &A, CastInst *CastI, IntegerRangeState &T,
+ const Instruction *CtxI,
+ SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) {
+ assert(CastI->getNumOperands() == 1 && "Expected cast to be unary!");
+ // TODO: Allow non integers as well.
+ Value &OpV = *CastI->getOperand(0);
+ if (!OpV.getType()->isIntegerTy())
+ return false;
+
+ auto &OpAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(OpV));
+ QuerriedAAs.push_back(&OpAA);
+ T.unionAssumed(
+ OpAA.getAssumed().castOp(CastI->getOpcode(), getState().getBitWidth()));
+ return T.isValidState();
+ }
+
+ bool
+ calculateCmpInst(Attributor &A, CmpInst *CmpI, IntegerRangeState &T,
+ const Instruction *CtxI,
+ SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) {
+ Value *LHS = CmpI->getOperand(0);
+ Value *RHS = CmpI->getOperand(1);
+ // TODO: Allow non integers as well.
+ if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+ return false;
+
+ auto &LHSAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*LHS));
+ QuerriedAAs.push_back(&LHSAA);
+ auto &RHSAA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(*RHS));
+ QuerriedAAs.push_back(&RHSAA);
+
+ auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
+ auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
+
+    // If one of them is the empty set, we can't decide.
+ if (LHSAARange.isEmptySet() || RHSAARange.isEmptySet())
+ return true;
+
+ bool MustTrue = false, MustFalse = false;
+
+ auto AllowedRegion =
+ ConstantRange::makeAllowedICmpRegion(CmpI->getPredicate(), RHSAARange);
+
+ auto SatisfyingRegion = ConstantRange::makeSatisfyingICmpRegion(
+ CmpI->getPredicate(), RHSAARange);
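+    // For example, for 'icmp ult' with RHS in [10, 20), the satisfying
+    // region is [0, 10) and the allowed region is [0, 19): an LHS range
+    // contained in the former makes the compare provably true, one disjoint
+    // from the latter makes it provably false.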
+
+ if (AllowedRegion.intersectWith(LHSAARange).isEmptySet())
+ MustFalse = true;
+
+ if (SatisfyingRegion.contains(LHSAARange))
+ MustTrue = true;
+
+ assert((!MustTrue || !MustFalse) &&
+ "Either MustTrue or MustFalse should be false!");
+
+ if (MustTrue)
+ T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 1)));
+ else if (MustFalse)
+ T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 0)));
+ else
+ T.unionAssumed(ConstantRange(/* BitWidth */ 1, /* isFullSet */ true));
+
+ LLVM_DEBUG(dbgs() << "[AAValueConstantRange] " << *CmpI << " " << LHSAA
+ << " " << RHSAA << "\n");
+
+ // TODO: Track a known state too.
+ return T.isValidState();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
+ IntegerRangeState &T, bool Stripped) -> bool {
+ Instruction *I = dyn_cast<Instruction>(&V);
+ if (!I || isa<CallBase>(I)) {
+
+        // If the value is not an instruction (or is a call base), query the
+        // Attributor for an AA on the value directly.
+ const auto &AA =
+ A.getAAFor<AAValueConstantRange>(*this, IRPosition::value(V));
+
+        // The clamp operator is not used here so that the program point
+        // CtxI can be utilized.
+ T.unionAssumed(AA.getAssumedConstantRange(A, CtxI));
+
+ return T.isValidState();
+ }
+
+ SmallVector<const AAValueConstantRange *, 4> QuerriedAAs;
+ if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
+ if (!calculateBinaryOperator(A, BinOp, T, CtxI, QuerriedAAs))
+ return false;
+ } else if (auto *CmpI = dyn_cast<CmpInst>(I)) {
+ if (!calculateCmpInst(A, CmpI, T, CtxI, QuerriedAAs))
+ return false;
+ } else if (auto *CastI = dyn_cast<CastInst>(I)) {
+ if (!calculateCastInst(A, CastI, T, CtxI, QuerriedAAs))
+ return false;
+ } else {
+        // Give up on other instructions.
+        // TODO: Add support for other instructions.
+
+ T.indicatePessimisticFixpoint();
+ return false;
+ }
+
+ // Catch circular reasoning in a pessimistic way for now.
+ // TODO: Check how the range evolves and if we stripped anything, see also
+ // AADereferenceable or AAAlign for similar situations.
+ for (const AAValueConstantRange *QueriedAA : QuerriedAAs) {
+ if (QueriedAA != this)
+ continue;
+        // If we are in a steady state we do not need to worry.
+ if (T.getAssumed() == getState().getAssumed())
+ continue;
+ T.indicatePessimisticFixpoint();
+ }
+
+ return T.isValidState();
+ };
+
+ IntegerRangeState T(getBitWidth());
+
+ if (!genericValueTraversal<AAValueConstantRange, IntegerRangeState>(
+ A, getIRPosition(), *this, T, VisitValueCB, getCtxI(),
+ /* UseValueSimplify */ false))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(value_range)
+ }
+};
+
+struct AAValueConstantRangeFunction : AAValueConstantRangeImpl {
+ AAValueConstantRangeFunction(const IRPosition &IRP, Attributor &A)
+ : AAValueConstantRangeImpl(IRP, A) {}
+
+  /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("AAValueConstantRange(Function|CallSite)::updateImpl will "
+ "not be called");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(value_range) }
+};
+
+struct AAValueConstantRangeCallSite : AAValueConstantRangeFunction {
+ AAValueConstantRangeCallSite(const IRPosition &IRP, Attributor &A)
+ : AAValueConstantRangeFunction(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(value_range) }
+};
+
+struct AAValueConstantRangeCallSiteReturned
+ : AACallSiteReturnedFromReturned<AAValueConstantRange,
+ AAValueConstantRangeImpl> {
+ AAValueConstantRangeCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AACallSiteReturnedFromReturned<AAValueConstantRange,
+ AAValueConstantRangeImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+    // If it is a call instruction with range metadata, use the metadata.
+ if (CallInst *CI = dyn_cast<CallInst>(&getAssociatedValue()))
+ if (auto *RangeMD = CI->getMetadata(LLVMContext::MD_range))
+ intersectKnown(getConstantRangeFromMetadata(*RangeMD));
+
+ AAValueConstantRangeImpl::initialize(A);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(value_range)
+ }
+};
+
+struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
+ AAValueConstantRangeCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAValueConstantRangeFloating(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(value_range)
+ }
+};
+} // namespace
+
+const char AAReturnedValues::ID = 0;
+const char AANoUnwind::ID = 0;
+const char AANoSync::ID = 0;
+const char AANoFree::ID = 0;
+const char AANonNull::ID = 0;
+const char AANoRecurse::ID = 0;
+const char AAWillReturn::ID = 0;
+const char AAUndefinedBehavior::ID = 0;
+const char AANoAlias::ID = 0;
+const char AAReachability::ID = 0;
+const char AANoReturn::ID = 0;
+const char AAIsDead::ID = 0;
+const char AADereferenceable::ID = 0;
+const char AAAlign::ID = 0;
+const char AANoCapture::ID = 0;
+const char AAValueSimplify::ID = 0;
+const char AAHeapToStack::ID = 0;
+const char AAPrivatizablePtr::ID = 0;
+const char AAMemoryBehavior::ID = 0;
+const char AAMemoryLocation::ID = 0;
+const char AAValueConstantRange::ID = 0;
+
+// Macro magic to create the static generator function for attributes that
+// follow the naming scheme.
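+//
+// For example, CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind)
+// below defines AANoUnwind::createForPosition, which allocates either an
+// AANoUnwindFunction or an AANoUnwindCallSite and rejects every other
+// position kind via llvm_unreachable.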
+
+#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \
+ case IRPosition::PK: \
+ llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!");
+
+#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \
+ case IRPosition::PK: \
+ AA = new (A.Allocator) CLASS##SUFFIX(IRP, A); \
+ ++NumAAs; \
+ break;
+
+#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
+ SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
+ SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
+ SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ } \
+ return *AA; \
+ }
+
+#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
+ CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
+ CLASS *AA = nullptr; \
+ switch (IRP.getPositionKind()) { \
+ SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
+ SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
+ SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
+ } \
+ return *AA; \
+ }
+
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation)
+
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
+
+CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
+CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
+CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
+
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability)
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
+
+CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
+
+#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION
+#undef SWITCH_PK_CREATE
+#undef SWITCH_PK_INV
diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp
index aec470ffadc43..1d1300c6cd1d0 100644
--- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp
@@ -127,7 +127,8 @@ void BlockExtractor::loadFile() {
/*KeepEmpty=*/false);
if (BBNames.empty())
report_fatal_error("Missing bbs name");
- BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}});
+ BlocksByName.push_back(
+ {std::string(LineSplit[0]), {BBNames.begin(), BBNames.end()}});
}
}
diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index f28a399b1779b..74f11fa309592 100644
--- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -19,7 +19,6 @@
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
#include "llvm/Analysis/SparsePropagation.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
@@ -72,8 +71,7 @@ public:
CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {}
CVPLatticeVal(std::vector<Function *> &&Functions)
: LatticeState(FunctionSet), Functions(std::move(Functions)) {
- assert(std::is_sorted(this->Functions.begin(), this->Functions.end(),
- Compare()));
+ assert(llvm::is_sorted(this->Functions, Compare()));
}
/// Get a reference to the functions held by this lattice value. The number
@@ -173,9 +171,8 @@ public:
SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) override {
switch (I.getOpcode()) {
case Instruction::Call:
- return visitCallSite(cast<CallInst>(&I), ChangedValues, SS);
case Instruction::Invoke:
- return visitCallSite(cast<InvokeInst>(&I), ChangedValues, SS);
+ return visitCallBase(cast<CallBase>(I), ChangedValues, SS);
case Instruction::Load:
return visitLoad(*cast<LoadInst>(&I), ChangedValues, SS);
case Instruction::Ret:
@@ -217,13 +214,13 @@ public:
/// We collect a set of indirect calls when visiting call sites. This method
/// returns a reference to that set.
- SmallPtrSetImpl<Instruction *> &getIndirectCalls() { return IndirectCalls; }
+ SmallPtrSetImpl<CallBase *> &getIndirectCalls() { return IndirectCalls; }
private:
/// Holds the indirect calls we encounter during the analysis. We will attach
/// metadata to these calls after the analysis indicating the functions the
/// calls can possibly target.
- SmallPtrSet<Instruction *, 32> IndirectCalls;
+ SmallPtrSet<CallBase *, 32> IndirectCalls;
/// Compute a new lattice value for the given constant. The constant, after
/// stripping any pointer casts, should be a Function. We ignore null
@@ -255,23 +252,22 @@ private:
/// the merge of the argument state with the call sites corresponding actual
/// argument state. The call site state is the merge of the call site state
/// with the returned value state of the called function.
- void visitCallSite(CallSite CS,
+ void visitCallBase(CallBase &CB,
DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues,
SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) {
- Function *F = CS.getCalledFunction();
- Instruction *I = CS.getInstruction();
- auto RegI = CVPLatticeKey(I, IPOGrouping::Register);
+ Function *F = CB.getCalledFunction();
+ auto RegI = CVPLatticeKey(&CB, IPOGrouping::Register);
// If this is an indirect call, save it so we can quickly revisit it when
// attaching metadata.
if (!F)
- IndirectCalls.insert(I);
+ IndirectCalls.insert(&CB);
// If we can't track the function's return values, there's nothing to do.
if (!F || !canTrackReturnsInterprocedurally(F)) {
// Void return, No need to create and update CVPLattice state as no one
// can use it.
- if (I->getType()->isVoidTy())
+ if (CB.getType()->isVoidTy())
return;
ChangedValues[RegI] = getOverdefinedVal();
return;
@@ -284,14 +280,14 @@ private:
for (Argument &A : F->args()) {
auto RegFormal = CVPLatticeKey(&A, IPOGrouping::Register);
auto RegActual =
- CVPLatticeKey(CS.getArgument(A.getArgNo()), IPOGrouping::Register);
+ CVPLatticeKey(CB.getArgOperand(A.getArgNo()), IPOGrouping::Register);
ChangedValues[RegFormal] =
MergeValues(SS.getValueState(RegFormal), SS.getValueState(RegActual));
}
// Void return, No need to create and update CVPLattice state as no one can
// use it.
- if (I->getType()->isVoidTy())
+ if (CB.getType()->isVoidTy())
return;
ChangedValues[RegI] =
@@ -388,9 +384,8 @@ static bool runCVP(Module &M) {
// the set of functions they can possibly target.
bool Changed = false;
MDBuilder MDB(M.getContext());
- for (Instruction *C : Lattice.getIndirectCalls()) {
- CallSite CS(C);
- auto RegI = CVPLatticeKey(CS.getCalledValue(), IPOGrouping::Register);
+ for (CallBase *C : Lattice.getIndirectCalls()) {
+ auto RegI = CVPLatticeKey(C->getCalledOperand(), IPOGrouping::Register);
CVPLatticeVal LV = Solver.getExistingValueState(RegI);
if (!LV.isFunctionSet() || LV.getFunctions().empty())
continue;
diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index ea1278aa108fb..67f1438b9b6ac 100644
--- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -84,11 +84,9 @@ static void copyDebugLocMetadata(const GlobalVariable *From,
To->addDebugInfo(MD);
}
-static unsigned getAlignment(GlobalVariable *GV) {
- unsigned Align = GV->getAlignment();
- if (Align)
- return Align;
- return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
+static Align getAlign(GlobalVariable *GV) {
+ return GV->getAlign().getValueOr(
+ GV->getParent()->getDataLayout().getPreferredAlign(GV));
}
static bool
@@ -120,8 +118,8 @@ static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
<< New->getName() << "\n");
// Bump the alignment if necessary.
- if (Old->getAlignment() || New->getAlignment())
- New->setAlignment(Align(std::max(getAlignment(Old), getAlignment(New))));
+ if (Old->getAlign() || New->getAlign())
+ New->setAlignment(std::max(getAlign(Old), getAlign(New)));
copyDebugLocMetadata(Old, New);
Old->replaceAllUsesWith(NewConstant);
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 61d519d8ae880..54c51b6e7161b 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -22,16 +22,17 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -175,16 +176,15 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
//
std::vector<Value *> Args;
for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
- CallSite CS(*I++);
- if (!CS)
+ CallBase *CB = dyn_cast<CallBase>(*I++);
+ if (!CB)
continue;
- Instruction *Call = CS.getInstruction();
// Pass all the same arguments.
- Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
+ Args.assign(CB->arg_begin(), CB->arg_begin() + NumArgs);
// Drop any attributes that were on the vararg arguments.
- AttributeList PAL = CS.getAttributes();
+ AttributeList PAL = CB->getAttributes();
if (!PAL.isEmpty()) {
SmallVector<AttributeSet, 8> ArgAttrs;
for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
@@ -194,34 +194,31 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
}
SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ CB->getOperandBundlesAsDefs(OpBundles);
- CallSite NewCS;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call);
+ CallBase *NewCB = nullptr;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CB)) {
+ NewCB = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", CB);
} else {
- NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(NewCS.getInstruction())
- ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
+ NewCB = CallInst::Create(NF, Args, OpBundles, "", CB);
+ cast<CallInst>(NewCB)->setTailCallKind(
+ cast<CallInst>(CB)->getTailCallKind());
}
- NewCS.setCallingConv(CS.getCallingConv());
- NewCS.setAttributes(PAL);
- NewCS->setDebugLoc(Call->getDebugLoc());
- uint64_t W;
- if (Call->extractProfTotalWeight(W))
- NewCS->setProfWeight(W);
+ NewCB->setCallingConv(CB->getCallingConv());
+ NewCB->setAttributes(PAL);
+ NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
Args.clear();
- if (!Call->use_empty())
- Call->replaceAllUsesWith(NewCS.getInstruction());
+ if (!CB->use_empty())
+ CB->replaceAllUsesWith(NewCB);
- NewCS->takeName(Call);
+ NewCB->takeName(CB);
// Finally, remove the old call from the program, reducing the use-count of
// F.
- Call->eraseFromParent();
+ CB->eraseFromParent();
}
// Since we have now created the new function, splice the body of the old
@@ -291,7 +288,8 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
bool Changed = false;
for (Argument &Arg : Fn.args()) {
- if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) {
+ if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() &&
+ !Arg.hasPassPointeeByValueAttr()) {
if (Arg.isUsedByMetadata()) {
Arg.replaceAllUsesWith(UndefValue::get(Arg.getType()));
Changed = true;
@@ -304,16 +302,16 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
return false;
for (Use &U : Fn.uses()) {
- CallSite CS(U.getUser());
- if (!CS || !CS.isCallee(&U))
+ CallBase *CB = dyn_cast<CallBase>(U.getUser());
+ if (!CB || !CB->isCallee(&U))
continue;
// Now go through all unused args and replace them with "undef".
for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
unsigned ArgNo = UnusedArgs[I];
- Value *Arg = CS.getArgument(ArgNo);
- CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+ Value *Arg = CB->getArgOperand(ArgNo);
+ CB->setArgOperand(ArgNo, UndefValue::get(Arg->getType()));
++NumArgumentsReplacedWithUndef;
Changed = true;
}
@@ -391,8 +389,8 @@ DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses,
return MarkIfNotLive(Use, MaybeLiveUses);
} else {
DeadArgumentEliminationPass::Liveness Result = MaybeLive;
- for (unsigned i = 0; i < NumRetVals(F); ++i) {
- RetOrArg Use = CreateRet(F, i);
+ for (unsigned Ri = 0; Ri < NumRetVals(F); ++Ri) {
+ RetOrArg Use = CreateRet(F, Ri);
// We might be live, depending on the liveness of Use. If any
// sub-value is live, then the entire value is considered live. This
// is a conservative choice, and better tracking is possible.
@@ -424,28 +422,27 @@ DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses,
return Result;
}
- if (auto CS = ImmutableCallSite(V)) {
- const Function *F = CS.getCalledFunction();
+ if (const auto *CB = dyn_cast<CallBase>(V)) {
+ const Function *F = CB->getCalledFunction();
if (F) {
// Used in a direct call.
// The function argument is live if it is used as a bundle operand.
- if (CS.isBundleOperand(U))
+ if (CB->isBundleOperand(U))
return Live;
// Find the argument number. We know for sure that this use is an
// argument, since if it was the function argument this would be an
// indirect call and the we know can't be looking at a value of the
// label type (for the invoke instruction).
- unsigned ArgNo = CS.getArgumentNo(U);
+ unsigned ArgNo = CB->getArgOperandNo(U);
if (ArgNo >= F->getFunctionType()->getNumParams())
// The value is passed in through a vararg! Must be live.
return Live;
- assert(CS.getArgument(ArgNo)
- == CS->getOperand(U->getOperandNo())
- && "Argument is not where we expected it");
+ assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) &&
+ "Argument is not where we expected it");
// Value passed to a normal call. It's only live when the corresponding
// argument to the called function turns out live.
@@ -485,9 +482,10 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V,
// We consider arguments of non-internal functions to be intrinsically alive as
// well as arguments to functions which have their "address taken".
void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
- // Functions with inalloca parameters are expecting args in a particular
- // register and memory layout.
- if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
+ // Functions with inalloca/preallocated parameters are expecting args in a
+ // particular register and memory layout.
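+  // Removing or reordering such arguments would break that layout, so the
+  // function and all of its arguments are conservatively marked live.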
+ if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
MarkLive(F);
return;
}
@@ -555,24 +553,17 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
for (const Use &U : F.uses()) {
// If the function is PASSED IN as an argument, its address has been
// taken.
- ImmutableCallSite CS(U.getUser());
- if (!CS || !CS.isCallee(&U)) {
+ const auto *CB = dyn_cast<CallBase>(U.getUser());
+ if (!CB || !CB->isCallee(&U)) {
MarkLive(F);
return;
}
// The number of arguments for `musttail` call must match the number of
// arguments of the caller
- if (CS.isMustTailCall())
+ if (CB->isMustTailCall())
HasMustTailCallers = true;
- // If this use is anything other than a call site, the function is alive.
- const Instruction *TheCall = CS.getInstruction();
- if (!TheCall) { // Not a direct call site?
- MarkLive(F);
- return;
- }
-
// If we end up here, we are looking at a direct call to our function.
// Now, check how our return value(s) is/are used in this caller. Don't
@@ -581,7 +572,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
continue;
// Check all uses of the return value.
- for (const Use &U : TheCall->uses()) {
+ for (const Use &U : CB->uses()) {
if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
// This use uses a part of our return value, survey the uses of
// that part and store the results for this index only.
@@ -600,10 +591,10 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
RetValLiveness.assign(RetCount, Live);
break;
} else {
- for (unsigned i = 0; i != RetCount; ++i) {
- if (RetValLiveness[i] != Live)
- MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
- MaybeLiveAggregateUses.end());
+ for (unsigned Ri = 0; Ri != RetCount; ++Ri) {
+ if (RetValLiveness[Ri] != Live)
+ MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(),
+ MaybeLiveAggregateUses.end());
}
}
}
@@ -616,17 +607,17 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
}
// Now we've inspected all callers, record the liveness of our return values.
- for (unsigned i = 0; i != RetCount; ++i)
- MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
+ for (unsigned Ri = 0; Ri != RetCount; ++Ri)
+ MarkValue(CreateRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]);
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting args for fn: "
<< F.getName() << "\n");
// Now, check all of our arguments.
- unsigned i = 0;
+ unsigned ArgI = 0;
UseVector MaybeLiveArgUses;
- for (Function::const_arg_iterator AI = F.arg_begin(),
- E = F.arg_end(); AI != E; ++AI, ++i) {
+ for (Function::const_arg_iterator AI = F.arg_begin(), E = F.arg_end();
+ AI != E; ++AI, ++ArgI) {
Liveness Result;
if (F.getFunctionType()->isVarArg() || HasMustTailCallers ||
HasMustTailCalls) {
@@ -649,7 +640,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
}
// Mark the result.
- MarkValue(CreateArg(&F, i), Result, MaybeLiveArgUses);
+ MarkValue(CreateArg(&F, ArgI), Result, MaybeLiveArgUses);
// Clear the vector again for the next iteration.
MaybeLiveArgUses.clear();
}
@@ -684,11 +675,11 @@ void DeadArgumentEliminationPass::MarkLive(const Function &F) {
// Mark the function as live.
LiveFunctions.insert(&F);
// Mark all arguments as live.
- for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
- PropagateLiveness(CreateArg(&F, i));
+ for (unsigned ArgI = 0, E = F.arg_size(); ArgI != E; ++ArgI)
+ PropagateLiveness(CreateArg(&F, ArgI));
// Mark all return values as live.
- for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
- PropagateLiveness(CreateRet(&F, i));
+ for (unsigned Ri = 0, E = NumRetVals(&F); Ri != E; ++Ri)
+ PropagateLiveness(CreateRet(&F, Ri));
}
/// MarkLive - Mark the given return value or argument as live. Additionally,
@@ -749,19 +740,19 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Construct the new parameter list from non-dead arguments. Also construct
// a new set of parameter attributes to correspond. Skip the first parameter
// attribute, since that belongs to the return value.
- unsigned i = 0;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++i) {
- RetOrArg Arg = CreateArg(F, i);
+ unsigned ArgI = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgI) {
+ RetOrArg Arg = CreateArg(F, ArgI);
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
- ArgAlive[i] = true;
- ArgAttrVec.push_back(PAL.getParamAttributes(i));
- HasLiveReturnedArg |= PAL.hasParamAttribute(i, Attribute::Returned);
+ ArgAlive[ArgI] = true;
+ ArgAttrVec.push_back(PAL.getParamAttributes(ArgI));
+ HasLiveReturnedArg |= PAL.hasParamAttribute(ArgI, Attribute::Returned);
} else {
++NumArgumentsEliminated;
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument "
- << i << " (" << I->getName() << ") from "
+ << ArgI << " (" << I->getName() << ") from "
<< F->getName() << "\n");
}
}
@@ -798,16 +789,16 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
NRetTy = RetTy;
} else {
// Look at each of the original return values individually.
- for (unsigned i = 0; i != RetCount; ++i) {
- RetOrArg Ret = CreateRet(F, i);
+ for (unsigned Ri = 0; Ri != RetCount; ++Ri) {
+ RetOrArg Ret = CreateRet(F, Ri);
if (LiveValues.erase(Ret)) {
- RetTypes.push_back(getRetComponentType(F, i));
- NewRetIdxs[i] = RetTypes.size() - 1;
+ RetTypes.push_back(getRetComponentType(F, Ri));
+ NewRetIdxs[Ri] = RetTypes.size() - 1;
} else {
++NumRetValsEliminated;
LLVM_DEBUG(
dbgs() << "DeadArgumentEliminationPass - Removing return value "
- << i << " from " << F->getName() << "\n");
+ << Ri << " from " << F->getName() << "\n");
}
}
if (RetTypes.size() > 1) {
@@ -876,11 +867,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// to pass in a smaller number of arguments into the new function.
std::vector<Value*> Args;
while (!F->use_empty()) {
- CallSite CS(F->user_back());
- Instruction *Call = CS.getInstruction();
+ CallBase &CB = cast<CallBase>(*F->user_back());
ArgAttrVec.clear();
- const AttributeList &CallPAL = CS.getAttributes();
+ const AttributeList &CallPAL = CB.getAttributes();
// Adjust the call return attributes in case the function was changed to
// return void.
@@ -890,15 +880,15 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
- CallSite::arg_iterator I = CS.arg_begin();
- unsigned i = 0;
+ auto I = CB.arg_begin();
+ unsigned Pi = 0;
// Loop over those operands, corresponding to the normal arguments to the
// original function, and add those that are still alive.
- for (unsigned e = FTy->getNumParams(); i != e; ++I, ++i)
- if (ArgAlive[i]) {
+ for (unsigned E = FTy->getNumParams(); Pi != E; ++I, ++Pi)
+ if (ArgAlive[Pi]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- AttributeSet Attrs = CallPAL.getParamAttributes(i);
+ AttributeSet Attrs = CallPAL.getParamAttributes(Pi);
if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
@@ -915,9 +905,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
}
// Push any varargs arguments on the list. Don't forget their attributes.
- for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
+ for (auto E = CB.arg_end(); I != E; ++I, ++Pi) {
Args.push_back(*I);
- ArgAttrVec.push_back(CallPAL.getParamAttributes(i));
+ ArgAttrVec.push_back(CallPAL.getParamAttributes(Pi));
}
// Reconstruct the AttributesList based on the vector we constructed.
@@ -932,44 +922,41 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
F->getContext(), FnAttrs, RetAttrs, ArgAttrVec);
SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ CB.getOperandBundlesAsDefs(OpBundles);
- CallSite NewCS;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call->getParent());
+ CallBase *NewCB = nullptr;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
+ NewCB = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", CB.getParent());
} else {
- NewCS = CallInst::Create(NFTy, NF, Args, OpBundles, "", Call);
- cast<CallInst>(NewCS.getInstruction())
- ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
+ NewCB = CallInst::Create(NFTy, NF, Args, OpBundles, "", &CB);
+ cast<CallInst>(NewCB)->setTailCallKind(
+ cast<CallInst>(&CB)->getTailCallKind());
}
- NewCS.setCallingConv(CS.getCallingConv());
- NewCS.setAttributes(NewCallPAL);
- NewCS->setDebugLoc(Call->getDebugLoc());
- uint64_t W;
- if (Call->extractProfTotalWeight(W))
- NewCS->setProfWeight(W);
+ NewCB->setCallingConv(CB.getCallingConv());
+ NewCB->setAttributes(NewCallPAL);
+ NewCB->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
Args.clear();
ArgAttrVec.clear();
- Instruction *New = NewCS.getInstruction();
- if (!Call->use_empty() || Call->isUsedByMetadata()) {
- if (New->getType() == Call->getType()) {
+ if (!CB.use_empty() || CB.isUsedByMetadata()) {
+ if (NewCB->getType() == CB.getType()) {
// Return type not changed? Just replace users then.
- Call->replaceAllUsesWith(New);
- New->takeName(Call);
- } else if (New->getType()->isVoidTy()) {
+ CB.replaceAllUsesWith(NewCB);
+ NewCB->takeName(&CB);
+ } else if (NewCB->getType()->isVoidTy()) {
// If the return value is dead, replace any uses of it with undef
// (any non-debug value uses will get removed later on).
- if (!Call->getType()->isX86_MMXTy())
- Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+ if (!CB.getType()->isX86_MMXTy())
+ CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
} else {
assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
" must have been a struct or an array!");
- Instruction *InsertPt = Call;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest());
+ Instruction *InsertPt = &CB;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
+ BasicBlock *NewEdge =
+ SplitEdge(NewCB->getParent(), II->getNormalDest());
InsertPt = &*NewEdge->getFirstInsertionPt();
}
@@ -979,30 +966,30 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
//
// Start out building up our return value from undef
Value *RetVal = UndefValue::get(RetTy);
- for (unsigned i = 0; i != RetCount; ++i)
- if (NewRetIdxs[i] != -1) {
+ for (unsigned Ri = 0; Ri != RetCount; ++Ri)
+ if (NewRetIdxs[Ri] != -1) {
Value *V;
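+          // NoFolder keeps the extractvalue/insertvalue chain as explicit
+          // instructions instead of letting the builder fold constants.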
+ IRBuilder<NoFolder> IRB(InsertPt);
if (RetTypes.size() > 1)
// We are still returning a struct, so extract the value from our
// return value
- V = ExtractValueInst::Create(New, NewRetIdxs[i], "newret",
- InsertPt);
+ V = IRB.CreateExtractValue(NewCB, NewRetIdxs[Ri], "newret");
else
// We are now returning a single element, so just insert that
- V = New;
+ V = NewCB;
// Insert the value at the old position
- RetVal = InsertValueInst::Create(RetVal, V, i, "oldret", InsertPt);
+ RetVal = IRB.CreateInsertValue(RetVal, V, Ri, "oldret");
}
// Now, replace all uses of the old call instruction with the return
// struct we built
- Call->replaceAllUsesWith(RetVal);
- New->takeName(Call);
+ CB.replaceAllUsesWith(RetVal);
+ NewCB->takeName(&CB);
}
}
// Finally, remove the old call from the program, reducing the use-count of
// F.
- Call->eraseFromParent();
+ CB.eraseFromParent();
}
// Since we have now created the new function, splice the body of the old
@@ -1012,10 +999,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well.
- i = 0;
+ ArgI = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
- I2 = NF->arg_begin(); I != E; ++I, ++i)
- if (ArgAlive[i]) {
+ I2 = NF->arg_begin();
+ I != E; ++I, ++ArgI)
+ if (ArgAlive[ArgI]) {
// If this is a live argument, move the name and users over to the new
// version.
I->replaceAllUsesWith(&*I2);
@@ -1033,11 +1021,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (F->getReturnType() != NF->getReturnType())
for (BasicBlock &BB : *NF)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
- Value *RetVal;
+ IRBuilder<NoFolder> IRB(RI);
+ Value *RetVal = nullptr;
- if (NFTy->getReturnType()->isVoidTy()) {
- RetVal = nullptr;
- } else {
+ if (!NFTy->getReturnType()->isVoidTy()) {
assert(RetTy->isStructTy() || RetTy->isArrayTy());
// The original return value was a struct or array, insert
// extractvalue/insertvalue chains to extract only the values we need
@@ -1047,16 +1034,16 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
Value *OldRet = RI->getOperand(0);
// Start out building up our return value from undef
RetVal = UndefValue::get(NRetTy);
- for (unsigned i = 0; i != RetCount; ++i)
- if (NewRetIdxs[i] != -1) {
- ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i,
- "oldret", RI);
+ for (unsigned RetI = 0; RetI != RetCount; ++RetI)
+ if (NewRetIdxs[RetI] != -1) {
+ Value *EV = IRB.CreateExtractValue(OldRet, RetI, "oldret");
+
if (RetTypes.size() > 1) {
// We're still returning a struct, so reinsert the value into
// our new return value at the new index
- RetVal = InsertValueInst::Create(RetVal, EV, NewRetIdxs[i],
- "newret", RI);
+ RetVal = IRB.CreateInsertValue(RetVal, EV, NewRetIdxs[RetI],
+ "newret");
} else {
// We are now only returning a simple value, so just return the
// extracted value.
@@ -1066,7 +1053,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
}
// Replace the return instruction with one returning the new return
// value (possibly 0 if we became void).
- ReturnInst::Create(F->getContext(), RetVal, RI);
+ auto *NewRet = ReturnInst::Create(F->getContext(), RetVal, RI);
+ NewRet->setDebugLoc(RI->getDebugLoc());
BB.getInstList().erase(RI);
}
diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp
index f77b528fc42da..b45766a8e7833 100644
--- a/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -54,6 +54,7 @@ namespace {
class GVExtractorPass : public ModulePass {
SetVector<GlobalValue *> Named;
bool deleteStuff;
+ bool keepConstInit;
public:
static char ID; // Pass identification, replacement for typeid
@@ -61,8 +62,9 @@ namespace {
/// Otherwise, it deletes as much of the module as possible, except for the
/// global values specified.
explicit GVExtractorPass(std::vector<GlobalValue*> &GVs,
- bool deleteS = true)
- : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
+ bool deleteS = true, bool keepConstInit = false)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS),
+ keepConstInit(keepConstInit) {}
bool runOnModule(Module &M) override {
if (skipModule(M))
@@ -83,7 +85,8 @@ namespace {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
bool Delete =
- deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration();
+ deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration() &&
+ (!I->isConstant() || !keepConstInit);
if (!Delete) {
if (I->hasAvailableExternallyLinkage())
continue;
@@ -156,6 +159,6 @@ namespace {
}
ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs,
- bool deleteFn) {
- return new GVExtractorPass(GVs, deleteFn);
+ bool deleteFn, bool keepConstInit) {
+ return new GVExtractorPass(GVs, deleteFn, keepConstInit);
}
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index b6d0b2e35694b..4baeaa6e16304 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -33,7 +33,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -160,8 +159,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
- for (CallSite::arg_iterator CI = Call->arg_begin(), CE = Call->arg_end();
- CI != CE; ++CI) {
+ for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) {
Value *Arg = *CI;
if (!Arg->getType()->isPtrOrPtrVectorTy())
continue;
@@ -362,13 +360,13 @@ struct ArgumentUsesTracker : public CaptureTracker {
void tooManyUses() override { Captured = true; }
bool captured(const Use *U) override {
- CallSite CS(U->getUser());
- if (!CS.getInstruction()) {
+ CallBase *CB = dyn_cast<CallBase>(U->getUser());
+ if (!CB) {
Captured = true;
return true;
}
- Function *F = CS.getCalledFunction();
+ Function *F = CB->getCalledFunction();
if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
Captured = true;
return true;
@@ -379,14 +377,14 @@ struct ArgumentUsesTracker : public CaptureTracker {
// these.
unsigned UseIndex =
-        std::distance(const_cast<const Use *>(CS.arg_begin()), U);
-    assert(UseIndex < CS.data_operands_size() &&
+        std::distance(const_cast<const Use *>(CB->arg_begin()), U);
+    assert(UseIndex < CB->data_operands_size() &&
"Indirect function calls should have been filtered above!");
- if (UseIndex >= CS.getNumArgOperands()) {
+ if (UseIndex >= CB->getNumArgOperands()) {
      // Data operand, but not an argument operand -- must be a bundle operand
- assert(CS.hasOperandBundles() && "Must be!");
+ assert(CB->hasOperandBundles() && "Must be!");
// CaptureTracking told us that we're being captured by an operand bundle
// use. In this case it does not matter if the callee is within our SCC
@@ -449,7 +447,7 @@ determinePointerReadAttrs(Argument *A,
SmallPtrSet<Use *, 32> Visited;
// inalloca arguments are always clobbered by the call.
- if (A->hasInAllocaAttr())
+ if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
return Attribute::None;
bool IsRead = false;
@@ -490,15 +488,15 @@ determinePointerReadAttrs(Argument *A,
Worklist.push_back(&UU);
};
- CallSite CS(I);
- if (CS.doesNotAccessMemory()) {
+ CallBase &CB = cast<CallBase>(*I);
+ if (CB.doesNotAccessMemory()) {
AddUsersToWorklistIfCapturing();
continue;
}
- Function *F = CS.getCalledFunction();
+ Function *F = CB.getCalledFunction();
if (!F) {
- if (CS.onlyReadsMemory()) {
+ if (CB.onlyReadsMemory()) {
IsRead = true;
AddUsersToWorklistIfCapturing();
continue;
@@ -510,23 +508,23 @@ determinePointerReadAttrs(Argument *A,
// operands. This means there is no need to adjust UseIndex to account
// for these.
- unsigned UseIndex = std::distance(CS.arg_begin(), U);
+ unsigned UseIndex = std::distance(CB.arg_begin(), U);
// U cannot be the callee operand use: since we're exploring the
// transitive uses of an Argument, having such a use be a callee would
- // imply the CallSite is an indirect call or invoke; and we'd take the
+ // imply the call site is an indirect call or invoke; and we'd take the
// early exit above.
- assert(UseIndex < CS.data_operands_size() &&
+ assert(UseIndex < CB.data_operands_size() &&
"Data operand use expected!");
- bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands();
+ bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands();
if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
assert(F->isVarArg() && "More params than args in non-varargs call");
return Attribute::None;
}
- Captures &= !CS.doesNotCapture(UseIndex);
+ Captures &= !CB.doesNotCapture(UseIndex);
// Since the optimizer (by design) cannot see the data flow corresponding
// to a operand bundle use, these cannot participate in the optimistic SCC
@@ -535,12 +533,12 @@ determinePointerReadAttrs(Argument *A,
if (IsOperandBundleUse ||
!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) {
- // The accessors used on CallSite here do the right thing for calls and
+ // The accessors used on call site here do the right thing for calls and
// invokes with operand bundles.
- if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex))
+ if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex))
return Attribute::None;
- if (!CS.doesNotAccessMemory(UseIndex))
+ if (!CB.doesNotAccessMemory(UseIndex))
IsRead = true;
}
@@ -638,8 +636,8 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
// callsite.
BasicBlock &Entry = F.getEntryBlock();
for (Instruction &I : Entry) {
- if (auto CS = CallSite(&I)) {
- if (auto *CalledFunc = CS.getCalledFunction()) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (auto *CalledFunc = CB->getCalledFunction()) {
for (auto &CSArg : CalledFunc->args()) {
if (!CSArg.hasNonNullAttr())
continue;
@@ -647,7 +645,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
// If the non-null callsite argument operand is an argument to 'F'
// (the caller) and the call is guaranteed to execute, then the value
// must be non-null throughout 'F'.
- auto *FArg = dyn_cast<Argument>(CS.getArgOperand(CSArg.getArgNo()));
+ auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
if (FArg && !FArg->hasNonNullAttr()) {
FArg->addAttr(Attribute::NonNull);
Changed = true;
@@ -904,10 +902,10 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
break;
case Instruction::Call:
case Instruction::Invoke: {
- CallSite CS(RVI);
- if (CS.hasRetAttr(Attribute::NoAlias))
+ CallBase &CB = cast<CallBase>(*RVI);
+ if (CB.hasRetAttr(Attribute::NoAlias))
break;
- if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
+ if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
break;
LLVM_FALLTHROUGH;
}
@@ -1013,8 +1011,8 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
}
case Instruction::Call:
case Instruction::Invoke: {
- CallSite CS(RVI);
- Function *Callee = CS.getCalledFunction();
+ CallBase &CB = cast<CallBase>(*RVI);
+ Function *Callee = CB.getCalledFunction();
// A call to a node within the SCC is assumed to return null until
// proven otherwise
if (Callee && SCCNodes.count(Callee)) {
@@ -1223,10 +1221,11 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
const SCCNodeSet &SCCNodes) {
- const CallSite CS(&I);
+ const CallBase *CB = dyn_cast<CallBase>(&I);
   // Breaks non-convergent assumption if CB is a convergent call to a function
// not in the SCC.
- return CS && CS.isConvergent() && SCCNodes.count(CS.getCalledFunction()) == 0;
+ return CB && CB->isConvergent() &&
+ SCCNodes.count(CB->getCalledFunction()) == 0;
}
/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
@@ -1247,11 +1246,11 @@ static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
- CallSite CS(&I);
- if (!CS)
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
return false;
- Function *Callee = CS.getCalledFunction();
+ Function *Callee = CB->getCalledFunction();
if (!Callee)
return true;
@@ -1306,7 +1305,7 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
// Skip non-throwing functions.
[](const Function &F) { return F.doesNotThrow(); },
// Instructions that break non-throwing assumption.
- [SCCNodes](Instruction &I) {
+ [&SCCNodes](Instruction &I) {
return InstrBreaksNonThrowing(I, SCCNodes);
},
[](Function &F) {
@@ -1329,7 +1328,7 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
// Skip functions known not to free memory.
[](const Function &F) { return F.doesNotFreeMemory(); },
// Instructions that break non-deallocating assumption.
- [SCCNodes](Instruction &I) {
+ [&SCCNodes](Instruction &I) {
return InstrBreaksNoFree(I, SCCNodes);
},
[](Function &F) {
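Both predicate lambdas above switch from capturing SCCNodes by value to by reference. A by-value capture copies the node set into the closure object when the lambda is constructed; since the predicate only reads the set during the same traversal, a reference capture is safe and avoids the copy. A generic illustration (plain C++, not LLVM code):

    #include <set>

    int countHits(const std::set<int> &Nodes) {
      auto ByValue = [Nodes](int N) { return Nodes.count(N); }; // copies the set
      auto ByRef = [&Nodes](int N) { return Nodes.count(N); };  // borrows it
      return ByValue(1) + ByRef(2);
    }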
@@ -1368,8 +1367,8 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// marked norecurse, so any called from F to F will not be marked norecurse.
for (auto &BB : *F)
for (auto &I : BB.instructionsWithoutDebug())
- if (auto CS = CallSite(&I)) {
- Function *Callee = CS.getCalledFunction();
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ Function *Callee = CB->getCalledFunction();
if (!Callee || Callee == F || !Callee->doesNotRecurse())
// Function calls a potentially recursive function.
return false;
@@ -1439,8 +1438,8 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
// function.
if (!HasUnknownCall)
for (Instruction &I : instructions(F))
- if (auto CS = CallSite(&I))
- if (!CS.getCalledFunction()) {
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (!CB->getCalledFunction()) {
HasUnknownCall = true;
break;
}
@@ -1575,8 +1574,8 @@ static bool addNoRecurseAttrsTopDown(Function &F) {
auto *I = dyn_cast<Instruction>(U);
if (!I)
return false;
- CallSite CS(I);
- if (!CS || !CS.getParent()->getParent()->doesNotRecurse())
+ CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
return false;
}
return setDoesNotRecurse(F);
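Most of the churn in this file is the mechanical half of the CallSite removal: instead of wrapping an Instruction* in a CallSite and testing the wrapper, code now dyn_casts to CallBase, the common base class of CallInst, InvokeInst and CallBrInst. A minimal sketch of the new idiom:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Returns the statically known callee, or nullptr for non-calls and
    // indirect calls -- the same shape as the rewritten loops above.
    static Function *directCallee(Instruction &I) {
      if (auto *CB = dyn_cast<CallBase>(&I))
        return CB->getCalledFunction();
      return nullptr;
    }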
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index be0446a946ec5..468bf19f2e48a 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -306,28 +306,21 @@ static void computeImportForReferencedGlobals(
RefSummary->modulePath() != Summary.modulePath();
};
- auto MarkExported = [&](const ValueInfo &VI, const GlobalValueSummary *S) {
- if (ExportLists)
- (*ExportLists)[S->modulePath()].insert(VI);
- };
-
for (auto &RefSummary : VI.getSummaryList())
if (isa<GlobalVarSummary>(RefSummary.get()) &&
Index.canImportGlobalVar(RefSummary.get(), /* AnalyzeRefs */ true) &&
!LocalNotInModule(RefSummary.get())) {
auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID());
- // Only update stat if we haven't already imported this variable.
- if (ILI.second)
- NumImportedGlobalVarsThinLink++;
- MarkExported(VI, RefSummary.get());
- // Promote referenced functions and variables. We don't promote
- // objects referenced by writeonly variable initializer, because
- // we convert such variables initializers to "zeroinitializer".
- // See processGlobalForThinLTO.
- if (!Index.isWriteOnly(cast<GlobalVarSummary>(RefSummary.get())))
- for (const auto &VI : RefSummary->refs())
- for (const auto &RefFn : VI.getSummaryList())
- MarkExported(VI, RefFn.get());
+ // Only update stat and exports if we haven't already imported this
+ // variable.
+ if (!ILI.second)
+ break;
+ NumImportedGlobalVarsThinLink++;
+ // Any references made by this variable will be marked exported later,
+ // in ComputeCrossModuleImport, after import decisions are complete,
+ // which is more efficient than adding them here.
+ if (ExportLists)
+ (*ExportLists)[RefSummary->modulePath()].insert(VI);
break;
}
}
@@ -494,24 +487,11 @@ static void computeImportForFunction(
NumImportedCriticalFunctionsThinLink++;
}
- // Make exports in the source module.
- if (ExportLists) {
- auto &ExportList = (*ExportLists)[ExportModulePath];
- ExportList.insert(VI);
- if (!PreviouslyImported) {
- // This is the first time this function was exported from its source
- // module, so mark all functions and globals it references as exported
- // to the outside if they are defined in the same source module.
- // For efficiency, we unconditionally add all the referenced GUIDs
- // to the ExportList for this module, and will prune out any not
- // defined in the module later in a single pass.
- for (auto &Edge : ResolvedCalleeSummary->calls())
- ExportList.insert(Edge.first);
-
- for (auto &Ref : ResolvedCalleeSummary->refs())
- ExportList.insert(Ref);
- }
- }
+ // Any calls/references made by this function will be marked exported
+ // later, in ComputeCrossModuleImport, after import decisions are
+ // complete, which is more efficient than adding them here.
+ if (ExportLists)
+ (*ExportLists)[ExportModulePath].insert(VI);
}
auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
@@ -678,20 +658,55 @@ void llvm::ComputeCrossModuleImport(
&ExportLists);
}
- // When computing imports we added all GUIDs referenced by anything
- // imported from the module to its ExportList. Now we prune each ExportList
- // of any not defined in that module. This is more efficient than checking
- // while computing imports because some of the summary lists may be long
- // due to linkonce (comdat) copies.
+  // When computing imports we only added the variables and functions being
+  // imported to the export list. We also need to mark any references and
+  // calls they make as exported. We do this here, as it is more efficient
+  // since we may import the same values multiple times into different modules
+  // during the import computation.
for (auto &ELI : ExportLists) {
+ FunctionImporter::ExportSetTy NewExports;
const auto &DefinedGVSummaries =
ModuleToDefinedGVSummaries.lookup(ELI.first());
- for (auto EI = ELI.second.begin(); EI != ELI.second.end();) {
+ for (auto &EI : ELI.second) {
+ // Find the copy defined in the exporting module so that we can mark the
+ // values it references in that specific definition as exported.
+      // Below we will add all references and called values, without regard to
+      // whether they are also defined in this module. We subsequently prune
+      // the list to only include those defined in the exporting module; see
+      // the comment there as to why.
+ auto DS = DefinedGVSummaries.find(EI.getGUID());
+ // Anything marked exported during the import computation must have been
+ // defined in the exporting module.
+ assert(DS != DefinedGVSummaries.end());
+ auto *S = DS->getSecond();
+ S = S->getBaseObject();
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
+        // Export referenced functions and variables. We don't export/promote
+        // objects referenced by a writeonly variable's initializer, because
+        // we convert such variables' initializers to "zeroinitializer".
+ // See processGlobalForThinLTO.
+ if (!Index.isWriteOnly(GVS))
+ for (const auto &VI : GVS->refs())
+ NewExports.insert(VI);
+ } else {
+ auto *FS = cast<FunctionSummary>(S);
+ for (auto &Edge : FS->calls())
+ NewExports.insert(Edge.first);
+ for (auto &Ref : FS->refs())
+ NewExports.insert(Ref);
+ }
+ }
+    // Prune the list computed above to only include values defined in the
+    // exporting module. We do this after the above insertion since we may hit
+    // the same ref/call target multiple times in the above loop, and it is
+    // more efficient to avoid a set lookup each time.
+ for (auto EI = NewExports.begin(); EI != NewExports.end();) {
if (!DefinedGVSummaries.count(EI->getGUID()))
- ELI.second.erase(EI++);
+ NewExports.erase(EI++);
else
++EI;
}
+ ELI.second.insert(NewExports.begin(), NewExports.end());
}
assert(checkVariableImport(Index, ImportLists, ExportLists));
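The replacement loop makes export expansion a two-phase, collect-then-prune pass over each module's list, touching every ref/call target once rather than once per import. A much-simplified model using std::set in place of the real ValueInfo sets (illustrative only):

    #include <set>
    #include <string>

    void expandExports(std::set<std::string> &ExportList,
                       const std::set<std::string> &DefinedInModule,
                       const std::set<std::string> &ReferencedByExports) {
      // Phase 1: gather everything the exported values reference.
      std::set<std::string> NewExports = ReferencedByExports;
      // Phase 2: keep only targets actually defined in the exporting module.
      for (auto It = NewExports.begin(); It != NewExports.end();) {
        if (!DefinedInModule.count(*It))
          It = NewExports.erase(It);
        else
          ++It;
      }
      ExportList.insert(NewExports.begin(), NewExports.end());
    }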
@@ -913,11 +928,12 @@ void llvm::gatherImportedSummariesForModule(
const FunctionImporter::ImportMapTy &ImportList,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
// Include all summaries from the importing module.
- ModuleToSummariesForIndex[ModulePath] =
+ ModuleToSummariesForIndex[std::string(ModulePath)] =
ModuleToDefinedGVSummaries.lookup(ModulePath);
// Include summaries for imports.
for (auto &ILI : ImportList) {
- auto &SummariesForIndex = ModuleToSummariesForIndex[ILI.first()];
+ auto &SummariesForIndex =
+ ModuleToSummariesForIndex[std::string(ILI.first())];
const auto &DefinedGVSummaries =
ModuleToDefinedGVSummaries.lookup(ILI.first());
for (auto &GI : ILI.second) {
@@ -976,6 +992,8 @@ bool llvm::convertToDeclaration(GlobalValue &GV) {
GV.replaceAllUsesWith(NewGV);
return false;
}
+ if (!GV.isImplicitDSOLocal())
+ GV.setDSOLocal(false);
return true;
}
@@ -1214,8 +1232,15 @@ Expected<bool> FunctionImporter::importFunctions(
// have loaded all the required metadata!
UpgradeDebugInfo(*SrcModule);
+ // Set the partial sample profile ratio in the profile summary module flag
+ // of the imported source module, if applicable, so that the profile summary
+ // module flag will match with that of the destination module when it's
+ // imported.
+ SrcModule->setPartialSampleProfileRatio(Index);
+
// Link in the specified functions.
- if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
+ if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
+ &GlobalsToImport))
return true;
if (PrintImports) {
@@ -1224,10 +1249,12 @@ Expected<bool> FunctionImporter::importFunctions(
<< " from " << SrcModule->getSourceFileName() << "\n";
}
- if (Mover.move(std::move(SrcModule), GlobalsToImport.getArrayRef(),
- [](GlobalValue &, IRMover::ValueAdder) {},
- /*IsPerformingImport=*/true))
- report_fatal_error("Function Import: link error");
+ if (Error Err = Mover.move(
+ std::move(SrcModule), GlobalsToImport.getArrayRef(),
+ [](GlobalValue &, IRMover::ValueAdder) {},
+ /*IsPerformingImport=*/true))
+ report_fatal_error("Function Import: link error: " +
+ toString(std::move(Err)));
ImportedCount += GlobalsToImport.size();
NumImportedModules++;
@@ -1284,16 +1311,18 @@ static bool doImportingForModule(Module &M) {
// Next we need to promote to global scope and rename any local values that
// are potentially exported to other modules.
- if (renameModuleForThinLTO(M, *Index, nullptr)) {
+ if (renameModuleForThinLTO(M, *Index, /*clearDSOOnDeclarations=*/false,
+ /*GlobalsToImport=*/nullptr)) {
errs() << "Error renaming module\n";
return false;
}
// Perform the import now.
auto ModuleLoader = [&M](StringRef Identifier) {
- return loadFile(Identifier, M.getContext());
+ return loadFile(std::string(Identifier), M.getContext());
};
- FunctionImporter Importer(*Index, ModuleLoader);
+ FunctionImporter Importer(*Index, ModuleLoader,
+ /*ClearDSOLocalOnDeclarations=*/false);
Expected<bool> Result = Importer.importFunctions(M, ImportList);
// FIXME: Probably need to propagate Errors through the pass manager.
diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 72b8d7522f04f..fb4cb23b837e0 100644
--- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -263,6 +263,15 @@ void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) {
if (!ClEnableVFE)
return;
+ // If the Virtual Function Elim module flag is present and set to zero, then
+ // the vcall_visibility metadata was inserted for another optimization (WPD)
+ // and we may not have type checked loads on all accesses to the vtable.
+ // Don't attempt VFE in that case.
+ auto *Val = mdconst::dyn_extract_or_null<ConstantInt>(
+ M.getModuleFlag("Virtual Function Elim"));
+ if (!Val || Val->getZExtValue() == 0)
+ return;
+
ScanVTables(M);
if (VFESafeVTables.empty())
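The new early-out reads the "Virtual Function Elim" module flag before any vtable scanning, since WPD also emits !vcall_visibility without guaranteeing type-checked loads. A sketch of the guard as a standalone predicate:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // True only when the flag exists and is non-zero; a missing flag or an
    // explicit zero both disable virtual function elimination.
    static bool vfeEnabled(const Module &M) {
      auto *Val = mdconst::dyn_extract_or_null<ConstantInt>(
          M.getModuleFlag("Virtual Function Elim"));
      return Val && Val->getZExtValue() != 0;
    }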
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 0fd966457ece4..d9fb820f7cb53 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -28,7 +28,6 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -41,6 +40,7 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -128,13 +128,16 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
Type *Ty = Types.pop_back_val();
switch (Ty->getTypeID()) {
default: break;
- case Type::PointerTyID: return true;
+ case Type::PointerTyID:
+ return true;
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID:
+ if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
+ return true;
+ break;
case Type::ArrayTyID:
- case Type::VectorTyID: {
- SequentialType *STy = cast<SequentialType>(Ty);
- Types.push_back(STy->getElementType());
+ Types.push_back(cast<ArrayType>(Ty)->getElementType());
break;
- }
case Type::StructTyID: {
StructType *STy = cast<StructType>(Ty);
if (STy->isOpaque()) return true;
@@ -142,7 +145,8 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
E = STy->element_end(); I != E; ++I) {
Type *InnerTy = *I;
if (isa<PointerType>(InnerTy)) return true;
- if (isa<CompositeType>(InnerTy))
+ if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
+ isa<VectorType>(InnerTy))
Types.push_back(InnerTy);
}
break;
@@ -191,10 +195,10 @@ CleanupPointerRootUsers(GlobalVariable *GV,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
// pointers are forgotten, causing an accumulating growth in memory
- // usage over time. The common strategy for leak checkers is to whitelist the
- // memory pointed to by globals at exit. This is popular because it also
- // solves another problem where the main thread of a C++ program may shut down
- // before other threads that are still expecting to use those globals. To
+ // usage over time. The common strategy for leak checkers is to explicitly
+ // allow the memory pointed to by globals at exit. This is popular because it
+ // also solves another problem where the main thread of a C++ program may shut
+ // down before other threads that are still expecting to use those globals. To
// handle that case, we expect the program may create a singleton and never
// destroy it.
@@ -433,13 +437,27 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
return true;
}
+static bool IsSRASequential(Type *T) {
+ return isa<ArrayType>(T) || isa<VectorType>(T);
+}
+static uint64_t GetSRASequentialNumElements(Type *T) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(T))
+ return AT->getNumElements();
+ return cast<FixedVectorType>(T)->getNumElements();
+}
+static Type *GetSRASequentialElementType(Type *T) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(T))
+ return AT->getElementType();
+ return cast<VectorType>(T)->getElementType();
+}
static bool CanDoGlobalSRA(GlobalVariable *GV) {
Constant *Init = GV->getInitializer();
if (isa<StructType>(Init->getType())) {
// nothing to check
- } else if (SequentialType *STy = dyn_cast<SequentialType>(Init->getType())) {
- if (STy->getNumElements() > 16 && GV->hasNUsesOrMore(16))
+ } else if (IsSRASequential(Init->getType())) {
+ if (GetSRASequentialNumElements(Init->getType()) > 16 &&
+ GV->hasNUsesOrMore(16))
return false; // It's not worth it.
} else
return false;
@@ -450,14 +468,19 @@ static bool CanDoGlobalSRA(GlobalVariable *GV) {
/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
uint64_t FragmentOffsetInBits,
- uint64_t FragmentSizeInBits,
- unsigned NumElements) {
+ uint64_t FragmentSizeInBits) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;
GV->getDebugInfo(GVs);
for (auto *GVE : GVs) {
DIVariable *Var = GVE->getVariable();
+ Optional<uint64_t> VarSize = Var->getSizeInBits();
+
DIExpression *Expr = GVE->getExpression();
- if (NumElements > 1) {
+    // If the FragmentSize is smaller than the variable, emit a
+    // fragment expression.
+    // If the variable size is unknown, a fragment must be emitted
+    // to be safe.
+ if (!VarSize || FragmentSizeInBits < *VarSize) {
if (auto E = DIExpression::createFragmentExpression(
Expr, FragmentOffsetInBits, FragmentSizeInBits))
Expr = *E;
@@ -486,9 +509,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
std::map<unsigned, GlobalVariable *> NewGlobals;
// Get the alignment of the global, either explicit or target-specific.
- unsigned StartAlignment = GV->getAlignment();
- if (StartAlignment == 0)
- StartAlignment = DL.getABITypeAlignment(GV->getType());
+ Align StartAlignment =
+ DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType());
// Loop over all users and create replacement variables for used aggregate
// elements.
@@ -509,8 +531,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
Type *ElTy = nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty))
ElTy = STy->getElementType(ElementIdx);
- else if (SequentialType *STy = dyn_cast<SequentialType>(Ty))
- ElTy = STy->getElementType();
+ else
+ ElTy = GetSRASequentialElementType(Ty);
assert(ElTy);
Constant *In = Init->getAggregateElement(ElementIdx);
@@ -531,29 +553,27 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
uint64_t FieldOffset = Layout.getElementOffset(ElementIdx);
- Align NewAlign(MinAlign(StartAlignment, FieldOffset));
- if (NewAlign >
- Align(DL.getABITypeAlignment(STy->getElementType(ElementIdx))))
+ Align NewAlign = commonAlignment(StartAlignment, FieldOffset);
+ if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx)))
NGV->setAlignment(NewAlign);
// Copy over the debug info for the variable.
uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
- transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size,
- STy->getNumElements());
- } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size);
+ } else {
uint64_t EltSize = DL.getTypeAllocSize(ElTy);
- Align EltAlign(DL.getABITypeAlignment(ElTy));
+ Align EltAlign = DL.getABITypeAlign(ElTy);
uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
- Align NewAlign(MinAlign(StartAlignment, EltSize * ElementIdx));
+ Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx);
if (NewAlign > EltAlign)
NGV->setAlignment(NewAlign);
transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
- FragmentSizeInBits, STy->getNumElements());
+ FragmentSizeInBits);
}
}
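commonAlignment(A, Offset) yields the largest alignment simultaneously guaranteed by a pointer aligned to A and an offset of Offset bytes, replacing the old raw MinAlign arithmetic on unsigned values. A worked example, assuming nothing beyond the Alignment header:

    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    Align exampleFieldAlignment() {
      Align StartAlignment(256); // the aggregate's alignment
      uint64_t FieldOffset = 64; // byte offset of the field
      // A 256-byte-aligned base still guarantees 64-byte alignment here.
      return commonAlignment(StartAlignment, FieldOffset); // Align(64)
    }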
@@ -641,12 +661,12 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
return false; // Storing the value.
}
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
- if (CI->getCalledValue() != V) {
+ if (CI->getCalledOperand() != V) {
//cerr << "NONTRAPPING USE: " << *U;
return false; // Not calling the ptr
}
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
- if (II->getCalledValue() != V) {
+ if (II->getCalledOperand() != V) {
//cerr << "NONTRAPPING USE: " << *U;
return false; // Not calling the ptr
}
@@ -659,9 +679,6 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
// checked.
if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
return false;
- } else if (isa<ICmpInst>(U) &&
- isa<ConstantPointerNull>(U->getOperand(1))) {
- // Ignore icmp X, null
} else {
//cerr << "NONTRAPPING USE: " << *U;
return false;
@@ -706,17 +723,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
Changed = true;
}
} else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- CallSite CS(I);
- if (CS.getCalledValue() == V) {
+ CallBase *CB = cast<CallBase>(I);
+ if (CB->getCalledOperand() == V) {
// Calling through the pointer! Turn into a direct call, but be careful
// that the pointer is not also being passed as an argument.
- CS.setCalledFunction(NewV);
+ CB->setCalledOperand(NewV);
Changed = true;
bool PassedAsArg = false;
- for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
- if (CS.getArgument(i) == V) {
+ for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
+ if (CB->getArgOperand(i) == V) {
PassedAsArg = true;
- CS.setArgument(i, NewV);
+ CB->setArgOperand(i, NewV);
}
if (PassedAsArg) {
@@ -905,7 +922,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
// The global is initialized when the store to it occurs.
new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false,
- None, SI->getOrdering(), SI->getSyncScopeID(), SI);
+ Align(1), SI->getOrdering(), SI->getSyncScopeID(), SI);
SI->eraseFromParent();
continue;
}
@@ -922,7 +939,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Replace the cmp X, 0 with a use of the bool value.
// Sink the load to where the compare was, if atomic rules allow us to.
Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
- InitBool->getName() + ".val", false, None,
+ InitBool->getName() + ".val", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(),
LI->isUnordered() ? (Instruction *)ICI : LI);
InitBoolUsed = true;
@@ -1729,7 +1746,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
- LI->getName() + ".b", false, None,
+ LI->getName() + ".b", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
@@ -1739,14 +1756,14 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
}
StoreInst *NSI =
- new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(),
+ new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
SI->getSyncScopeID(), SI);
NSI->setDebugLoc(SI->getDebugLoc());
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
- LI->getName() + ".b", false, None,
+ LI->getName() + ".b", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(), LI);
Instruction *NSI;
if (IsOneZero)
@@ -2117,8 +2134,7 @@ static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
- CallSite CS(cast<Instruction>(U));
- CS.setCallingConv(CallingConv::Fast);
+ cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
}
}
@@ -2135,8 +2151,8 @@ static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
- CallSite CS(cast<Instruction>(U));
- CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
+ CallBase *CB = cast<CallBase>(U);
+ CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
}
}
@@ -2175,12 +2191,12 @@ static bool hasChangeableCC(Function *F) {
/// Return true if the block containing the call site has a BlockFrequency of
/// less than ColdCCRelFreq% of the entry block.
-static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) {
+static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
const BranchProbability ColdProb(ColdCCRelFreq, 100);
- auto CallSiteBB = CS.getInstruction()->getParent();
+ auto *CallSiteBB = CB.getParent();
auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
auto CallerEntryFreq =
- CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+ CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
return CallSiteFreq < CallerEntryFreq * ColdProb;
}
@@ -2200,10 +2216,10 @@ isValidCandidateForColdCC(Function &F,
if (isa<BlockAddress>(U))
continue;
- CallSite CS(cast<Instruction>(U));
- Function *CallerFunc = CS.getInstruction()->getParent()->getParent();
+ CallBase &CB = cast<CallBase>(*U);
+ Function *CallerFunc = CB.getParent()->getParent();
BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
- if (!isColdCallSite(CS, CallerBFI))
+ if (!isColdCallSite(CB, CallerBFI))
return false;
auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc);
if (It == AllCallsCold.end())
@@ -2216,8 +2232,7 @@ static void changeCallSitesToColdCC(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
- CallSite CS(cast<Instruction>(U));
- CS.setCallingConv(CallingConv::Cold);
+ cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
}
}
@@ -2230,7 +2245,6 @@ hasOnlyColdCalls(Function &F,
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- CallSite CS(cast<Instruction>(CI));
        // Skip over inline asm instructions since they aren't function calls.
if (CI->isInlineAsm())
continue;
@@ -2247,7 +2261,7 @@ hasOnlyColdCalls(Function &F,
CalledFn->hasAddressTaken())
return false;
BlockFrequencyInfo &CallerBFI = GetBFI(F);
- if (!isColdCallSite(CS, CallerBFI))
+ if (!isColdCallSite(*CI, CallerBFI))
return false;
}
}
@@ -2255,6 +2269,115 @@ hasOnlyColdCalls(Function &F,
return true;
}
+static bool hasMustTailCallers(Function *F) {
+ for (User *U : F->users()) {
+ CallBase *CB = dyn_cast<CallBase>(U);
+ if (!CB) {
+ assert(isa<BlockAddress>(U) &&
+ "Expected either CallBase or BlockAddress");
+ continue;
+ }
+ if (CB->isMustTailCall())
+ return true;
+ }
+ return false;
+}
+
+static bool hasInvokeCallers(Function *F) {
+ for (User *U : F->users())
+ if (isa<InvokeInst>(U))
+ return true;
+ return false;
+}
+
+static void RemovePreallocated(Function *F) {
+ RemoveAttribute(F, Attribute::Preallocated);
+
+ auto *M = F->getParent();
+
+ IRBuilder<> Builder(M->getContext());
+
+ // Cannot modify users() while iterating over it, so make a copy.
+ SmallVector<User *, 4> PreallocatedCalls(F->users());
+ for (User *U : PreallocatedCalls) {
+ CallBase *CB = dyn_cast<CallBase>(U);
+ if (!CB)
+ continue;
+
+ assert(
+ !CB->isMustTailCall() &&
+ "Shouldn't call RemotePreallocated() on a musttail preallocated call");
+ // Create copy of call without "preallocated" operand bundle.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CB->getOperandBundlesAsDefs(OpBundles);
+ CallBase *PreallocatedSetup = nullptr;
+ for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
+ if (It->getTag() == "preallocated") {
+ PreallocatedSetup = cast<CallBase>(*It->input_begin());
+ OpBundles.erase(It);
+ break;
+ }
+ }
+ assert(PreallocatedSetup && "Did not find preallocated bundle");
+ uint64_t ArgCount =
+ cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
+
+ assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
+ "Unknown indirect call type");
+ CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
+ CB->replaceAllUsesWith(NewCB);
+ NewCB->takeName(CB);
+ CB->eraseFromParent();
+
+ Builder.SetInsertPoint(PreallocatedSetup);
+ auto *StackSave =
+ Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
+
+ Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
+ Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
+ StackSave);
+
+ // Replace @llvm.call.preallocated.arg() with alloca.
+ // Cannot modify users() while iterating over it, so make a copy.
+ // @llvm.call.preallocated.arg() can be called with the same index multiple
+ // times. So for each @llvm.call.preallocated.arg(), we see if we have
+ // already created a Value* for the index, and if not, create an alloca and
+ // bitcast right after the @llvm.call.preallocated.setup() so that it
+ // dominates all uses.
+ SmallVector<Value *, 2> ArgAllocas(ArgCount);
+ SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
+ for (auto *User : PreallocatedArgs) {
+ auto *UseCall = cast<CallBase>(User);
+ assert(UseCall->getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::call_preallocated_arg &&
+ "preallocated token use was not a llvm.call.preallocated.arg");
+ uint64_t AllocArgIndex =
+ cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
+ Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
+ if (!AllocaReplacement) {
+ auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
+ auto *ArgType = UseCall
+ ->getAttribute(AttributeList::FunctionIndex,
+ Attribute::Preallocated)
+ .getValueAsType();
+ auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
+ Builder.SetInsertPoint(InsertBefore);
+ auto *Alloca =
+ Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
+ auto *BitCast = Builder.CreateBitCast(
+ Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
+ ArgAllocas[AllocArgIndex] = BitCast;
+ AllocaReplacement = BitCast;
+ }
+
+ UseCall->replaceAllUsesWith(AllocaReplacement);
+ UseCall->eraseFromParent();
+ }
+ // Remove @llvm.call.preallocated.setup().
+ cast<Instruction>(PreallocatedSetup)->eraseFromParent();
+ }
+}
+
static bool
OptimizeFunctions(Module &M,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
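RemovePreallocated rewrites each preallocated call into an ordinary one, and the allocas it substitutes for the argument area must not leak stack: hence the stacksave at the setup point and the stackrestore after the call. The bracketing step in isolation, as a sketch:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Reserve/release the dynamic stack region around the rewritten call.
    static void bracketWithStackSaveRestore(Module *M, Instruction *SetupPoint,
                                            Instruction *AfterCall) {
      IRBuilder<> Builder(SetupPoint); // inserts before SetupPoint
      auto *StackSave =
          Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
      Builder.SetInsertPoint(AfterCall);
      Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
                         StackSave);
    }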
@@ -2319,11 +2442,22 @@ OptimizeFunctions(Module &M,
// FIXME: We should also hoist alloca affected by this to the entry
// block if possible.
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
- !F->hasAddressTaken()) {
+ !F->hasAddressTaken() && !hasMustTailCallers(F)) {
RemoveAttribute(F, Attribute::InAlloca);
Changed = true;
}
+ // FIXME: handle invokes
+ // FIXME: handle musttail
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
+ if (!F->hasAddressTaken() && !hasMustTailCallers(F) &&
+ !hasInvokeCallers(F)) {
+ RemovePreallocated(F);
+ Changed = true;
+ }
+ continue;
+ }
+
if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
NumInternalFunc++;
TargetTransformInfo &TTI = GetTTI(*F);
@@ -2385,7 +2519,7 @@ OptimizeGlobalVars(Module &M,
// for that optional parameter, since we don't have a Function to
// provide GetTLI anyway.
Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
- if (New && New != C)
+ if (New != C)
GV->setInitializer(New);
}
@@ -2427,8 +2561,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
}
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
- SequentialType *InitTy = cast<SequentialType>(Init->getType());
- uint64_t NumElts = InitTy->getNumElements();
+ uint64_t NumElts;
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType()))
+ NumElts = ATy->getNumElements();
+ else
+ NumElts = cast<FixedVectorType>(Init->getType())->getNumElements();
// Break up the array into elements.
for (uint64_t i = 0, e = NumElts; i != e; ++i)
@@ -2439,7 +2576,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
if (Init->getType()->isArrayTy())
- return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+ return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts);
return ConstantVector::get(Elts);
}
@@ -2561,8 +2698,10 @@ static void BatchCommitValueTo(const DenseMap<Constant*, Constant*> &Mem) {
unsigned NumElts;
if (auto *STy = dyn_cast<StructType>(Ty))
NumElts = STy->getNumElements();
+ else if (auto *ATy = dyn_cast<ArrayType>(Ty))
+ NumElts = ATy->getNumElements();
else
- NumElts = cast<SequentialType>(Ty)->getNumElements();
+ NumElts = cast<FixedVectorType>(Ty)->getNumElements();
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(Init->getAggregateElement(i));
}
diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index 4a319ead23c0e..365b269dc3bf6 100644
--- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -111,6 +111,9 @@ static bool splitGlobal(GlobalVariable &GV) {
ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)),
Type->getOperand(1)}));
}
+
+ if (GV.hasMetadata(LLVMContext::MD_vcall_visibility))
+ SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility());
}
for (User *U : GV.users()) {
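Without this, splitting a vtable would silently drop its !vcall_visibility, losing information that virtual-function elimination and WPD rely on. The propagation step, sketched as a hypothetical helper:

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Copy the virtual-call visibility from the original global to a piece
    // produced by the split, when the original carried the metadata at all.
    static void copyVCallVisibility(GlobalVariable &From, GlobalVariable &To) {
      if (From.hasMetadata(LLVMContext::MD_vcall_visibility))
        To.setVCallVisibilityMetadata(From.getVCallVisibility());
    }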
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 5e690714bfdfb..d0bd0166534a7 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -39,7 +39,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -110,8 +109,8 @@ bool unlikelyExecuted(BasicBlock &BB) {
// The block is cold if it calls/invokes a cold function. However, do not
// mark sanitizer traps as cold.
for (Instruction &I : BB)
- if (auto CS = CallSite(&I))
- if (CS.hasFnAttr(Attribute::Cold) && !CS->getMetadata("nosanitize"))
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (CB->hasFnAttr(Attribute::Cold) && !CB->getMetadata("nosanitize"))
return true;
// The block is cold if it has an unreachable terminator, unless it's
@@ -325,11 +324,10 @@ Function *HotColdSplitting::extractColdRegion(
if (Function *OutF = CE.extractCodeRegion(CEAC)) {
User *U = *OutF->user_begin();
CallInst *CI = cast<CallInst>(U);
- CallSite CS(CI);
NumColdRegionsOutlined++;
if (TTI.useColdCCForColdCall(*OutF)) {
OutF->setCallingConv(CallingConv::Cold);
- CS.setCallingConv(CallingConv::Cold);
+ CI->setCallingConv(CallingConv::Cold);
}
CI->setIsNoInline();
@@ -458,6 +456,10 @@ public:
// first have predecessors within the extraction region.
if (mayExtractBlock(SinkBB)) {
addBlockToRegion(&SinkBB, SinkScore);
+ if (pred_empty(&SinkBB)) {
+ ColdRegion->EntireFunctionCold = true;
+ return Regions;
+ }
} else {
Regions.emplace_back();
ColdRegion = &Regions.back();
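A sink block with no predecessors is typically the function's entry (or dead code), so if it is extractable the whole function is cold and there is nothing left to outline; the early return records that instead of building a region. For the CallSite change earlier in the file, the cold-call test now reads as in this sketch:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    // A block is treated as cold if it calls a cold function, except for
    // sanitizer traps, which carry !nosanitize metadata.
    static bool callsColdFunction(BasicBlock &BB) {
      for (Instruction &I : BB)
        if (auto *CB = dyn_cast<CallBase>(&I))
          if (CB->hasFnAttr(Attribute::Cold) && !CB->getMetadata("nosanitize"))
            return true;
      return false;
    }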
diff --git a/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index 1bda13a9bdd80..8d05a72d68dac 100644
--- a/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -17,7 +17,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/AbstractCallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -53,7 +53,7 @@ static bool PropagateConstantsIntoArguments(Function &F) {
// For each argument, keep track of its constant value and whether it is a
// constant or not. The bool is driven to true when found to be non-constant.
- SmallVector<std::pair<Constant*, bool>, 16> ArgumentConstants;
+ SmallVector<PointerIntPair<Constant *, 1, bool>, 16> ArgumentConstants;
ArgumentConstants.resize(F.arg_size());
unsigned NumNonconstant = 0;
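PointerIntPair packs the flag into the alignment bits of the Constant*, so the per-argument state shrinks from a padded std::pair to a single pointer-sized word. A minimal usage sketch:

    #include "llvm/ADT/PointerIntPair.h"
    #include "llvm/IR/Constants.h"
    using namespace llvm;

    void trackArgument(Constant *C) {
      // Constant* is sufficiently aligned that its low bit is free to reuse.
      PointerIntPair<Constant *, 1, bool> Arg;
      Arg.setPointer(C); // last constant value seen for the argument
      Arg.setInt(false); // flips to true once proven non-constant
      static_assert(sizeof(Arg) == sizeof(void *),
                    "pair packs into one pointer-sized word");
    }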
@@ -80,7 +80,7 @@ static bool PropagateConstantsIntoArguments(Function &F) {
for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) {
// If this argument is known non-constant, ignore it.
- if (ArgumentConstants[i].second)
+ if (ArgumentConstants[i].getInt())
continue;
Value *V = ACS.getCallArgOperand(i);
@@ -102,13 +102,13 @@ static bool PropagateConstantsIntoArguments(Function &F) {
if (++NumNonconstant == ArgumentConstants.size())
return false;
- ArgumentConstants[i].second = true;
+ ArgumentConstants[i].setInt(true);
continue;
}
- if (C && ArgumentConstants[i].first == nullptr) {
- ArgumentConstants[i].first = C; // First constant seen.
- } else if (C && ArgumentConstants[i].first == C) {
+ if (C && ArgumentConstants[i].getPointer() == nullptr) {
+ ArgumentConstants[i].setPointer(C); // First constant seen.
+ } else if (C && ArgumentConstants[i].getPointer() == C) {
// Still the constant value we think it is.
} else if (V == &*Arg) {
// Ignore recursive calls passing argument down.
@@ -117,7 +117,7 @@ static bool PropagateConstantsIntoArguments(Function &F) {
// give up on this function.
if (++NumNonconstant == ArgumentConstants.size())
return false;
- ArgumentConstants[i].second = true;
+ ArgumentConstants[i].setInt(true);
}
}
}
@@ -128,11 +128,11 @@ static bool PropagateConstantsIntoArguments(Function &F) {
Function::arg_iterator AI = F.arg_begin();
for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
// Do we have a constant argument?
- if (ArgumentConstants[i].second || AI->use_empty() ||
- AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
+ if (ArgumentConstants[i].getInt() || AI->use_empty() ||
+ (AI->hasByValAttr() && !F.onlyReadsMemory()))
continue;
- Value *V = ArgumentConstants[i].first;
+ Value *V = ArgumentConstants[i].getPointer();
if (!V) V = UndefValue::get(AI->getType());
AI->replaceAllUsesWith(V);
++NumArgumentsProped;
@@ -222,16 +222,15 @@ static bool PropagateConstantReturn(Function &F) {
// constant.
bool MadeChange = false;
for (Use &U : F.uses()) {
- CallSite CS(U.getUser());
- Instruction* Call = CS.getInstruction();
+ CallBase *CB = dyn_cast<CallBase>(U.getUser());
// Not a call instruction or a call instruction that's not calling F
// directly?
- if (!Call || !CS.isCallee(&U))
+ if (!CB || !CB->isCallee(&U))
continue;
// Call result not used?
- if (Call->use_empty())
+ if (CB->use_empty())
continue;
MadeChange = true;
@@ -241,12 +240,12 @@ static bool PropagateConstantReturn(Function &F) {
if (Argument *A = dyn_cast<Argument>(New))
// Was an argument returned? Then find the corresponding argument in
// the call instruction and use that.
- New = CS.getArgument(A->getArgNo());
- Call->replaceAllUsesWith(New);
+ New = CB->getArgOperand(A->getArgNo());
+ CB->replaceAllUsesWith(New);
continue;
}
- for (auto I = Call->user_begin(), E = Call->user_end(); I != E;) {
+ for (auto I = CB->user_begin(), E = CB->user_end(); I != E;) {
Instruction *Ins = cast<Instruction>(*I);
// Increment now, so we can remove the use
@@ -266,7 +265,7 @@ static bool PropagateConstantReturn(Function &F) {
if (Argument *A = dyn_cast<Argument>(New))
// Was an argument returned? Then find the corresponding argument in
// the call instruction and use that.
- New = CS.getArgument(A->getArgNo());
+ New = CB->getArgOperand(A->getArgNo());
Ins->replaceAllUsesWith(New);
Ins->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
index 8a15800cbdb5b..d37b9236380d4 100644
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -23,6 +23,7 @@
using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
+ initializeOpenMPOptLegacyPassPass(Registry);
initializeArgPromotionPass(Registry);
initializeCalledValuePropagationLegacyPassPass(Registry);
initializeConstantMergeLegacyPassPass(Registry);
@@ -46,6 +47,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeMergeFunctionsLegacyPassPass(Registry);
initializePartialInlinerLegacyPassPass(Registry);
initializeAttributorLegacyPassPass(Registry);
+ initializeAttributorCGSCCLegacyPassPass(Registry);
initializePostOrderFunctionAttrsLegacyPassPass(Registry);
initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry);
initializePruneEHPass(Registry);
diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp
index e818743544e68..76f1d0c54d081 100644
--- a/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -15,7 +15,6 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
@@ -52,26 +51,26 @@ public:
static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallSite CS) override {
- Function *Callee = CS.getCalledFunction();
+ InlineCost getInlineCost(CallBase &CB) override {
+ Function *Callee = CB.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
bool RemarksEnabled = false;
- const auto &BBs = CS.getCaller()->getBasicBlockList();
+ const auto &BBs = CB.getCaller()->getBasicBlockList();
if (!BBs.empty()) {
auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front());
if (DI.isEnabled())
RemarksEnabled = true;
}
- OptimizationRemarkEmitter ORE(CS.getCaller());
+ OptimizationRemarkEmitter ORE(CB.getCaller());
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(
- cast<CallBase>(*CS.getInstruction()), Params, TTI, GetAssumptionCache,
- /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr);
+ return llvm::getInlineCost(CB, Params, TTI, GetAssumptionCache, GetTLI,
+ /*GetBFI=*/nullptr, PSI,
+ RemarksEnabled ? &ORE : nullptr);
}
bool runOnSCC(CallGraphSCC &SCC) override;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 4b72261131c16..7d2260f4c169d 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -28,16 +29,16 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -57,8 +58,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
@@ -77,11 +80,6 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
-// This weirdly named statistic tracks the number of times that, when attempting
-// to inline a function A into B, we analyze the callers of B in order to see
-// if those would be more profitable and blocked inline steps.
-STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
-
/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
@@ -112,14 +110,6 @@ static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
"printing of statistics for each inlined function")),
cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
-/// Flag to add inline messages as callsite attributes 'inline-remark'.
-static cl::opt<bool>
- InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
- cl::Hidden,
- cl::desc("Enable adding inline-remark attribute to"
- " callsites processed by inliner but decided"
- " to be not inlined"));
-
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -158,13 +148,13 @@ using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful hack.
-static void mergeInlinedArrayAllocas(
- Function *Caller, InlineFunctionInfo &IFI,
- InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
+static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory) {
SmallPtrSet<AllocaInst *, 16> UsedAllocas;
- // When processing our SCC, check to see if CS was inlined from some other
- // call site. For example, if we're processing "A" in this code:
+ // When processing our SCC, check to see if the call site was inlined from
+ // some other call site. For example, if we're processing "A" in this code:
// A() { B() }
// B() { x = alloca ... C() }
// C() { y = alloca ... }
@@ -180,7 +170,7 @@ static void mergeInlinedArrayAllocas(
// Loop over all the allocas we have so far and see if they can be merged with
// a previously inlined alloca. If not, remember that we had it.
- for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
+ for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E;
++AllocaNo) {
AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
@@ -201,8 +191,8 @@ static void mergeInlinedArrayAllocas(
// function. Also, AllocasForType can be empty of course!
bool MergedAwayAlloca = false;
for (AllocaInst *AvailableAlloca : AllocasForType) {
- unsigned Align1 = AI->getAlignment(),
- Align2 = AvailableAlloca->getAlignment();
+ Align Align1 = AI->getAlign();
+ Align Align2 = AvailableAlloca->getAlign();
// The available alloca has to be in the right function, not in some other
// function in this SCC.
@@ -229,18 +219,8 @@ static void mergeInlinedArrayAllocas(
AI->replaceAllUsesWith(AvailableAlloca);
- if (Align1 != Align2) {
- if (!Align1 || !Align2) {
- const DataLayout &DL = Caller->getParent()->getDataLayout();
- unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
-
- Align1 = Align1 ? Align1 : TypeAlign;
- Align2 = Align2 ? Align2 : TypeAlign;
- }
-
- if (Align1 > Align2)
- AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment()));
- }
+ if (Align1 > Align2)
+ AvailableAlloca->setAlignment(AI->getAlign());
AI->eraseFromParent();
MergedAwayAlloca = true;
@@ -271,20 +251,20 @@ static void mergeInlinedArrayAllocas(
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
-static InlineResult InlineCallIfPossible(
- CallSite CS, InlineFunctionInfo &IFI,
+static InlineResult inlineCallIfPossible(
+ CallBase &CB, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
- Function *Callee = CS.getCalledFunction();
- Function *Caller = CS.getCaller();
+ Function *Callee = CB.getCalledFunction();
+ Function *Caller = CB.getCaller();
AAResults &AAR = AARGetter(*Callee);
// Try to inline the function. Get the list of static allocas that were
// inlined.
- InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime);
- if (!IR)
+ InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime);
+ if (!IR.isSuccess())
return IR;
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
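InlineResult lost its implicit bool conversion in this rework; success is now queried explicitly, and failures carry a printable reason. A hedged usage sketch, assuming the InlineFunction signature of this era:

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    bool tryInlineCall(CallBase &CB, InlineFunctionInfo &IFI) {
      InlineResult IR = InlineFunction(CB, IFI);
      if (!IR.isSuccess()) {
        // IR.getFailureReason() explains the refusal, e.g. for remarks.
        return false;
      }
      return true;
    }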
@@ -298,188 +278,9 @@ static InlineResult InlineCallIfPossible(
return IR; // success
}
-/// Return true if inlining of CS can block the caller from being
-/// inlined which is proved to be more beneficial. \p IC is the
-/// estimated inline cost associated with callsite \p CS.
-/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
-/// caller if \p CS is suppressed for inlining.
-static bool
-shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
- int &TotalSecondaryCost,
- function_ref<InlineCost(CallSite CS)> GetInlineCost) {
- // For now we only handle local or inline functions.
- if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
- return false;
- // If the cost of inlining CS is non-positive, it is not going to prevent the
- // caller from being inlined into its callers and hence we don't need to
- // defer.
- if (IC.getCost() <= 0)
- return false;
- // Try to detect the case where the current inlining candidate caller (call
- // it B) is a static or linkonce-ODR function and is an inlining candidate
- // elsewhere, and the current candidate callee (call it C) is large enough
- // that inlining it into B would make B too big to inline later. In these
- // circumstances it may be best not to inline C into B, but to inline B into
- // its callers.
- //
- // This only applies to static and linkonce-ODR functions because those are
- // expected to be available for inlining in the translation units where they
- // are used. Thus we will always have the opportunity to make local inlining
- // decisions. Importantly the linkonce-ODR linkage covers inline functions
- // and templates in C++.
- //
- // FIXME: All of this logic should be sunk into getInlineCost. It relies on
- // the internal implementation of the inline cost metrics rather than
- // treating them as truly abstract units etc.
- TotalSecondaryCost = 0;
- // The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - 1;
- // If the caller has local linkage and can be inlined to all its callers, we
- // can apply a huge negative bonus to TotalSecondaryCost.
- bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
- // This bool tracks what happens if we DO inline C into B.
- bool inliningPreventsSomeOuterInline = false;
- for (User *U : Caller->users()) {
- // If the caller will not be removed (either because it does not have a
- // local linkage or because the LastCallToStaticBonus has been already
- // applied), then we can exit the loop early.
- if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
- return false;
- CallSite CS2(U);
-
- // If this isn't a call to Caller (it could be some other sort
- // of reference) skip it. Such references will prevent the caller
- // from being removed.
- if (!CS2 || CS2.getCalledFunction() != Caller) {
- ApplyLastCallBonus = false;
- continue;
- }
-
- InlineCost IC2 = GetInlineCost(CS2);
- ++NumCallerCallersAnalyzed;
- if (!IC2) {
- ApplyLastCallBonus = false;
- continue;
- }
- if (IC2.isAlways())
- continue;
-
- // See if inlining of the original callsite would erase the cost delta of
- // this callsite. We subtract off the penalty for the call instruction,
- // which we would be deleting.
- if (IC2.getCostDelta() <= CandidateCost) {
- inliningPreventsSomeOuterInline = true;
- TotalSecondaryCost += IC2.getCost();
- }
- }
- // If all outer calls to Caller would get inlined, the cost for the last
- // one is set very low by getInlineCost, in anticipation that Caller will
- // be removed entirely. We did not account for this above unless there
- // is only one caller of Caller.
- if (ApplyLastCallBonus)
- TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
-
- if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
- return true;
-
- return false;
-}
-
-static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
- const ore::NV &Arg) {
- return R << Arg.Val;
-}
-
-template <class RemarkT>
-RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
- using namespace ore;
- if (IC.isAlways()) {
- R << "(cost=always)";
- } else if (IC.isNever()) {
- R << "(cost=never)";
- } else {
- R << "(cost=" << ore::NV("Cost", IC.getCost())
- << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
- }
- if (const char *Reason = IC.getReason())
- R << ": " << ore::NV("Reason", Reason);
- return R;
-}
-
-static std::string inlineCostStr(const InlineCost &IC) {
- std::stringstream Remark;
- Remark << IC;
- return Remark.str();
-}
-
-/// Return the cost only if the inliner should attempt to inline at the given
-/// CallSite. If we return the cost, we will emit an optimisation remark later
-/// using that cost, so we won't do so from this function.
-static Optional<InlineCost>
-shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
- OptimizationRemarkEmitter &ORE) {
- using namespace ore;
-
- InlineCost IC = GetInlineCost(CS);
- Instruction *Call = CS.getInstruction();
- Function *Callee = CS.getCalledFunction();
- Function *Caller = CS.getCaller();
-
- if (IC.isAlways()) {
- LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
- << ", Call: " << *CS.getInstruction() << "\n");
- return IC;
- }
-
- if (IC.isNever()) {
- LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
- << ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because it should never be inlined "
- << IC;
- });
- return IC;
- }
-
- if (!IC) {
- LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
- << ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because too costly to inline " << IC;
- });
- return IC;
- }
-
- int TotalSecondaryCost = 0;
- if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
- LLVM_DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
- << " Cost = " << IC.getCost()
- << ", outer Cost = " << TotalSecondaryCost << '\n');
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
- Call)
- << "Not inlining. Cost of inlining " << NV("Callee", Callee)
- << " increases the cost of inlining " << NV("Caller", Caller)
- << " in other contexts";
- });
-
- // IC does not bool() to false, so get an InlineCost that will.
- // This will not be inspected to make an error message.
- return None;
- }
-
- LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
- << ", Call: " << *CS.getInstruction() << '\n');
- return IC;
-}
-
/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
-static bool InlineHistoryIncludes(
+static bool inlineHistoryIncludes(
Function *F, int InlineHistoryID,
const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
while (InlineHistoryID != -1) {
@@ -504,33 +305,13 @@ bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
return inlineCalls(SCC);
}
-static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc,
- const BasicBlock *Block, const Function &Callee,
- const Function &Caller, const InlineCost &IC) {
- ORE.emit([&]() {
- bool AlwaysInline = IC.isAlways();
- StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
- return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
- << ore::NV("Callee", &Callee) << " inlined into "
- << ore::NV("Caller", &Caller) << " with " << IC;
- });
-}
-
-static void setInlineRemark(CallSite &CS, StringRef message) {
- if (!InlineRemarkAttribute)
- return;
-
- Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message);
- CS.addAttribute(AttributeList::FunctionIndex, attr);
-}
-
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
ProfileSummaryInfo *PSI,
- std::function<TargetLibraryInfo &(Function &)> GetTLI,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI,
bool InsertLifetime,
- function_ref<InlineCost(CallSite CS)> GetInlineCost,
+ function_ref<InlineCost(CallBase &CB)> GetInlineCost,
function_ref<AAResults &(Function &)> AARGetter,
ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
SmallPtrSet<Function *, 8> SCCFunctions;
@@ -545,7 +326,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// Scan through and identify all call sites ahead of time so that we only
// inline call sites in the original functions, not call sites that result
// from inlining other functions.
- SmallVector<std::pair<CallSite, int>, 16> CallSites;
+ SmallVector<std::pair<CallBase *, int>, 16> CallSites;
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
@@ -561,31 +342,31 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
OptimizationRemarkEmitter ORE(F);
for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
- CallSite CS(cast<Value>(&I));
+ auto *CB = dyn_cast<CallBase>(&I);
// If this isn't a call, or it is a call to an intrinsic, it can
// never be inlined.
- if (!CS || isa<IntrinsicInst>(I))
+ if (!CB || isa<IntrinsicInst>(I))
continue;
// If this is a direct call to an external function, we can never inline
// it. If it is an indirect call, inlining may resolve it to be a
// direct call, so we keep it.
- if (Function *Callee = CS.getCalledFunction())
+ if (Function *Callee = CB->getCalledFunction())
if (Callee->isDeclaration()) {
using namespace ore;
- setInlineRemark(CS, "unavailable definition");
+ setInlineRemark(*CB, "unavailable definition");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", CS.getCaller())
+ << NV("Caller", CB->getCaller())
<< " because its definition is unavailable"
<< setIsVerbose();
});
continue;
}
- CallSites.push_back(std::make_pair(CS, -1));
+ CallSites.push_back(std::make_pair(CB, -1));
}
}
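The scan above is the recurring CallSite-to-CallBase migration pattern in this patch: a dyn_cast to CallBase covers both CallInst and InvokeInst, replacing the old CallSite wrapper. A stand-alone sketch (the function name is illustrative):
static void collectInlinableCalls(Function &F,
                                  SmallVectorImpl<CallBase *> &Out) {
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (auto *CB = dyn_cast<CallBase>(&I))       // calls and invokes
        if (!isa<IntrinsicInst>(I))                // intrinsics never inline
          if (Function *Callee = CB->getCalledFunction())
            if (!Callee->isDeclaration())          // need a body to inline
              Out.push_back(CB);
}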
@@ -598,13 +379,13 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// Now that we have all of the call sites, move the ones to functions in the
// current SCC to the end of the list.
unsigned FirstCallInSCC = CallSites.size();
- for (unsigned i = 0; i < FirstCallInSCC; ++i)
- if (Function *F = CallSites[i].first.getCalledFunction())
+ for (unsigned I = 0; I < FirstCallInSCC; ++I)
+ if (Function *F = CallSites[I].first->getCalledFunction())
if (SCCFunctions.count(F))
- std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+ std::swap(CallSites[I--], CallSites[--FirstCallInSCC]);
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);
+ InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -616,31 +397,28 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// calls to become direct calls.
// CallSites may be modified inside so ranged for loop can not be used.
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
- CallSite CS = CallSites[CSi].first;
+ auto &P = CallSites[CSi];
+ CallBase &CB = *P.first;
+ const int InlineHistoryID = P.second;
- Function *Caller = CS.getCaller();
- Function *Callee = CS.getCalledFunction();
+ Function *Caller = CB.getCaller();
+ Function *Callee = CB.getCalledFunction();
// We can only inline direct calls to non-declarations.
if (!Callee || Callee->isDeclaration())
continue;
- Instruction *Instr = CS.getInstruction();
-
- bool IsTriviallyDead =
- isInstructionTriviallyDead(Instr, &GetTLI(*Caller));
+ bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller));
- int InlineHistoryID;
if (!IsTriviallyDead) {
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
// itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
- InlineHistoryID = CallSites[CSi].second;
if (InlineHistoryID != -1 &&
- InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
- setInlineRemark(CS, "recursive");
+ inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(CB, "recursive");
continue;
}
}
@@ -650,56 +428,49 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// just become a regular analysis dependency.
OptimizationRemarkEmitter ORE(Caller);
- Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
+ auto OIC = shouldInline(CB, GetInlineCost, ORE);
// If the policy determines that we should inline this function,
// delete the call instead.
- if (!OIC.hasValue()) {
- setInlineRemark(CS, "deferred");
- continue;
- }
-
- if (!OIC.getValue()) {
- // shouldInline() call returned a negative inline cost that explains
- // why this callsite should not be inlined.
- setInlineRemark(CS, inlineCostStr(*OIC));
+ if (!OIC)
continue;
- }
// If this call site is dead and it is to a readonly function, we should
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
if (IsTriviallyDead) {
- LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
+ LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
- setInlineRemark(CS, "trivially dead");
- CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
- Instr->eraseFromParent();
+ setInlineRemark(CB, "trivially dead");
+ CG[Caller]->removeCallEdgeFor(CB);
+ CB.eraseFromParent();
++NumCallsDeleted;
} else {
- // Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = CS->getDebugLoc();
- BasicBlock *Block = CS.getParent();
+ // Get DebugLoc to report. CB will be invalid after Inliner.
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *Block = CB.getParent();
// Attempt to inline the function.
using namespace ore;
- InlineResult IR = InlineCallIfPossible(
- CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
+ InlineResult IR = inlineCallIfPossible(
+ CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
InsertLifetime, AARGetter, ImportedFunctionsStats);
- if (!IR) {
- setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
+ if (!IR.isSuccess()) {
+ setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " +
+ inlineCostStr(*OIC));
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
Block)
<< NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller) << ": " << NV("Reason", IR.message);
+ << NV("Caller", Caller) << ": "
+ << NV("Reason", IR.getFailureReason());
});
continue;
}
++NumInlined;
- emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -709,8 +480,23 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
int NewHistoryID = InlineHistory.size();
InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
- for (Value *Ptr : InlineInfo.InlinedCalls)
- CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+#ifndef NDEBUG
+ // Make sure there are no duplicates in the inline candidates. This
+ // could happen when a callsite is simplified to reuse the return
+ // value of another callsite during function cloning, in which case
+ // the other callsite will be reconsidered here.
+ DenseSet<CallBase *> DbgCallSites;
+ for (auto &II : CallSites)
+ DbgCallSites.insert(II.first);
+#endif
+
+ for (Value *Ptr : InlineInfo.InlinedCalls) {
+#ifndef NDEBUG
+ assert(DbgCallSites.count(dyn_cast<CallBase>(Ptr)) == 0);
+#endif
+ CallSites.push_back(
+ std::make_pair(dyn_cast<CallBase>(Ptr), NewHistoryID));
+ }
}
}
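Each new call site recorded above carries NewHistoryID, so InlineHistory forms a parent-linked chain of (callee, parent-ID) pairs. A sketch consistent with the truncated inlineHistoryIncludes body shown earlier:
static bool historyContains(
    Function *F, int ID,
    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
  while (ID != -1) {
    if (InlineHistory[ID].first == F)
      return true;                   // F was already inlined on this path.
    ID = InlineHistory[ID].second;   // Follow the parent history entry.
  }
  return false;
}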
@@ -759,7 +545,7 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
+ GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
};
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
@@ -767,7 +553,7 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
};
return inlineCallsImpl(
SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
- [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this),
+ [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this),
ImportedFunctionsStats);
}
@@ -870,16 +656,47 @@ InlinerPass::~InlinerPass() {
}
}
+InlineAdvisor &
+InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
+ FunctionAnalysisManager &FAM, Module &M) {
+ auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
+ if (!IAA) {
+ // It should still be possible to run the inliner as a stand-alone SCC pass,
+ // for test scenarios. In that case, we default to the
+ // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass
+ // runs. It also uses just the default InlineParams.
+ // In this case, we need to use the provided FAM, which is valid for the
+ // duration of the inliner pass, and thus the lifetime of the owned advisor.
+ // The one we would get from the MAM can be invalidated as a result of the
+ // inliner's activity.
+ OwnedDefaultAdvisor.emplace(FAM, getInlineParams());
+ return *OwnedDefaultAdvisor;
+ }
+ assert(IAA->getAdvisor() &&
+ "Expected a present InlineAdvisorAnalysis also have an "
+ "InlineAdvisor initialized");
+ return *IAA->getAdvisor();
+}
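A hedged sketch of the per-callsite protocol the new InlineAdvisor implies, matching the record* calls in the hunks below (request advice, then record exactly one outcome):
static void processCallSite(InlineAdvisor &Advisor, CallBase &CB,
                            InlineFunctionInfo &IFI) {
  std::unique_ptr<InlineAdvice> Advice = Advisor.getAdvice(CB);
  if (!Advice->isInliningRecommended()) {
    Advice->recordUnattemptedInlining();
    return;
  }
  InlineResult IR = InlineFunction(CB, IFI);
  if (IR.isSuccess())
    Advice->recordInlining();  // or recordInliningWithCalleeDeleted()
  else
    Advice->recordUnsuccessfulInlining(IR);
}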
+
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
- const ModuleAnalysisManager &MAM =
- AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
+ const auto &MAMProxy =
+ AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG);
bool Changed = false;
assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
Module &M = *InitialC.begin()->getFunction().getParent();
- ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+ ProfileSummaryInfo *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(M);
+
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
+ .getManager();
+
+ InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
+ Advisor.onPassEntry();
+
+ auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
if (!ImportedFunctionsStats &&
InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
@@ -912,11 +729,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// this model, but it is uniformly spread across all the functions in the SCC
// and eventually they all become too large to inline, rather than
// incrementally making a single function grow in a superlinear fashion.
- SmallVector<std::pair<CallSite, int>, 16> Calls;
-
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
- .getManager();
+ SmallVector<std::pair<CallBase *, int>, 16> Calls;
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
@@ -928,17 +741,17 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// FIXME: Using instructions sequence is a really bad way to do this.
// Instead we should do an actual RPO walk of the function body.
for (Instruction &I : instructions(N.getFunction()))
- if (auto CS = CallSite(&I))
- if (Function *Callee = CS.getCalledFunction()) {
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
- Calls.push_back({CS, -1});
+ Calls.push_back({CB, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
- setInlineRemark(CS, "unavailable definition");
+ setInlineRemark(*CB, "unavailable definition");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
<< NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", CS.getCaller())
+ << NV("Caller", CB->getCaller())
<< " because its definition is unavailable"
<< setIsVerbose();
});
@@ -969,68 +782,41 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// Loop forward over all of the calls. Note that we cannot cache the size as
// inlining can introduce new calls that need to be processed.
- for (int i = 0; i < (int)Calls.size(); ++i) {
+ for (int I = 0; I < (int)Calls.size(); ++I) {
// We expect the calls to typically be batched with sequences of calls that
// have the same caller, so we first set up some shared infrastructure for
// this caller. We also do any pruning we can at this layer on the caller
// alone.
- Function &F = *Calls[i].first.getCaller();
+ Function &F = *Calls[I].first->getCaller();
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- if (F.hasOptNone()) {
- setInlineRemark(Calls[i].first, "optnone attribute");
+ if (!Calls[I].first->getCalledFunction()->hasFnAttribute(
+ Attribute::AlwaysInline) &&
+ F.hasOptNone()) {
+ setInlineRemark(*Calls[I].first, "optnone attribute");
continue;
}
LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
- // Get a FunctionAnalysisManager via a proxy for this particular node. We
- // do this each time we visit a node as the SCC may have changed and as
- // we're going to mutate this particular function we want to make sure the
- // proxy is in place to forward any invalidation events. We can use the
- // manager we get here for looking up results for functions other than this
- // node however because those functions aren't going to be mutated by this
- // pass.
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
- .getManager();
-
- // Get the remarks emission analysis for the caller.
- auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&](Function &F) -> AssumptionCache & {
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
- return FAM.getResult<BlockFrequencyAnalysis>(F);
- };
-
- auto GetInlineCost = [&](CallSite CS) {
- Function &Callee = *CS.getCalledFunction();
- auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
- bool RemarksEnabled =
- Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
- DEBUG_TYPE);
- return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
- CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
- RemarksEnabled ? &ORE : nullptr);
- };
- // Now process as many calls as we have within this caller in the sequnece.
+ // Now process as many calls as we have within this caller in the sequence.
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
bool DidInline = false;
- for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
- int InlineHistoryID;
- CallSite CS;
- std::tie(CS, InlineHistoryID) = Calls[i];
- Function &Callee = *CS.getCalledFunction();
+ for (; I < (int)Calls.size() && Calls[I].first->getCaller() == &F; ++I) {
+ auto &P = Calls[I];
+ CallBase *CB = P.first;
+ const int InlineHistoryID = P.second;
+ Function &Callee = *CB->getCalledFunction();
if (InlineHistoryID != -1 &&
- InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
- setInlineRemark(CS, "recursive");
+ inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(*CB, "recursive");
continue;
}
@@ -1044,62 +830,53 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
- setInlineRemark(CS, "recursive SCC split");
+ setInlineRemark(*CB, "recursive SCC split");
continue;
}
- Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
+ auto Advice = Advisor.getAdvice(*CB);
// Check whether we want to inline this callsite.
- if (!OIC.hasValue()) {
- setInlineRemark(CS, "deferred");
- continue;
- }
-
- if (!OIC.getValue()) {
- // shouldInline() call returned a negative inline cost that explains
- // why this callsite should not be inlined.
- setInlineRemark(CS, inlineCostStr(*OIC));
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
continue;
}
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
- /*cg=*/nullptr, &GetAssumptionCache, PSI,
- &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
+ /*cg=*/nullptr, GetAssumptionCache, PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
- // Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = CS->getDebugLoc();
- BasicBlock *Block = CS.getParent();
-
- using namespace ore;
-
- InlineResult IR = InlineFunction(CS, IFI);
- if (!IR) {
- setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", &Callee) << " will not be inlined into "
- << NV("Caller", &F) << ": " << NV("Reason", IR.message);
- });
+ InlineResult IR = InlineFunction(*CB, IFI);
+ if (!IR.isSuccess()) {
+ Advice->recordUnsuccessfulInlining(IR);
continue;
}
+
DidInline = true;
InlinedCallees.insert(&Callee);
-
++NumInlined;
- emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC);
-
// Add any new callsites to defined functions to the worklist.
if (!IFI.InlinedCallSites.empty()) {
int NewHistoryID = InlineHistory.size();
InlineHistory.push_back({&Callee, InlineHistoryID});
- for (CallSite &CS : reverse(IFI.InlinedCallSites))
- if (Function *NewCallee = CS.getCalledFunction())
+
+ for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
+ Function *NewCallee = ICB->getCalledFunction();
+ if (!NewCallee) {
+ // Try to promote an indirect (virtual) call without waiting for
+ // the post-inline cleanup and the next DevirtSCCRepeatedPass
+ // iteration because the next iteration may not happen and we may
+ // miss inlining it.
+ if (tryPromoteCall(*ICB))
+ NewCallee = ICB->getCalledFunction();
+ }
+ if (NewCallee)
if (!NewCallee->isDeclaration())
- Calls.push_back({CS, NewHistoryID});
+ Calls.push_back({ICB, NewHistoryID});
+ }
}
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
@@ -1112,15 +889,16 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// dead. In that case, we can drop the body of the function eagerly
// which may reduce the number of callers of other functions to one,
// changing inline cost thresholds.
+ bool CalleeWasDeleted = false;
if (Callee.hasLocalLinkage()) {
// To check this we also need to nuke any dead constant uses (perhaps
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
Calls.erase(
- std::remove_if(Calls.begin() + i + 1, Calls.end(),
- [&Callee](const std::pair<CallSite, int> &Call) {
- return Call.first.getCaller() == &Callee;
+ std::remove_if(Calls.begin() + I + 1, Calls.end(),
+ [&](const std::pair<CallBase *, int> &Call) {
+ return Call.first->getCaller() == &Callee;
}),
Calls.end());
// Clear the body and queue the function itself for deletion when we
@@ -1131,13 +909,18 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
"Cannot put cause a function to become dead twice!");
DeadFunctions.push_back(&Callee);
+ CalleeWasDeleted = true;
}
}
+ if (CalleeWasDeleted)
+ Advice->recordInliningWithCalleeDeleted();
+ else
+ Advice->recordInlining();
}
// Back the call index up by one to put us in a good position to go around
// the outer loop.
- --i;
+ --I;
if (!DidInline)
continue;
@@ -1163,8 +946,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// essentially do all of the same things as a function pass and we can
// re-use the exact same logic for updating the call graph to reflect the
// change.
+
+ // Inside the update, we also update the FunctionAnalysisManager in the
+ // proxy for this particular SCC. We do this as the SCC may have changed and
+ // as we're going to mutate this particular function we want to make sure
+ // the proxy is in place to forward any invalidation events.
LazyCallGraph::SCC *OldC = C;
- C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+ C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR, FAM);
LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
RC = &C->getOuterRefSCC();
@@ -1208,11 +996,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// sets.
for (Function *DeadF : DeadFunctions) {
// Get the necessary information out of the call graph and nuke the
- // function there. Also, cclear out any cached analyses.
+ // function there. Also, clear out any cached analyses.
auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
- .getManager();
FAM.clear(*DeadF, DeadF->getName());
AM.clear(DeadC, DeadC.getName());
auto &DeadRC = DeadC.getOuterRefSCC();
@@ -1224,7 +1009,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
UR.InvalidatedRefSCCs.insert(&DeadRC);
// And delete the actual function from the module.
- M.getFunctionList().erase(DeadF);
+ // The Advisor may use Function pointers to efficiently index various
+ // internal maps, e.g. for memoization. Function cleanup passes like
+ // argument promotion create new functions. It is possible for a new
+ // function to be allocated at the address of a deleted function. We could
+ // index using names, but that's inefficient. Alternatively, we let the
+ // Advisor free the functions when it sees fit.
+ DeadF->getBasicBlockList().clear();
+ M.getFunctionList().remove(DeadF);
+
++NumDeleted;
}
@@ -1237,3 +1030,45 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
return PA;
}
+
+ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
+ bool Debugging,
+ InliningAdvisorMode Mode,
+ unsigned MaxDevirtIterations)
+ : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations),
+ PM(Debugging), MPM(Debugging) {
+ // Run the inliner first. The theory is that we are walking bottom-up and so
+ // the callees have already been fully optimized, and we want to inline them
+ // into the callers so that our optimizations can reflect that.
+ // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
+ // because it makes profile annotation in the backend inaccurate.
+ PM.addPass(InlinerPass());
+}
+
+PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
+ if (!IAA.tryCreate(Params, Mode)) {
+ M.getContext().emitError(
+ "Could not setup Inlining Advisor for the requested "
+ "mode and/or options");
+ return PreservedAnalyses::all();
+ }
+
+ // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
+ // to detect when we devirtualize indirect calls and iterate the SCC passes
+ // in that case to try and catch knock-on inlining or function attrs
+ // opportunities. Then we add it to the module pipeline by walking the SCCs
+ // in postorder (or bottom-up).
+ // If MaxDevirtIterations is 0, we just don't use the devirtualization
+ // wrapper.
+ if (MaxDevirtIterations == 0)
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM)));
+ else
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
+ auto Ret = MPM.run(M, MAM);
+
+ IAA.clear();
+ return Ret;
+}
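A hedged usage sketch for the new wrapper, assuming a Module M and ModuleAnalysisManager MAM already set up; the parameter values (including the InliningAdvisorMode::Default enumerator) are illustrative assumptions, not defaults taken from this patch:
ModulePassManager MPM;
MPM.addPass(ModuleInlinerWrapperPass(getInlineParams(),
                                     /*Debugging=*/false,
                                     InliningAdvisorMode::Default,
                                     /*MaxDevirtIterations=*/4));
PreservedAnalyses PA = MPM.run(M, MAM); // Advisor is created and torn down inside run().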
diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index f7108e8002ac9..f7f5b4cf67041 100644
--- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -15,7 +15,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -36,22 +36,30 @@ using namespace llvm;
STATISTIC(NumExtracted, "Number of loops extracted");
namespace {
- struct LoopExtractor : public LoopPass {
+ struct LoopExtractor : public ModulePass {
static char ID; // Pass identification, replacement for typeid
+
+ // The number of natural loops to extract from the program into functions.
unsigned NumLoops;
explicit LoopExtractor(unsigned numLoops = ~0)
- : LoopPass(ID), NumLoops(numLoops) {
- initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
- }
+ : ModulePass(ID), NumLoops(numLoops) {
+ initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnLoop(Loop *L, LPPassManager &) override;
+ bool runOnModule(Module &M) override;
+ bool runOnFunction(Function &F);
+
+ bool extractLoops(Loop::iterator From, Loop::iterator To, LoopInfo &LI,
+ DominatorTree &DT);
+ bool extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(BreakCriticalEdgesID);
- AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
AU.addUsedIfAvailable<AssumptionCacheTracker>();
}
};
@@ -61,8 +69,9 @@ char LoopExtractor::ID = 0;
INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
"Extract loops into new functions", false, false)
INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
"Extract loops into new functions", false, false)
@@ -83,81 +92,130 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
//
Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
-bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipLoop(L))
+bool LoopExtractor::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ if (M.empty())
+ return false;
+
+ if (!NumLoops)
return false;
- // Only visit top-level loops.
- if (L->getParentLoop())
+ bool Changed = false;
+
+ // The end of the function list may change (new functions will be added at the
+ // end), so we run from the first to the current last.
+ auto I = M.begin(), E = --M.end();
+ while (true) {
+ Function &F = *I;
+
+ Changed |= runOnFunction(F);
+ if (!NumLoops)
+ break;
+
+ // If this is the last function.
+ if (I == E)
+ break;
+
+ ++I;
+ }
+ return Changed;
+}
+
+bool LoopExtractor::runOnFunction(Function &F) {
+ // Do not modify `optnone` functions.
+ if (F.hasOptNone())
return false;
- // If LoopSimplify form is not available, stay out of trouble.
- if (!L->isLoopSimplifyForm())
+ if (F.empty())
return false;
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
bool Changed = false;
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(F, &Changed).getLoopInfo();
+
+ // If there are no loops in the function.
+ if (LI.empty())
+ return Changed;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
// If there is more than one top-level loop in this function, extract all of
- // the loops. Otherwise there is exactly one top-level loop; in this case if
- // this function is more than a minimal wrapper around the loop, extract
- // the loop.
- bool ShouldExtractLoop = false;
-
- // Extract the loop if the entry block doesn't branch to the loop header.
- Instruction *EntryTI =
- L->getHeader()->getParent()->getEntryBlock().getTerminator();
- if (!isa<BranchInst>(EntryTI) ||
- !cast<BranchInst>(EntryTI)->isUnconditional() ||
- EntryTI->getSuccessor(0) != L->getHeader()) {
- ShouldExtractLoop = true;
- } else {
- // Check to see if any exits from the loop are more than just return
- // blocks.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) {
- ShouldExtractLoop = true;
- break;
- }
+ // the loops.
+ if (std::next(LI.begin()) != LI.end())
+ return Changed | extractLoops(LI.begin(), LI.end(), LI, DT);
+
+ // Otherwise there is exactly one top-level loop.
+ Loop *TLL = *LI.begin();
+
+ // If the loop is in LoopSimplify form, then extract it only if this function
+ // is more than a minimal wrapper around the loop.
+ if (TLL->isLoopSimplifyForm()) {
+ bool ShouldExtractLoop = false;
+
+ // Extract the loop if the entry block doesn't branch to the loop header.
+ Instruction *EntryTI = F.getEntryBlock().getTerminator();
+ if (!isa<BranchInst>(EntryTI) ||
+ !cast<BranchInst>(EntryTI)->isUnconditional() ||
+ EntryTI->getSuccessor(0) != TLL->getHeader()) {
+ ShouldExtractLoop = true;
+ } else {
+ // Check to see if any exits from the loop are more than just return
+ // blocks.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ TLL->getExitBlocks(ExitBlocks);
+ for (auto *ExitBlock : ExitBlocks)
+ if (!isa<ReturnInst>(ExitBlock->getTerminator())) {
+ ShouldExtractLoop = true;
+ break;
+ }
+ }
+
+ if (ShouldExtractLoop)
+ return Changed | extractLoop(TLL, LI, DT);
}
- if (ShouldExtractLoop) {
- // We must omit EH pads. EH pads must accompany the invoke
- // instruction. But this would result in a loop in the extracted
- // function. An infinite cycle occurs when it tries to extract that loop as
- // well.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i]->isEHPad()) {
- ShouldExtractLoop = false;
- break;
- }
+ // Okay, this function is a minimal container around the specified loop.
+ // If we extract the loop, we will continue to just keep extracting it
+ // infinitely... so don't extract it. However, if the loop contains any
+ // sub-loops, extract them.
+ return Changed | extractLoops(TLL->begin(), TLL->end(), LI, DT);
+}
+
+bool LoopExtractor::extractLoops(Loop::iterator From, Loop::iterator To,
+ LoopInfo &LI, DominatorTree &DT) {
+ bool Changed = false;
+ SmallVector<Loop *, 8> Loops;
+
+ // Save the list of loops, as it may change.
+ Loops.assign(From, To);
+ for (Loop *L : Loops) {
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ continue;
+
+ Changed |= extractLoop(L, LI, DT);
+ if (!NumLoops)
+ break;
}
+ return Changed;
+}
- if (ShouldExtractLoop) {
- if (NumLoops == 0) return Changed;
+bool LoopExtractor::extractLoop(Loop *L, LoopInfo &LI, DominatorTree &DT) {
+ assert(NumLoops != 0);
+ AssumptionCache *AC = nullptr;
+ Function &Func = *L->getHeader()->getParent();
+ if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
+ AC = ACT->lookupAssumptionCache(Func);
+ CodeExtractorAnalysisCache CEAC(Func);
+ CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
+ if (Extractor.extractCodeRegion(CEAC)) {
+ LI.erase(L);
--NumLoops;
- AssumptionCache *AC = nullptr;
- Function &Func = *L->getHeader()->getParent();
- if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
- AC = ACT->lookupAssumptionCache(Func);
- CodeExtractorAnalysisCache CEAC(Func);
- CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
- if (Extractor.extractCodeRegion(CEAC) != nullptr) {
- Changed = true;
- // After extraction, the loop is replaced by a function call, so
- // we shouldn't try to run any more loop passes on it.
- LPM.markLoopAsDeleted(*L);
- LI.erase(L);
- }
++NumExtracted;
+ return true;
}
-
- return Changed;
+ return false;
}
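A stand-alone sketch of the CodeExtractor flow extractLoop now uses, assuming a DominatorTree DT and LoopInfo LI are in scope; the helper name is illustrative:
static bool outlineLoop(Loop *L, DominatorTree &DT, LoopInfo &LI) {
  Function &F = *L->getHeader()->getParent();
  CodeExtractorAnalysisCache CEAC(F);
  CodeExtractor Extractor(DT, *L, /*AggregateArgs=*/false);
  if (Function *Outlined = Extractor.extractCodeRegion(CEAC)) {
    LI.erase(L); // The loop body now lives in Outlined; drop it from LI.
    return true;
  }
  return false;
}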
// createSingleLoopExtractorPass - This pass extracts one natural loop from the
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index fa664966faf74..8eef7e3e7e999 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -382,6 +382,9 @@ class LowerTypeTestsModule {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
+ // Set when the client has invoked this to simply drop all type test assume
+ // sequences.
+ bool DropTypeTests;
Triple::ArchType Arch;
Triple::OSType OS;
@@ -500,7 +503,8 @@ class LowerTypeTestsModule {
public:
LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary);
+ const ModuleSummaryIndex *ImportSummary,
+ bool DropTypeTests);
bool lower();
@@ -516,22 +520,24 @@ struct LowerTypeTests : public ModulePass {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
+ bool DropTypeTests;
LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
LowerTypeTests(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary)
+ const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
: ModulePass(ID), ExportSummary(ExportSummary),
- ImportSummary(ImportSummary) {
+ ImportSummary(ImportSummary), DropTypeTests(DropTypeTests) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (UseCommandLine)
return LowerTypeTestsModule::runForTesting(M);
- return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
+ return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
+ .lower();
}
};
@@ -544,8 +550,9 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
ModulePass *
llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary) {
- return new LowerTypeTests(ExportSummary, ImportSummary);
+ const ModuleSummaryIndex *ImportSummary,
+ bool DropTypeTests) {
+ return new LowerTypeTests(ExportSummary, ImportSummary, DropTypeTests);
}
/// Build a bit set for TypeId using the object layouts in
@@ -728,6 +735,9 @@ static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
/// replace the call with.
Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
const TypeIdLowering &TIL) {
+ // Delay lowering if the resolution is currently unknown.
+ if (TIL.TheKind == TypeTestResolution::Unknown)
+ return nullptr;
if (TIL.TheKind == TypeTestResolution::Unsat)
return ConstantInt::getFalse(M.getContext());
@@ -828,11 +838,10 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
uint64_t DesiredPadding = 0;
for (GlobalTypeMember *G : Globals) {
auto *GV = cast<GlobalVariable>(G->getGlobal());
- MaybeAlign Alignment(GV->getAlignment());
- if (!Alignment)
- Alignment = Align(DL.getABITypeAlignment(GV->getValueType()));
- MaxAlign = std::max(MaxAlign, *Alignment);
- uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment);
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
+ MaxAlign = std::max(MaxAlign, Alignment);
+ uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, Alignment);
GlobalLayout[G] = GVOffset;
if (GVOffset != 0) {
uint64_t Padding = GVOffset - CurOffset;
@@ -1030,14 +1039,18 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
report_fatal_error("Second argument of llvm.type.test must be metadata");
auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
+ // If this is a local unpromoted type, which doesn't have a metadata string,
+ // treat it as Unknown and delay lowering, so that we can still make use of
+ // it in later optimizations.
if (!TypeIdStr)
- report_fatal_error(
- "Second argument of llvm.type.test must be a metadata string");
+ return;
TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
- CI->replaceAllUsesWith(Lowered);
- CI->eraseFromParent();
+ if (Lowered) {
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
}
// ThinLTO backend: the function F has a jump table entry; update this module
@@ -1048,7 +1061,7 @@ void LowerTypeTestsModule::importFunction(
assert(F->getType()->getAddressSpace() == 0);
GlobalValue::VisibilityTypes Visibility = F->getVisibility();
- std::string Name = F->getName();
+ std::string Name = std::string(F->getName());
if (F->isDeclarationForLinker() && isJumpTableCanonical) {
// Non-dso_local functions may be overriden at run time,
@@ -1160,8 +1173,10 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
for (CallInst *CI : TIUI.CallSites) {
++NumTypeTestCallsLowered;
Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
- CI->replaceAllUsesWith(Lowered);
- CI->eraseFromParent();
+ if (Lowered) {
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
}
}
}
@@ -1269,7 +1284,7 @@ void LowerTypeTestsModule::moveInitializerToModuleConstructor(
IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator());
GV->setConstant(false);
- IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlignment());
+ IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlign());
GV->setInitializer(Constant::getNullValue(GV->getValueType()));
}
@@ -1516,13 +1531,13 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
F->getType());
if (Functions[I]->isExported()) {
if (IsJumpTableCanonical) {
- ExportSummary->cfiFunctionDefs().insert(F->getName());
+ ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
} else {
GlobalAlias *JtAlias = GlobalAlias::create(
F->getValueType(), 0, GlobalValue::ExternalLinkage,
F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
JtAlias->setVisibility(GlobalValue::HiddenVisibility);
- ExportSummary->cfiFunctionDecls().insert(F->getName());
+ ExportSummary->cfiFunctionDecls().insert(std::string(F->getName()));
}
}
if (!IsJumpTableCanonical) {
@@ -1655,8 +1670,9 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(
Module &M, ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary)
- : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+ const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
+ : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+ DropTypeTests(DropTypeTests) {
assert(!(ExportSummary && ImportSummary));
Triple TargetTriple(M.getTargetTriple());
Arch = TargetTriple.getArch();
@@ -1683,7 +1699,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
bool Changed =
LowerTypeTestsModule(
M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
- ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
+ ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
+ /*DropTypeTests*/ false)
.lower();
if (!ClWriteSummary.empty()) {
@@ -1703,8 +1720,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
static bool isDirectCall(Use& U) {
auto *Usr = dyn_cast<CallInst>(U.getUser());
if (Usr) {
- CallSite CS(Usr);
- if (CS.isCallee(&U))
+ auto *CB = dyn_cast<CallBase>(Usr);
+ if (CB && CB->isCallee(&U))
return true;
}
return false;
@@ -1750,6 +1767,33 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
}
bool LowerTypeTestsModule::lower() {
+ Function *TypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+
+ if (DropTypeTests && TypeTestFunc) {
+ for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
+ UI != UE;) {
+ auto *CI = cast<CallInst>((*UI++).getUser());
+ // Find and erase llvm.assume intrinsics for this llvm.type.test call.
+ for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;) {
+ if (auto *AssumeCI = dyn_cast<CallInst>((*CIU++).getUser())) {
+ Function *F = AssumeCI->getCalledFunction();
+ if (F && F->getIntrinsicID() == Intrinsic::assume)
+ AssumeCI->eraseFromParent();
+ }
+ }
+ CI->eraseFromParent();
+ }
+
+ // We have deleted the type intrinsics, so we no longer have enough
+ // information to reason about the liveness of virtual function pointers
+ // in GlobalDCE.
+ for (GlobalVariable &GV : M.globals())
+ GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
+
+ return true;
+ }
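The two nested loops above rely on the post-increment idiom for erasing users mid-iteration: advance the use iterator before the erase invalidates the current Use. A minimal illustration (helper name is not from the patch):
static void eraseAllCallUsers(Function &F) {
  for (auto UI = F.use_begin(), UE = F.use_end(); UI != UE;) {
    Use &U = *UI++;                      // Advance before any erasure.
    if (auto *CI = dyn_cast<CallInst>(U.getUser()))
      CI->eraseFromParent();             // Safe: iterator already moved on.
  }
}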
+
// If only some of the modules were split, we cannot correctly perform
// this transformation. We already checked for the presence of type tests
// with partially split modules during the thin link, and would have emitted
@@ -1758,8 +1802,6 @@ bool LowerTypeTestsModule::lower() {
(ImportSummary && ImportSummary->partiallySplitLTOUnits()))
return false;
- Function *TypeTestFunc =
- M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *ICallBranchFunnelFunc =
M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel));
if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
@@ -1787,9 +1829,10 @@ bool LowerTypeTestsModule::lower() {
// have the same name, but it's not the one we are looking for.
if (F.hasLocalLinkage())
continue;
- if (ImportSummary->cfiFunctionDefs().count(F.getName()))
+ if (ImportSummary->cfiFunctionDefs().count(std::string(F.getName())))
Defs.push_back(&F);
- else if (ImportSummary->cfiFunctionDecls().count(F.getName()))
+ else if (ImportSummary->cfiFunctionDecls().count(
+ std::string(F.getName())))
Decls.push_back(&F);
}
@@ -2196,7 +2239,9 @@ bool LowerTypeTestsModule::lower() {
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
- bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
+ bool Changed =
+ LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
+ .lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 06d2a2f319412..8cc19515f3db8 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -95,7 +95,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -467,13 +466,13 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
Use *U = &*UI;
++UI;
- CallSite CS(U->getUser());
- if (CS && CS.isCallee(U)) {
+ CallBase *CB = dyn_cast<CallBase>(U->getUser());
+ if (CB && CB->isCallee(U)) {
// Do not copy attributes from the called function to the call-site.
// Function comparison ensures that the attributes are the same up to
// type congruences in byval(), in which case we need to keep the byval
// type of the call-site, not the callee function.
- remove(CS.getInstruction()->getFunction());
+ remove(CB->getFunction());
U->set(BitcastNew);
}
}
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
new file mode 100644
index 0000000000000..f664a24173747
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -0,0 +1,1501 @@
+//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// OpenMP specific optimizations:
+//
+// - Deduplication of runtime calls, e.g., omp_get_thread_num.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
+
+#include "llvm/ADT/EnumeratedArray.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+
+using namespace llvm;
+using namespace omp;
+
+#define DEBUG_TYPE "openmp-opt"
+
+static cl::opt<bool> DisableOpenMPOptimizations(
+ "openmp-opt-disable", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
+ cl::Hidden);
+static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
+ cl::init(false), cl::Hidden);
+
+STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
+ "Number of OpenMP runtime calls deduplicated");
+STATISTIC(NumOpenMPParallelRegionsDeleted,
+ "Number of OpenMP parallel regions deleted");
+STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
+ "Number of OpenMP runtime functions identified");
+STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
+ "Number of OpenMP runtime function uses identified");
+STATISTIC(NumOpenMPTargetRegionKernels,
+ "Number of OpenMP target region entry points (=kernels) identified");
+STATISTIC(
+ NumOpenMPParallelRegionsReplacedInGPUStateMachine,
+ "Number of OpenMP parallel regions replaced with ID in GPU state machines");
+
+#if !defined(NDEBUG)
+static constexpr auto TAG = "[" DEBUG_TYPE "]";
+#endif
+
+/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
+/// true, constant expression users are not given to \p CB but their uses are
+/// traversed transitively.
+template <typename CBTy>
+static void foreachUse(Function &F, CBTy CB,
+ bool LookThroughConstantExprUses = true) {
+ SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses()));
+
+ for (unsigned idx = 0; idx < Worklist.size(); ++idx) {
+ Use &U = *Worklist[idx];
+
+ // Allow use in constant bitcasts and simply look through them.
+ if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) {
+ for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses())
+ Worklist.push_back(&CEU);
+ continue;
+ }
+
+ CB(U);
+ }
+}
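A hedged usage example for the helper above: counting direct call uses of F while looking through constant bitcasts (the counter is illustrative):
unsigned NumDirectCallUses = 0;
foreachUse(F, [&](Use &U) {
  if (auto *CB = dyn_cast<CallBase>(U.getUser()))
    if (CB->isCallee(&U))   // F is the callee, not a passed-in argument.
      ++NumDirectCallUses;
});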
+
+/// Helper struct to store tracked ICV values at specific instructions.
+struct ICVValue {
+ Instruction *Inst;
+ Value *TrackedValue;
+
+ ICVValue(Instruction *I, Value *Val) : Inst(I), TrackedValue(Val) {}
+};
+
+namespace llvm {
+
+// Provide DenseMapInfo for ICVValue
+template <> struct DenseMapInfo<ICVValue> {
+ using InstInfo = DenseMapInfo<Instruction *>;
+ using ValueInfo = DenseMapInfo<Value *>;
+
+ static inline ICVValue getEmptyKey() {
+ return ICVValue(InstInfo::getEmptyKey(), ValueInfo::getEmptyKey());
+ };
+
+ static inline ICVValue getTombstoneKey() {
+ return ICVValue(InstInfo::getTombstoneKey(), ValueInfo::getTombstoneKey());
+ };
+
+ static unsigned getHashValue(const ICVValue &ICVVal) {
+ return detail::combineHashValue(
+ InstInfo::getHashValue(ICVVal.Inst),
+ ValueInfo::getHashValue(ICVVal.TrackedValue));
+ }
+
+ static bool isEqual(const ICVValue &LHS, const ICVValue &RHS) {
+ return InstInfo::isEqual(LHS.Inst, RHS.Inst) &&
+ ValueInfo::isEqual(LHS.TrackedValue, RHS.TrackedValue);
+ }
+};
+
+} // end namespace llvm
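With the DenseMapInfo specialization in place, ICVValue can key LLVM's hashed containers directly; a small illustrative sketch (the set and helper are assumptions, not part of the patch):
static DenseSet<ICVValue> SeenICVValues;
static bool noteICVValue(Instruction *I, Value *TrackedV) {
  // Returns true the first time this (instruction, value) pair is seen.
  return SeenICVValues.insert(ICVValue(I, TrackedV)).second;
}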
+
+namespace {
+
+struct AAICVTracker;
+
+/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
+/// Attributor runs.
+struct OMPInformationCache : public InformationCache {
+ OMPInformationCache(Module &M, AnalysisGetter &AG,
+ BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
+ SmallPtrSetImpl<Kernel> &Kernels)
+ : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
+ Kernels(Kernels) {
+ initializeModuleSlice(CGSCC);
+
+ OMPBuilder.initialize();
+ initializeRuntimeFunctions();
+ initializeInternalControlVars();
+ }
+
+ /// Generic information that describes an internal control variable.
+ struct InternalControlVarInfo {
+ /// The kind, as described by InternalControlVar enum.
+ InternalControlVar Kind;
+
+ /// The name of the ICV.
+ StringRef Name;
+
+ /// Environment variable associated with this ICV.
+ StringRef EnvVarName;
+
+ /// Initial value kind.
+ ICVInitValue InitKind;
+
+ /// Initial value.
+ ConstantInt *InitValue;
+
+ /// Setter RTL function associated with this ICV.
+ RuntimeFunction Setter;
+
+ /// Getter RTL function associated with this ICV.
+ RuntimeFunction Getter;
+
+ /// RTL Function corresponding to the override clause of this ICV
+ RuntimeFunction Clause;
+ };
+
+ /// Generic information that describes a runtime function
+ struct RuntimeFunctionInfo {
+
+ /// The kind, as described by the RuntimeFunction enum.
+ RuntimeFunction Kind;
+
+ /// The name of the function.
+ StringRef Name;
+
+ /// Flag to indicate a variadic function.
+ bool IsVarArg;
+
+ /// The return type of the function.
+ Type *ReturnType;
+
+ /// The argument types of the function.
+ SmallVector<Type *, 8> ArgumentTypes;
+
+ /// The declaration if available.
+ Function *Declaration = nullptr;
+
+ /// Uses of this runtime function per function containing the use.
+ using UseVector = SmallVector<Use *, 16>;
+
+ /// Clear UsesMap for runtime function.
+ void clearUsesMap() { UsesMap.clear(); }
+
+ /// Boolean conversion that is true if the runtime function was found.
+ operator bool() const { return Declaration; }
+
+ /// Return the vector of uses in function \p F.
+ UseVector &getOrCreateUseVector(Function *F) {
+ std::shared_ptr<UseVector> &UV = UsesMap[F];
+ if (!UV)
+ UV = std::make_shared<UseVector>();
+ return *UV;
+ }
+
+ /// Return the vector of uses in function \p F or `nullptr` if there are
+ /// none.
+ const UseVector *getUseVector(Function &F) const {
+ auto I = UsesMap.find(&F);
+ if (I != UsesMap.end())
+ return I->second.get();
+ return nullptr;
+ }
+
+ /// Return how many functions contain uses of this runtime function.
+ size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
+
+ /// Return the number of arguments (or the minimal number for variadic
+ /// functions).
+ size_t getNumArgs() const { return ArgumentTypes.size(); }
+
+ /// Run the callback \p CB on each use and forget the use if the result is
+ /// true. The callback will be fed the function in which the use was
+ /// encountered as second argument.
+ void foreachUse(SmallVectorImpl<Function *> &SCC,
+ function_ref<bool(Use &, Function &)> CB) {
+ for (Function *F : SCC)
+ foreachUse(CB, F);
+ }
+
+ /// Run the callback \p CB on each use within the function \p F and forget
+ /// the use if the result is true.
+ void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
+ SmallVector<unsigned, 8> ToBeDeleted;
+ ToBeDeleted.clear();
+
+ unsigned Idx = 0;
+ UseVector &UV = getOrCreateUseVector(F);
+
+ for (Use *U : UV) {
+ if (CB(*U, *F))
+ ToBeDeleted.push_back(Idx);
+ ++Idx;
+ }
+
+ // Remove the to-be-deleted indices in reverse order so that removing
+ // an element never disturbs a smaller index still pending removal.
+ while (!ToBeDeleted.empty()) {
+ unsigned Idx = ToBeDeleted.pop_back_val();
+ UV[Idx] = UV.back();
+ UV.pop_back();
+ }
+ }
+
+ private:
+ /// Map from functions to all uses of this runtime function contained in
+ /// them.
+ DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
+ };
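The foreachUse member above removes recorded indices with the swap-with-back idiom; worked through on a plain vector (values are illustrative):
SmallVector<int, 8> V = {10, 20, 30, 40};
SmallVector<unsigned, 8> ToBeDeleted = {1, 3}; // recorded in ascending order
while (!ToBeDeleted.empty()) {
  unsigned Idx = ToBeDeleted.pop_back_val();   // largest index first
  V[Idx] = V.back();                           // overwrite; order not preserved
  V.pop_back();
}
// V == {10, 30}: the elements at indices 1 and 3 are gone.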
+
+ /// Initialize the ModuleSlice member based on \p SCC. ModuleSlice contains
+ /// (a subset of) all functions that we can look at during this SCC traversal.
+ /// This includes functions (transitively) called from the SCC and the
+ /// (transitive) callers of SCC functions. We also can look at a function if
+ /// there is a "reference edge", i.a., if the function somehow uses (!=calls)
+ /// a function in the SCC or a caller of a function in the SCC.
+ void initializeModuleSlice(SetVector<Function *> &SCC) {
+ ModuleSlice.insert(SCC.begin(), SCC.end());
+
+ SmallPtrSet<Function *, 16> Seen;
+ SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
+ while (!Worklist.empty()) {
+ Function *F = Worklist.pop_back_val();
+ ModuleSlice.insert(F);
+
+ for (Instruction &I : instructions(*F))
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *Callee = CB->getCalledFunction())
+ if (Seen.insert(Callee).second)
+ Worklist.push_back(Callee);
+ }
+
+ Seen.clear();
+ Worklist.append(SCC.begin(), SCC.end());
+ while (!Worklist.empty()) {
+ Function *F = Worklist.pop_back_val();
+ ModuleSlice.insert(F);
+
+ // Traverse all transitive uses.
+ foreachUse(*F, [&](Use &U) {
+ if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
+ if (Seen.insert(UsrI->getFunction()).second)
+ Worklist.push_back(UsrI->getFunction());
+ });
+ }
+ }
+
+ /// The slice of the module we are allowed to look at.
+ SmallPtrSet<Function *, 8> ModuleSlice;
+
+ /// An OpenMP-IR-Builder instance
+ OpenMPIRBuilder OMPBuilder;
+
+ /// Map from runtime function kind to the runtime function description.
+ EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
+ RuntimeFunction::OMPRTL___last>
+ RFIs;
+
+ /// Map from ICV kind to the ICV description.
+ EnumeratedArray<InternalControlVarInfo, InternalControlVar,
+ InternalControlVar::ICV___last>
+ ICVs;
+
+ /// Helper to initialize all internal control variable information for those
+ /// defined in OMPKinds.def.
+ void initializeInternalControlVars() {
+#define ICV_RT_SET(_Name, RTL) \
+ { \
+ auto &ICV = ICVs[_Name]; \
+ ICV.Setter = RTL; \
+ }
+#define ICV_RT_GET(Name, RTL) \
+ { \
+ auto &ICV = ICVs[Name]; \
+ ICV.Getter = RTL; \
+ }
+#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \
+ { \
+ auto &ICV = ICVs[Enum]; \
+ ICV.Name = _Name; \
+ ICV.Kind = Enum; \
+ ICV.InitKind = Init; \
+ ICV.EnvVarName = _EnvVarName; \
+ switch (ICV.InitKind) { \
+ case ICV_IMPLEMENTATION_DEFINED: \
+ ICV.InitValue = nullptr; \
+ break; \
+ case ICV_ZERO: \
+ ICV.InitValue = ConstantInt::get( \
+ Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \
+ break; \
+ case ICV_FALSE: \
+ ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \
+ break; \
+ case ICV_LAST: \
+ break; \
+ } \
+ }
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ }
+
+ /// Returns true if the function declaration \p F matches the runtime
+ /// function types, that is, return type \p RTFRetType, and argument types
+ /// \p RTFArgTypes.
+ static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
+ SmallVector<Type *, 8> &RTFArgTypes) {
+ // TODO: We should output information to the user (under debug output
+ // and via remarks).
+
+ if (!F)
+ return false;
+ if (F->getReturnType() != RTFRetType)
+ return false;
+ if (F->arg_size() != RTFArgTypes.size())
+ return false;
+
+ auto RTFTyIt = RTFArgTypes.begin();
+ for (Argument &Arg : F->args()) {
+ if (Arg.getType() != *RTFTyIt)
+ return false;
+
+ ++RTFTyIt;
+ }
+
+ return true;
+ }
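+
+  // For example (a sketch based on the usual OMPKinds.def entry): for
+  // omp_get_thread_num the expected signature is `i32 ()`, so a declaration
+  // `declare i32 @omp_get_thread_num()` matches, while one with extra
+  // arguments or a different return type is rejected and left untracked.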
+
+  /// Helper to collect all uses of the declaration in the UsesMap.
+ unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
+ unsigned NumUses = 0;
+ if (!RFI.Declaration)
+ return NumUses;
+ OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
+
+ if (CollectStats) {
+ NumOpenMPRuntimeFunctionsIdentified += 1;
+ NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
+ }
+
+    // TODO: Directly classify uses into proper calls and unknown uses.
+ for (Use &U : RFI.Declaration->uses()) {
+ if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
+ if (ModuleSlice.count(UserI->getFunction())) {
+ RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
+ ++NumUses;
+ }
+ } else {
+ RFI.getOrCreateUseVector(nullptr).push_back(&U);
+ ++NumUses;
+ }
+ }
+ return NumUses;
+ }
+
+  /// Helper to recollect uses of all runtime functions.
+ void recollectUses() {
+ for (int Idx = 0; Idx < RFIs.size(); ++Idx) {
+ auto &RFI = RFIs[static_cast<RuntimeFunction>(Idx)];
+ RFI.clearUsesMap();
+ collectUses(RFI, /*CollectStats*/ false);
+ }
+ }
+
+  /// Helper to initialize all runtime function information for those defined
+  /// in OMPKinds.def.
+ void initializeRuntimeFunctions() {
+ Module &M = *((*ModuleSlice.begin())->getParent());
+
+ // Helper macros for handling __VA_ARGS__ in OMP_RTL
+#define OMP_TYPE(VarName, ...) \
+ Type *VarName = OMPBuilder.VarName; \
+ (void)VarName;
+
+#define OMP_ARRAY_TYPE(VarName, ...) \
+ ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \
+ (void)VarName##Ty; \
+ PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \
+ (void)VarName##PtrTy;
+
+#define OMP_FUNCTION_TYPE(VarName, ...) \
+ FunctionType *VarName = OMPBuilder.VarName; \
+ (void)VarName; \
+ PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
+ (void)VarName##Ptr;
+
+#define OMP_STRUCT_TYPE(VarName, ...) \
+ StructType *VarName = OMPBuilder.VarName; \
+ (void)VarName; \
+ PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
+ (void)VarName##Ptr;
+
+#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \
+ { \
+ SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \
+ Function *F = M.getFunction(_Name); \
+ if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
+ auto &RFI = RFIs[_Enum]; \
+ RFI.Kind = _Enum; \
+ RFI.Name = _Name; \
+ RFI.IsVarArg = _IsVarArg; \
+ RFI.ReturnType = OMPBuilder._ReturnType; \
+ RFI.ArgumentTypes = std::move(ArgsTypes); \
+ RFI.Declaration = F; \
+ unsigned NumUses = collectUses(RFI); \
+ (void)NumUses; \
+ LLVM_DEBUG({ \
+ dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \
+ << " found\n"; \
+ if (RFI.Declaration) \
+ dbgs() << TAG << "-> got " << NumUses << " uses in " \
+ << RFI.getNumFunctionsWithUses() \
+ << " different functions.\n"; \
+ }); \
+ } \
+ }
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+
+ // TODO: We should attach the attributes defined in OMPKinds.def.
+ }
+
+ /// Collection of known kernels (\see Kernel) in the module.
+ SmallPtrSetImpl<Kernel> &Kernels;
+};
+
+struct OpenMPOpt {
+
+ using OptimizationRemarkGetter =
+ function_ref<OptimizationRemarkEmitter &(Function *)>;
+
+ OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
+ OptimizationRemarkGetter OREGetter,
+ OMPInformationCache &OMPInfoCache, Attributor &A)
+ : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
+ OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
+
+ /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
+ bool run() {
+ if (SCC.empty())
+ return false;
+
+ bool Changed = false;
+
+ LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
+ << " functions in a slice with "
+ << OMPInfoCache.ModuleSlice.size() << " functions\n");
+
+ if (PrintICVValues)
+ printICVs();
+ if (PrintOpenMPKernels)
+ printKernels();
+
+ Changed |= rewriteDeviceCodeStateMachine();
+
+ Changed |= runAttributor();
+
+ // Recollect uses, in case Attributor deleted any.
+ OMPInfoCache.recollectUses();
+
+ Changed |= deduplicateRuntimeCalls();
+ Changed |= deleteParallelRegions();
+
+ return Changed;
+ }
+
+ /// Print initial ICV values for testing.
+ /// FIXME: This should be done from the Attributor once it is added.
+ void printICVs() const {
+ InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel};
+
+ for (Function *F : OMPInfoCache.ModuleSlice) {
+ for (auto ICV : ICVs) {
+ auto ICVInfo = OMPInfoCache.ICVs[ICV];
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
+ << " Value: "
+ << (ICVInfo.InitValue
+ ? ICVInfo.InitValue->getValue().toString(10, true)
+ : "IMPLEMENTATION_DEFINED");
+ };
+
+ emitRemarkOnFunction(F, "OpenMPICVTracker", Remark);
+ }
+ }
+ }
+
+ /// Print OpenMP GPU kernels for testing.
+ void printKernels() const {
+ for (Function *F : SCC) {
+ if (!OMPInfoCache.Kernels.count(F))
+ continue;
+
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "OpenMP GPU kernel "
+ << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
+ };
+
+ emitRemarkOnFunction(F, "OpenMPGPU", Remark);
+ }
+ }
+
+  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
+  /// given, it has to be the callee or nullptr is returned.
+ static CallInst *getCallIfRegularCall(
+ Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
+ CallInst *CI = dyn_cast<CallInst>(U.getUser());
+ if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
+ (!RFI || CI->getCalledFunction() == RFI->Declaration))
+ return CI;
+ return nullptr;
+ }
+
+  /// Return the call if \p V is a regular call. If \p RFI is given, it has to
+  /// be the callee or nullptr is returned.
+ static CallInst *getCallIfRegularCall(
+ Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
+ CallInst *CI = dyn_cast<CallInst>(&V);
+ if (CI && !CI->hasOperandBundles() &&
+ (!RFI || CI->getCalledFunction() == RFI->Declaration))
+ return CI;
+ return nullptr;
+ }
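+
+  // Example (sketch): given `call void @foo(void ()* @bar)`, the use of @foo
+  // is a callee use and yields the CallInst, whereas the use of @bar is an
+  // argument operand, so both helpers return nullptr for it.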
+
+private:
+ /// Try to delete parallel regions if possible.
+ bool deleteParallelRegions() {
+ const unsigned CallbackCalleeOperand = 2;
+
+ OMPInformationCache::RuntimeFunctionInfo &RFI =
+ OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
+
+ if (!RFI.Declaration)
+ return false;
+
+ bool Changed = false;
+ auto DeleteCallCB = [&](Use &U, Function &) {
+ CallInst *CI = getCallIfRegularCall(U);
+ if (!CI)
+ return false;
+ auto *Fn = dyn_cast<Function>(
+ CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
+ if (!Fn)
+ return false;
+ if (!Fn->onlyReadsMemory())
+ return false;
+ if (!Fn->hasFnAttribute(Attribute::WillReturn))
+ return false;
+
+ LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
+ << CI->getCaller()->getName() << "\n");
+
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Parallel region in "
+ << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
+ << " deleted";
+ };
+ emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
+ Remark);
+
+ CGUpdater.removeCallSite(*CI);
+ CI->eraseFromParent();
+ Changed = true;
+ ++NumOpenMPParallelRegionsDeleted;
+ return true;
+ };
+
+ RFI.foreachUse(SCC, DeleteCallCB);
+
+ return Changed;
+ }
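+
+  // Illustrative effect (IR sketch, names hypothetical): a fork call such as
+  //   call void (...) @__kmpc_fork_call(%struct.ident_t* @loc, i32 0,
+  //                                     void (...)* @outlined.body)
+  // where @outlined.body is readonly and willreturn has no observable side
+  // effect, so the call can be erased entirely.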
+
+ /// Try to eliminate runtime calls by reusing existing ones.
+ bool deduplicateRuntimeCalls() {
+ bool Changed = false;
+
+ RuntimeFunction DeduplicableRuntimeCallIDs[] = {
+ OMPRTL_omp_get_num_threads,
+ OMPRTL_omp_in_parallel,
+ OMPRTL_omp_get_cancellation,
+ OMPRTL_omp_get_thread_limit,
+ OMPRTL_omp_get_supported_active_levels,
+ OMPRTL_omp_get_level,
+ OMPRTL_omp_get_ancestor_thread_num,
+ OMPRTL_omp_get_team_size,
+ OMPRTL_omp_get_active_level,
+ OMPRTL_omp_in_final,
+ OMPRTL_omp_get_proc_bind,
+ OMPRTL_omp_get_num_places,
+ OMPRTL_omp_get_num_procs,
+ OMPRTL_omp_get_place_num,
+ OMPRTL_omp_get_partition_num_places,
+ OMPRTL_omp_get_partition_place_nums};
+
+ // Global-tid is handled separately.
+ SmallSetVector<Value *, 16> GTIdArgs;
+ collectGlobalThreadIdArguments(GTIdArgs);
+ LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
+ << " global thread ID arguments\n");
+
+ for (Function *F : SCC) {
+ for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
+ deduplicateRuntimeCalls(*F,
+ OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
+
+ // __kmpc_global_thread_num is special as we can replace it with an
+ // argument in enough cases to make it worth trying.
+ Value *GTIdArg = nullptr;
+ for (Argument &Arg : F->args())
+ if (GTIdArgs.count(&Arg)) {
+ GTIdArg = &Arg;
+ break;
+ }
+ Changed |= deduplicateRuntimeCalls(
+ *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
+ }
+
+ return Changed;
+ }
+
+ static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
+ bool GlobalOnly, bool &SingleChoice) {
+ if (CurrentIdent == NextIdent)
+ return CurrentIdent;
+
+ // TODO: Figure out how to actually combine multiple debug locations. For
+ // now we just keep an existing one if there is a single choice.
+ if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
+ SingleChoice = !CurrentIdent;
+ return NextIdent;
+ }
+ return nullptr;
+ }
+
+  /// Return a `struct ident_t*` value that represents the ones used in the
+ /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
+ /// return a local `struct ident_t*`. For now, if we cannot find a suitable
+ /// return value we create one from scratch. We also do not yet combine
+ /// information, e.g., the source locations, see combinedIdentStruct.
+ Value *
+ getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
+ Function &F, bool GlobalOnly) {
+ bool SingleChoice = true;
+ Value *Ident = nullptr;
+ auto CombineIdentStruct = [&](Use &U, Function &Caller) {
+ CallInst *CI = getCallIfRegularCall(U, &RFI);
+ if (!CI || &F != &Caller)
+ return false;
+ Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
+ /* GlobalOnly */ true, SingleChoice);
+ return false;
+ };
+ RFI.foreachUse(SCC, CombineIdentStruct);
+
+ if (!Ident || !SingleChoice) {
+      // The IRBuilder uses the insertion block to get to the module; this is
+      // unfortunate, but we work around it for now.
+ if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
+ OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
+ &F.getEntryBlock(), F.getEntryBlock().begin()));
+      // Create a fallback location if none was found.
+ // TODO: Use the debug locations of the calls instead.
+ Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
+ Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
+ }
+ return Ident;
+ }
+
+ /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
+ /// \p ReplVal if given.
+ bool deduplicateRuntimeCalls(Function &F,
+ OMPInformationCache::RuntimeFunctionInfo &RFI,
+ Value *ReplVal = nullptr) {
+ auto *UV = RFI.getUseVector(F);
+ if (!UV || UV->size() + (ReplVal != nullptr) < 2)
+ return false;
+
+    LLVM_DEBUG(
+        dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
+               << (ReplVal ? " with an existing value" : "") << "\n");
+
+ assert((!ReplVal || (isa<Argument>(ReplVal) &&
+ cast<Argument>(ReplVal)->getParent() == &F)) &&
+ "Unexpected replacement value!");
+
+ // TODO: Use dominance to find a good position instead.
+ auto CanBeMoved = [this](CallBase &CB) {
+ unsigned NumArgs = CB.getNumArgOperands();
+ if (NumArgs == 0)
+ return true;
+ if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
+ return false;
+ for (unsigned u = 1; u < NumArgs; ++u)
+ if (isa<Instruction>(CB.getArgOperand(u)))
+ return false;
+ return true;
+ };
+
+ if (!ReplVal) {
+ for (Use *U : *UV)
+ if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
+ if (!CanBeMoved(*CI))
+ continue;
+
+ auto Remark = [&](OptimizationRemark OR) {
+            auto *NewLoc = &*F.getEntryBlock().getFirstInsertionPt();
+            return OR << "OpenMP runtime call "
+                      << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
+                      << ore::NV("OpenMPRuntimeMoves", NewLoc->getDebugLoc());
+ };
+ emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);
+
+ CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
+ ReplVal = CI;
+ break;
+ }
+ if (!ReplVal)
+ return false;
+ }
+
+    // If we use a call as a replacement value, we need to make sure the ident
+    // is valid at the new location. For now we just pick a global one, either
+    // existing and used by one of the calls, or created from scratch.
+ if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
+ if (CI->getNumArgOperands() > 0 &&
+ CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
+ Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
+ /* GlobalOnly */ true);
+ CI->setArgOperand(0, Ident);
+ }
+ }
+
+ bool Changed = false;
+ auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
+ CallInst *CI = getCallIfRegularCall(U, &RFI);
+ if (!CI || CI == ReplVal || &F != &Caller)
+ return false;
+ assert(CI->getCaller() == &F && "Unexpected call!");
+
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "OpenMP runtime call "
+ << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
+ };
+ emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
+
+ CGUpdater.removeCallSite(*CI);
+ CI->replaceAllUsesWith(ReplVal);
+ CI->eraseFromParent();
+ ++NumOpenMPRuntimeCallsDeduplicated;
+ Changed = true;
+ return true;
+ };
+ RFI.foreachUse(SCC, ReplaceAndDeleteCB);
+
+ return Changed;
+ }
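+
+  // Net effect (sketch): two calls such as
+  //   %a = call i32 @omp_get_level()
+  //   %b = call i32 @omp_get_level()
+  // collapse into a single call hoisted to the entry block whose result
+  // replaces both %a and %b.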
+
+ /// Collect arguments that represent the global thread id in \p GTIdArgs.
+ void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
+ // TODO: Below we basically perform a fixpoint iteration with a pessimistic
+ // initialization. We could define an AbstractAttribute instead and
+ // run the Attributor here once it can be run as an SCC pass.
+
+ // Helper to check the argument \p ArgNo at all call sites of \p F for
+ // a GTId.
+ auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
+ if (!F.hasLocalLinkage())
+ return false;
+ for (Use &U : F.uses()) {
+ if (CallInst *CI = getCallIfRegularCall(U)) {
+ Value *ArgOp = CI->getArgOperand(ArgNo);
+ if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
+ getCallIfRegularCall(
+ *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
+ continue;
+ }
+ return false;
+ }
+ return true;
+ };
+
+ // Helper to identify uses of a GTId as GTId arguments.
+ auto AddUserArgs = [&](Value &GTId) {
+ for (Use &U : GTId.uses())
+ if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
+ if (CI->isArgOperand(&U))
+ if (Function *Callee = CI->getCalledFunction())
+ if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
+ GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
+ };
+
+ // The argument users of __kmpc_global_thread_num calls are GTIds.
+ OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
+ OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
+
+ GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
+ if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
+ AddUserArgs(*CI);
+ return false;
+ });
+
+    // Transitively search for more arguments by looking at the users of the
+    // ones we know already. During the search the GTIdArgs vector is extended,
+    // so we can neither cache its size nor use a range-based for loop.
+ for (unsigned u = 0; u < GTIdArgs.size(); ++u)
+ AddUserArgs(*GTIdArgs[u]);
+ }
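+
+  // Example (sketch): in
+  //   %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
+  //   call void @helper(i32 %gtid)
+  // the first argument of the internal function @helper is recognized as a
+  // GTId, provided every call site of @helper passes a known GTId there.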
+
+ /// Kernel (=GPU) optimizations and utility functions
+ ///
+ ///{{
+
+  /// Check if \p F is a kernel, hence an entry point for target offloading.
+ bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
+
+ /// Cache to remember the unique kernel for a function.
+ DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
+
+ /// Find the unique kernel that will execute \p F, if any.
+ Kernel getUniqueKernelFor(Function &F);
+
+ /// Find the unique kernel that will execute \p I, if any.
+ Kernel getUniqueKernelFor(Instruction &I) {
+ return getUniqueKernelFor(*I.getFunction());
+ }
+
+  /// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
+  /// the cases where we can avoid taking the address of a function.
+ bool rewriteDeviceCodeStateMachine();
+
+ ///
+ ///}}
+
+ /// Emit a remark generically
+ ///
+ /// This template function can be used to generically emit a remark. The
+ /// RemarkKind should be one of the following:
+ /// - OptimizationRemark to indicate a successful optimization attempt
+ /// - OptimizationRemarkMissed to report a failed optimization attempt
+ /// - OptimizationRemarkAnalysis to provide additional information about an
+ /// optimization attempt
+ ///
+ /// The remark is built using a callback function provided by the caller that
+ /// takes a RemarkKind as input and returns a RemarkKind.
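+  ///
+  /// For example (an illustrative sketch mirroring the uses in this file,
+  /// with `CI` being some instruction of interest):
+  /// \code
+  ///   auto Remark = [&](OptimizationRemark OR) {
+  ///     return OR << "something noteworthy happened";
+  ///   };
+  ///   emitRemark<OptimizationRemark>(CI, "OpenMPExample", Remark);
+  /// \endcode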
+ template <typename RemarkKind,
+ typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
+ void emitRemark(Instruction *Inst, StringRef RemarkName,
+ RemarkCallBack &&RemarkCB) const {
+ Function *F = Inst->getParent()->getParent();
+ auto &ORE = OREGetter(F);
+
+ ORE.emit(
+ [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); });
+ }
+
+  /// Emit a remark on a function. Since only OptimizationRemark supports
+  /// this, it can't be made generic.
+ void
+ emitRemarkOnFunction(Function *F, StringRef RemarkName,
+ function_ref<OptimizationRemark(OptimizationRemark &&)>
+ &&RemarkCB) const {
+ auto &ORE = OREGetter(F);
+
+ ORE.emit([&]() {
+ return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F));
+ });
+ }
+
+ /// The underlying module.
+ Module &M;
+
+ /// The SCC we are operating on.
+ SmallVectorImpl<Function *> &SCC;
+
+  /// Callback to update the call graph; the first argument is a removed call,
+  /// the second an optional replacement call.
+ CallGraphUpdater &CGUpdater;
+
+ /// Callback to get an OptimizationRemarkEmitter from a Function *
+ OptimizationRemarkGetter OREGetter;
+
+  /// OpenMP-specific information cache. Also used for Attributor runs.
+ OMPInformationCache &OMPInfoCache;
+
+ /// Attributor instance.
+ Attributor &A;
+
+ /// Helper function to run Attributor on SCC.
+ bool runAttributor() {
+ if (SCC.empty())
+ return false;
+
+ registerAAs();
+
+ ChangeStatus Changed = A.run();
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
+ << " functions, result: " << Changed << ".\n");
+
+ return Changed == ChangeStatus::CHANGED;
+ }
+
+  /// Populate the Attributor with abstract attribute opportunities in the
+  /// functions of the SCC.
+ void registerAAs() {
+ for (Function *F : SCC) {
+ if (F->isDeclaration())
+ continue;
+
+ A.getOrCreateAAFor<AAICVTracker>(IRPosition::function(*F));
+ }
+ }
+};
+
+Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
+ if (!OMPInfoCache.ModuleSlice.count(&F))
+ return nullptr;
+
+ // Use a scope to keep the lifetime of the CachedKernel short.
+ {
+ Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
+ if (CachedKernel)
+ return *CachedKernel;
+
+ // TODO: We should use an AA to create an (optimistic and callback
+ // call-aware) call graph. For now we stick to simple patterns that
+ // are less powerful, basically the worst fixpoint.
+ if (isKernel(F)) {
+ CachedKernel = Kernel(&F);
+ return *CachedKernel;
+ }
+
+ CachedKernel = nullptr;
+ if (!F.hasLocalLinkage())
+ return nullptr;
+ }
+
+ auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
+ if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
+ // Allow use in equality comparisons.
+ if (Cmp->isEquality())
+ return getUniqueKernelFor(*Cmp);
+ return nullptr;
+ }
+ if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
+ // Allow direct calls.
+ if (CB->isCallee(&U))
+ return getUniqueKernelFor(*CB);
+ // Allow the use in __kmpc_kernel_prepare_parallel calls.
+ if (Function *Callee = CB->getCalledFunction())
+ if (Callee->getName() == "__kmpc_kernel_prepare_parallel")
+ return getUniqueKernelFor(*CB);
+ return nullptr;
+ }
+ // Disallow every other use.
+ return nullptr;
+ };
+
+ // TODO: In the future we want to track more than just a unique kernel.
+ SmallPtrSet<Kernel, 2> PotentialKernels;
+ foreachUse(F, [&](const Use &U) {
+ PotentialKernels.insert(GetUniqueKernelForUse(U));
+ });
+
+ Kernel K = nullptr;
+ if (PotentialKernels.size() == 1)
+ K = *PotentialKernels.begin();
+
+ // Cache the result.
+ UniqueKernelMap[&F] = K;
+
+ return K;
+}
+
+bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
+ OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI =
+ OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel];
+
+ bool Changed = false;
+ if (!KernelPrepareParallelRFI)
+ return Changed;
+
+ for (Function *F : SCC) {
+
+    // Check if the function is used in a __kmpc_kernel_prepare_parallel call
+    // at all.
+ bool UnknownUse = false;
+ bool KernelPrepareUse = false;
+ unsigned NumDirectCalls = 0;
+
+ SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
+ foreachUse(*F, [&](Use &U) {
+ if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+ if (CB->isCallee(&U)) {
+ ++NumDirectCalls;
+ return;
+ }
+
+ if (isa<ICmpInst>(U.getUser())) {
+ ToBeReplacedStateMachineUses.push_back(&U);
+ return;
+ }
+ if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall(
+ *U.getUser(), &KernelPrepareParallelRFI)) {
+ KernelPrepareUse = true;
+ ToBeReplacedStateMachineUses.push_back(&U);
+ return;
+ }
+ UnknownUse = true;
+ });
+
+ // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel
+ // use.
+ if (!KernelPrepareUse)
+ continue;
+
+ {
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Found a parallel region that is called in a target "
+ "region but not part of a combined target construct nor "
+ "nesed inside a target construct without intermediate "
+ "code. This can lead to excessive register usage for "
+ "unrelated target regions in the same translation unit "
+ "due to spurious call edges assumed by ptxas.";
+ };
+ emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
+ }
+
+ // If this ever hits, we should investigate.
+ // TODO: Checking the number of uses is not a necessary restriction and
+ // should be lifted.
+ if (UnknownUse || NumDirectCalls != 1 ||
+ ToBeReplacedStateMachineUses.size() != 2) {
+ {
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Parallel region is used in "
+ << (UnknownUse ? "unknown" : "unexpected")
+ << " ways; will not attempt to rewrite the state machine.";
+ };
+ emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
+ }
+ continue;
+ }
+
+ // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give
+ // up if the function is not called from a unique kernel.
+ Kernel K = getUniqueKernelFor(*F);
+ if (!K) {
+ {
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Parallel region is not known to be called from a "
+ "unique single target region, maybe the surrounding "
+ "function has external linkage?; will not attempt to "
+ "rewrite the state machine use.";
+ };
+ emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl",
+ Remark);
+ }
+ continue;
+ }
+
+ // We now know F is a parallel body function called only from the kernel K.
+    // We also identified the state machine uses in which we will replace the
+    // function pointer by a new global symbol for identification purposes.
+    // This ensures only direct calls to the function are left.
+
+ {
+      auto RemarkParallelRegion = [&](OptimizationRemark OR) {
+ return OR << "Specialize parallel region that is only reached from a "
+ "single target region to avoid spurious call edges and "
+ "excessive register usage in other target regions. "
+ "(parallel region ID: "
+ << ore::NV("OpenMPParallelRegion", F->getName())
+ << ", kernel ID: "
+ << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
+ };
+ emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD",
+                           RemarkParallelRegion);
+ auto RemarkKernel = [&](OptimizationRemark OR) {
+ return OR << "Target region containing the parallel region that is "
+ "specialized. (parallel region ID: "
+ << ore::NV("OpenMPParallelRegion", F->getName())
+ << ", kernel ID: "
+ << ore::NV("OpenMPTargetRegion", K->getName()) << ")";
+ };
+ emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel);
+ }
+
+ Module &M = *F->getParent();
+ Type *Int8Ty = Type::getInt8Ty(M.getContext());
+
+ auto *ID = new GlobalVariable(
+ M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
+ UndefValue::get(Int8Ty), F->getName() + ".ID");
+
+ for (Use *U : ToBeReplacedStateMachineUses)
+ U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
+
+ ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
+
+ Changed = true;
+ }
+
+ return Changed;
+}
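+
+// Sketch of the rewrite above (IR names hypothetical): a state-machine check
+//   %eq = icmp eq i8* %work_fn, bitcast (void ()* @par.body to i8*)
+// and the matching __kmpc_kernel_prepare_parallel argument are redirected to
+// the new identifier global:
+//   %eq = icmp eq i8* %work_fn, @par.body.ID
+// leaving direct calls as the only remaining uses of @par.body.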
+
+/// Abstract Attribute for tracking ICV values.
+struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
+ using Base = StateWrapper<BooleanState, AbstractAttribute>;
+ AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+ /// Returns true if value is assumed to be tracked.
+ bool isAssumedTracked() const { return getAssumed(); }
+
+  /// Returns true if value is known to be tracked.
+  bool isKnownTracked() const { return getKnown(); }
+
+  /// Create an abstract attribute view for the position \p IRP.
+ static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// Return the value with which \p I can be replaced for specific \p ICV.
+ virtual Value *getReplacementValue(InternalControlVar ICV,
+ const Instruction *I, Attributor &A) = 0;
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAICVTracker"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is AAICVTracker
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ static const char ID;
+};
+
+struct AAICVTrackerFunction : public AAICVTracker {
+ AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
+ : AAICVTracker(IRP, A) {}
+
+  // FIXME: Come up with a better string.
+  const std::string getAsStr() const override { return "ICVTracker"; }
+
+  // FIXME: Come up with some stats.
+ void trackStatistics() const override {}
+
+  /// TODO: Decide whether to deduplicate here or to use the existing
+  /// deduplicateRuntimeCalls function.
+ ChangeStatus manifest(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ for (InternalControlVar &ICV : TrackableICVs)
+ if (deduplicateICVGetters(ICV, A))
+ Changed = ChangeStatus::CHANGED;
+
+ return Changed;
+ }
+
+ bool deduplicateICVGetters(InternalControlVar &ICV, Attributor &A) {
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ auto &ICVInfo = OMPInfoCache.ICVs[ICV];
+ auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
+
+ bool Changed = false;
+
+ auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
+ CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ Value *ReplVal = getReplacementValue(ICV, UserI, A);
+
+ if (!ReplVal || !CI)
+ return false;
+
+ A.removeCallSite(CI);
+ CI->replaceAllUsesWith(ReplVal);
+ CI->eraseFromParent();
+ Changed = true;
+ return true;
+ };
+
+ GetterRFI.foreachUse(ReplaceAndDeleteCB, getAnchorScope());
+ return Changed;
+ }
+
+  // Map from an ICV to its tracked values at specific program points.
+ EnumeratedArray<SmallSetVector<ICVValue, 4>, InternalControlVar,
+ InternalControlVar::ICV___last>
+ ICVValuesMap;
+
+  // Currently, only nthreads is being tracked.
+  // This array will only grow over time.
+ InternalControlVar TrackableICVs[1] = {ICV_nthreads};
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+
+ Function *F = getAnchorScope();
+
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+
+ for (InternalControlVar ICV : TrackableICVs) {
+ auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
+
+ auto TrackValues = [&](Use &U, Function &) {
+ CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
+ if (!CI)
+ return false;
+
+        // FIXME: Handle setters with more than one argument.
+        // Track the new value.
+ if (ICVValuesMap[ICV].insert(ICVValue(CI, CI->getArgOperand(0))))
+ HasChanged = ChangeStatus::CHANGED;
+
+ return false;
+ };
+
+ SetterRFI.foreachUse(TrackValues, F);
+ }
+
+ return HasChanged;
+ }
+
+ /// Return the value with which \p I can be replaced for specific \p ICV.
+ Value *getReplacementValue(InternalControlVar ICV, const Instruction *I,
+ Attributor &A) override {
+ const BasicBlock *CurrBB = I->getParent();
+
+ auto &ValuesSet = ICVValuesMap[ICV];
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
+
+ for (const auto &ICVVal : ValuesSet) {
+ if (CurrBB == ICVVal.Inst->getParent()) {
+ if (!ICVVal.Inst->comesBefore(I))
+ continue;
+
+          // Both instructions are in the same BB, and at \p I we know the
+          // ICV value.
+          while (I != ICVVal.Inst) {
+            // We don't yet know if a call might update an ICV.
+            // TODO: Check the call site AA for the value.
+ if (const auto *CB = dyn_cast<CallBase>(I))
+ if (CB->getCalledFunction() != GetterRFI.Declaration)
+ return nullptr;
+
+ I = I->getPrevNode();
+ }
+
+ // No call in between, return the value.
+ return ICVVal.TrackedValue;
+ }
+ }
+
+ // No value was tracked.
+ return nullptr;
+ }
+};
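+
+// Example of what the tracker enables (sketch): in
+//   call void @omp_set_num_threads(i32 4)
+//   %n = call i32 @omp_get_max_threads()
+// with no intervening call that might change the ICV, %n can be replaced by
+// the constant 4 and the getter call removed.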
+} // namespace
+
+const char AAICVTracker::ID = 0;
+
+AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
+ Attributor &A) {
+ AAICVTracker *AA = nullptr;
+ switch (IRP.getPositionKind()) {
+ case IRPosition::IRP_INVALID:
+ case IRPosition::IRP_FLOAT:
+ case IRPosition::IRP_ARGUMENT:
+ case IRPosition::IRP_RETURNED:
+ case IRPosition::IRP_CALL_SITE_RETURNED:
+ case IRPosition::IRP_CALL_SITE_ARGUMENT:
+ case IRPosition::IRP_CALL_SITE:
+ llvm_unreachable("ICVTracker can only be created for function position!");
+ case IRPosition::IRP_FUNCTION:
+ AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
+ break;
+ }
+
+ return *AA;
+}
+
+PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR) {
+ if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
+ return PreservedAnalyses::all();
+
+ if (DisableOpenMPOptimizations)
+ return PreservedAnalyses::all();
+
+ SmallVector<Function *, 16> SCC;
+ for (LazyCallGraph::Node &N : C)
+ SCC.push_back(&N.getFunction());
+
+ if (SCC.empty())
+ return PreservedAnalyses::all();
+
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
+ AnalysisGetter AG(FAM);
+
+ auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
+ return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
+ };
+
+ CallGraphUpdater CGUpdater;
+ CGUpdater.initialize(CG, C, AM, UR);
+
+ SetVector<Function *> Functions(SCC.begin(), SCC.end());
+ BumpPtrAllocator Allocator;
+ OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
+ /*CGSCC*/ Functions, OMPInModule.getKernels());
+
+ Attributor A(Functions, InfoCache, CGUpdater);
+
+ // TODO: Compute the module slice we are allowed to look at.
+ OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
+ bool Changed = OMPOpt.run();
+ if (Changed)
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
+
+namespace {
+
+struct OpenMPOptLegacyPass : public CallGraphSCCPass {
+ CallGraphUpdater CGUpdater;
+ OpenMPInModule OMPInModule;
+ static char ID;
+
+ OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
+ initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+
+ bool doInitialization(CallGraph &CG) override {
+ // Disable the pass if there is no OpenMP (runtime call) in the module.
+ containsOpenMP(CG.getModule(), OMPInModule);
+ return false;
+ }
+
+ bool runOnSCC(CallGraphSCC &CGSCC) override {
+ if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
+ return false;
+ if (DisableOpenMPOptimizations || skipSCC(CGSCC))
+ return false;
+
+ SmallVector<Function *, 16> SCC;
+ for (CallGraphNode *CGN : CGSCC)
+ if (Function *Fn = CGN->getFunction())
+ if (!Fn->isDeclaration())
+ SCC.push_back(Fn);
+
+ if (SCC.empty())
+ return false;
+
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ CGUpdater.initialize(CG, CGSCC);
+
+ // Maintain a map of functions to avoid rebuilding the ORE
+ DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
+ auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
+ std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
+ if (!ORE)
+ ORE = std::make_unique<OptimizationRemarkEmitter>(F);
+ return *ORE;
+ };
+
+ AnalysisGetter AG;
+ SetVector<Function *> Functions(SCC.begin(), SCC.end());
+ BumpPtrAllocator Allocator;
+ OMPInformationCache InfoCache(
+ *(Functions.back()->getParent()), AG, Allocator,
+ /*CGSCC*/ Functions, OMPInModule.getKernels());
+
+ Attributor A(Functions, InfoCache, CGUpdater);
+
+ // TODO: Compute the module slice we are allowed to look at.
+ OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
+ return OMPOpt.run();
+ }
+
+ bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
+};
+
+} // end anonymous namespace
+
+void OpenMPInModule::identifyKernels(Module &M) {
+  // Only look at existing annotations; do not create the named metadata as a
+  // side effect of this query.
+  NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
+ if (!MD)
+ return;
+
+ for (auto *Op : MD->operands()) {
+ if (Op->getNumOperands() < 2)
+ continue;
+ MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
+ if (!KindID || KindID->getString() != "kernel")
+ continue;
+
+ Function *KernelFn =
+ mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
+ if (!KernelFn)
+ continue;
+
+ ++NumOpenMPTargetRegionKernels;
+
+ Kernels.insert(KernelFn);
+ }
+}
+
+bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
+ if (OMPInModule.isKnown())
+ return OMPInModule;
+
+  // MSVC doesn't like long if-else chains and instead just issues an error.
+  // Work around it with a do/while loop and breaks.
+ do {
+#define OMP_RTL(_Enum, _Name, ...) \
+ if (M.getFunction(_Name)) { \
+ OMPInModule = true; \
+ break; \
+ }
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ } while (false);
+
+ // Identify kernels once. TODO: We should split the OMPInformationCache into a
+ // module and an SCC part. The kernel information, among other things, could
+ // go into the module part.
+ if (OMPInModule.isKnown() && OMPInModule) {
+ OMPInModule.identifyKernels(M);
+ return true;
+ }
+
+ return OMPInModule = false;
+}
+
+char OpenMPOptLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
+ "OpenMP specific optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
+ "OpenMP specific optimizations", false, false)
+
+Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index cd3701e903080..5d863f1330a44 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -30,7 +30,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -199,13 +198,14 @@ struct FunctionOutliningMultiRegionInfo {
struct PartialInlinerImpl {
PartialInlinerImpl(
- std::function<AssumptionCache &(Function &)> *GetAC,
+ function_ref<AssumptionCache &(Function &)> GetAC,
function_ref<AssumptionCache *(Function &)> LookupAC,
- std::function<TargetTransformInfo &(Function &)> *GTTI,
- Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
- ProfileSummaryInfo *ProfSI)
+ function_ref<TargetTransformInfo &(Function &)> GTTI,
+ function_ref<const TargetLibraryInfo &(Function &)> GTLI,
+ ProfileSummaryInfo &ProfSI,
+ function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
: GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
- GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
+ GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
bool run(Module &M);
// Main part of the transformation that calls helper functions to find
@@ -270,11 +270,12 @@ struct PartialInlinerImpl {
private:
int NumPartialInlining = 0;
- std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
- std::function<TargetTransformInfo &(Function &)> *GetTTI;
- Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
- ProfileSummaryInfo *PSI;
+ function_ref<TargetTransformInfo &(Function &)> GetTTI;
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
+ ProfileSummaryInfo &PSI;
  // Return the frequency of the OutliningBB relative to F's entry point.
// The result is no larger than 1 and is represented using BP.
@@ -282,9 +283,9 @@ private:
// edges from the guarding entry blocks).
BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
- // Return true if the callee of CS should be partially inlined with
+ // Return true if the callee of CB should be partially inlined with
// profit.
- bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
+ bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
BlockFrequency WeightedOutliningRcost,
OptimizationRemarkEmitter &ORE);
@@ -303,26 +304,22 @@ private:
NumPartialInlining >= MaxNumPartialInlining);
}
- static CallSite getCallSite(User *U) {
- CallSite CS;
- if (CallInst *CI = dyn_cast<CallInst>(U))
- CS = CallSite(CI);
- else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
- CS = CallSite(II);
- else
- llvm_unreachable("All uses must be calls");
- return CS;
+ static CallBase *getSupportedCallBase(User *U) {
+ if (isa<CallInst>(U) || isa<InvokeInst>(U))
+ return cast<CallBase>(U);
+ llvm_unreachable("All uses must be calls");
+ return nullptr;
}
- static CallSite getOneCallSiteTo(Function *F) {
+ static CallBase *getOneCallSiteTo(Function *F) {
User *User = *F->user_begin();
- return getCallSite(User);
+ return getSupportedCallBase(User);
}
std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
- CallSite CS = getOneCallSiteTo(F);
- DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
- BasicBlock *Block = CS.getParent();
+ CallBase *CB = getOneCallSiteTo(F);
+ DebugLoc DLoc = CB->getDebugLoc();
+ BasicBlock *Block = CB->getParent();
return std::make_tuple(DLoc, Block);
}
@@ -355,6 +352,7 @@ struct PartialInlinerLegacyPass : public ModulePass {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool runOnModule(Module &M) override {
@@ -364,11 +362,10 @@ struct PartialInlinerLegacyPass : public ModulePass {
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
TargetTransformInfoWrapperPass *TTIWP =
&getAnalysis<TargetTransformInfoWrapperPass>();
- ProfileSummaryInfo *PSI =
- &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ ProfileSummaryInfo &PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&ACT](Function &F) -> AssumptionCache & {
+ auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
@@ -376,13 +373,16 @@ struct PartialInlinerLegacyPass : public ModulePass {
return ACT->lookupAssumptionCache(F);
};
- std::function<TargetTransformInfo &(Function &)> GetTTI =
- [&TTIWP](Function &F) -> TargetTransformInfo & {
+ auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
return TTIWP->getTTI(F);
};
- return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache,
- &GetTTI, NoneType::None, PSI)
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+
+ return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
+ GetTLI, PSI)
.run(M);
}
};
@@ -403,10 +403,10 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
BFI = ScopedBFI.get();
} else
- BFI = &(*GetBFI)(*F);
+ BFI = &(GetBFI(*F));
// Return if we don't have profiling information.
- if (!PSI->hasInstrumentationProfile())
+ if (!PSI.hasInstrumentationProfile())
return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
@@ -479,7 +479,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
// Only consider regions with predecessor blocks that are considered
// not-cold (default: part of the top 99.99% of all block counters)
// AND greater than our minimum block execution count (default: 100).
- if (PSI->isColdBlock(thisBB, BFI) ||
+ if (PSI.isColdBlock(thisBB, BFI) ||
BBProfileCount(thisBB) < MinBlockCounterExecution)
continue;
for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
@@ -759,31 +759,28 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
}
bool PartialInlinerImpl::shouldPartialInline(
- CallSite CS, FunctionCloner &Cloner,
- BlockFrequency WeightedOutliningRcost,
+ CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
OptimizationRemarkEmitter &ORE) {
using namespace ore;
- Instruction *Call = CS.getInstruction();
- Function *Callee = CS.getCalledFunction();
+ Function *Callee = CB.getCalledFunction();
assert(Callee == Cloner.ClonedFunc);
if (SkipCostAnalysis)
- return isInlineViable(*Callee);
+ return isInlineViable(*Callee).isSuccess();
- Function *Caller = CS.getCaller();
- auto &CalleeTTI = (*GetTTI)(*Callee);
+ Function *Caller = CB.getCaller();
+ auto &CalleeTTI = GetTTI(*Callee);
bool RemarksEnabled =
Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
DEBUG_TYPE);
- assert(Call && "invalid callsite for partial inline");
- InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(),
- CalleeTTI, *GetAssumptionCache, GetBFI, PSI,
- RemarksEnabled ? &ORE : nullptr);
+ InlineCost IC =
+ getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
+ GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
if (IC.isAlways()) {
ORE.emit([&]() {
- return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
<< NV("Callee", Cloner.OrigFunc)
<< " should always be fully inlined, not partially";
});
@@ -792,7 +789,7 @@ bool PartialInlinerImpl::shouldPartialInline(
if (IC.isNever()) {
ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
<< NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller)
<< " because it should never be inlined (cost=never)";
@@ -802,7 +799,7 @@ bool PartialInlinerImpl::shouldPartialInline(
if (!IC) {
ORE.emit([&]() {
- return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
<< NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
@@ -813,14 +810,14 @@ bool PartialInlinerImpl::shouldPartialInline(
const DataLayout &DL = Caller->getParent()->getDataLayout();
// The savings of eliminating the call:
- int NonWeightedSavings = getCallsiteCost(cast<CallBase>(*Call), DL);
+ int NonWeightedSavings = getCallsiteCost(CB, DL);
BlockFrequency NormWeightedSavings(NonWeightedSavings);
// Weighted saving is smaller than weighted cost, return false
if (NormWeightedSavings < WeightedOutliningRcost) {
ORE.emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
- Call)
+ &CB)
<< NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
<< NV("Caller", Caller) << " runtime overhead (overhead="
<< NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
@@ -834,7 +831,7 @@ bool PartialInlinerImpl::shouldPartialInline(
}
ORE.emit([&]() {
- return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
<< NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
<< NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
<< " (threshold="
@@ -941,20 +938,20 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
CurrentCallerBFI = TempBFI.get();
} else {
// New pass manager:
- CurrentCallerBFI = &(*GetBFI)(*Caller);
+ CurrentCallerBFI = &(GetBFI(*Caller));
}
};
for (User *User : Users) {
- CallSite CS = getCallSite(User);
- Function *Caller = CS.getCaller();
+ CallBase *CB = getSupportedCallBase(User);
+ Function *Caller = CB->getCaller();
if (CurrentCaller != Caller) {
CurrentCaller = Caller;
ComputeCurrBFI(Caller);
} else {
assert(CurrentCallerBFI && "CallerBFI is not set");
}
- BasicBlock *CallBB = CS.getInstruction()->getParent();
+ BasicBlock *CallBB = CB->getParent();
auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
if (Count)
CallSiteToProfCountMap[User] = *Count;
@@ -1155,8 +1152,8 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
Function *OutlinedFunc = CE.extractCodeRegion(CEAC);
if (OutlinedFunc) {
- CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
- BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
+ CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
+ BasicBlock *OutliningCallBB = OCS->getParent();
assert(OutliningCallBB->getParent() == ClonedFunc);
OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
NumColdRegionsOutlined++;
@@ -1164,7 +1161,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
if (MarkOutlinedColdCC) {
OutlinedFunc->setCallingConv(CallingConv::Cold);
- OCS.setCallingConv(CallingConv::Cold);
+ OCS->setCallingConv(CallingConv::Cold);
}
} else
ORE.emit([&]() {
@@ -1224,7 +1221,6 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
if (OutlinedFunc) {
BasicBlock *OutliningCallBB =
PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
- .getInstruction()
->getParent();
assert(OutliningCallBB->getParent() == ClonedFunc);
OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
@@ -1266,7 +1262,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (F->hasFnAttribute(Attribute::NoInline))
return {false, nullptr};
- if (PSI->isFunctionEntryCold(F))
+ if (PSI.isFunctionEntryCold(F))
return {false, nullptr};
if (F->users().empty())
@@ -1276,7 +1272,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
// Only try to outline cold regions if we have a profile summary, which
// implies we have profiling information.
- if (PSI->hasProfileSummary() && F->hasProfileData() &&
+ if (PSI.hasProfileSummary() && F->hasProfileData() &&
!DisableMultiRegionPartialInline) {
std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
computeOutliningColdRegionsInfo(F, ORE);
@@ -1285,8 +1281,8 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
#ifndef NDEBUG
if (TracePartialInlining) {
- dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
- dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
+ dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
+ dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
<< "\n";
}
#endif
@@ -1391,27 +1387,28 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
bool AnyInline = false;
for (User *User : Users) {
- CallSite CS = getCallSite(User);
+ CallBase *CB = getSupportedCallBase(User);
if (IsLimitReached())
continue;
- OptimizationRemarkEmitter CallerORE(CS.getCaller());
- if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
+ OptimizationRemarkEmitter CallerORE(CB->getCaller());
+ if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
continue;
// Construct remark before doing the inlining, as after successful inlining
// the callsite is removed.
- OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
+ OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
- << ore::NV("Caller", CS.getCaller());
+ << ore::NV("Caller", CB->getCaller());
- InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
+ InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
// We can only forward varargs when we outlined a single region, else we
// bail on vararg functions.
- if (!InlineFunction(CS, IFI, nullptr, true,
+ if (!InlineFunction(*CB, IFI, nullptr, true,
(Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
- : nullptr)))
+ : nullptr))
+ .isSuccess())
continue;
CallerORE.emit(OR);
@@ -1492,6 +1489,7 @@ INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
"Partial Inliner", false, false)
@@ -1503,8 +1501,7 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&FAM](Function &F) -> AssumptionCache & {
+ auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
@@ -1512,20 +1509,22 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
return FAM.getCachedResult<AssumptionAnalysis>(F);
};
- std::function<BlockFrequencyInfo &(Function &)> GetBFI =
- [&FAM](Function &F) -> BlockFrequencyInfo & {
+ auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
- std::function<TargetTransformInfo &(Function &)> GetTTI =
- [&FAM](Function &F) -> TargetTransformInfo & {
+ auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
- ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+
+ ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
- if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI,
- {GetBFI}, PSI)
+ if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
+ GetTLI, PSI, GetBFI)
.run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9c992830879ae..d73d42c52074b 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm-c/Transforms/PassManagerBuilder.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
@@ -46,6 +47,7 @@
#include "llvm/Transforms/Vectorize.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
+#include "llvm/Transforms/Vectorize/VectorCombine.h"
using namespace llvm;
@@ -98,8 +100,8 @@ static cl::opt<bool>
EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable performing ThinLTO."));
-cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden,
- cl::desc("Enable hot-cold splitting pass"));
+cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
+ cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
@@ -115,7 +117,7 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
- "enable-gvn-hoist", cl::init(false), cl::Hidden,
+ "enable-gvn-hoist", cl::init(false), cl::ZeroOrMore,
cl::desc("Enable the GVN hoisting pass (default = off)"));
static cl::opt<bool>
@@ -129,7 +131,7 @@ static cl::opt<bool> EnableSimpleLoopUnswitch(
"cleanup passes integrated into the loop pass manager pipeline."));
static cl::opt<bool> EnableGVNSink(
- "enable-gvn-sink", cl::init(false), cl::Hidden,
+ "enable-gvn-sink", cl::init(false), cl::ZeroOrMore,
cl::desc("Enable the GVN sinking pass (default = off)"));
// This option is used in simplifying testing SampleFDO optimizations for
@@ -151,15 +153,29 @@ static cl::opt<bool>
EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
cl::desc("Enable lowering of the matrix intrinsics"));
+cl::opt<AttributorRunOption> AttributorRun(
+ "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
+ cl::desc("Enable the attributor inter-procedural deduction pass."),
+ cl::values(clEnumValN(AttributorRunOption::ALL, "all",
+ "enable all attributor runs"),
+ clEnumValN(AttributorRunOption::MODULE, "module",
+ "enable module-wide attributor runs"),
+ clEnumValN(AttributorRunOption::CGSCC, "cgscc",
+ "enable call graph SCC attributor runs"),
+ clEnumValN(AttributorRunOption::NONE, "none",
+ "disable attributor runs")));
+
+extern cl::opt<bool> EnableKnowledgeRetention;
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
LibraryInfo = nullptr;
Inliner = nullptr;
DisableUnrollLoops = false;
- SLPVectorize = RunSLPVectorization;
- LoopVectorize = EnableLoopVectorization;
- LoopsInterleaved = EnableLoopInterleaving;
+ SLPVectorize = false;
+ LoopVectorize = true;
+ LoopsInterleaved = true;
RerollLoops = RunLoopRerolling;
NewGVN = RunNewGVN;
LicmMssaOptCap = SetLicmMssaOptCap;
@@ -179,6 +195,7 @@ PassManagerBuilder::PassManagerBuilder() {
PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = EnablePerformThinLTO;
DivergentTarget = false;
+ CallGraphProfile = true;
}
PassManagerBuilder::~PassManagerBuilder() {
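
The AttributorRun option declared above is probed with bitwise AND later in this patch (AttributorRun & AttributorRunOption::MODULE), which requires the enumerators to be distinct bits. A minimal sketch of a compatible declaration follows; the authoritative one lives in llvm/Transforms/IPO/PassManagerBuilder.h.

    // Sketch of a bitmask enum consistent with the pipeline gates below.
    enum AttributorRunOption {
      NONE = 0,
      MODULE = 1 << 0,
      CGSCC = 1 << 1,
      ALL = MODULE | CGSCC,
    };

    static_assert((ALL & MODULE) && (ALL & CGSCC), "ALL enables both runs");
    static_assert(!(NONE & MODULE) && !(NONE & CGSCC), "NONE disables both");
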
@@ -187,8 +204,13 @@ PassManagerBuilder::~PassManagerBuilder() {
}
/// Set of global extensions, automatically added as part of the standard set.
-static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
- PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+static ManagedStatic<
+ SmallVector<std::tuple<PassManagerBuilder::ExtensionPointTy,
+ PassManagerBuilder::ExtensionFn,
+ PassManagerBuilder::GlobalExtensionID>,
+ 8>>
+ GlobalExtensions;
+static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter;
/// Check if GlobalExtensions is constructed and not empty.
/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
@@ -197,10 +219,29 @@ static bool GlobalExtensionsNotEmpty() {
return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
}
-void PassManagerBuilder::addGlobalExtension(
- PassManagerBuilder::ExtensionPointTy Ty,
- PassManagerBuilder::ExtensionFn Fn) {
- GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn)));
+PassManagerBuilder::GlobalExtensionID
+PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty,
+ PassManagerBuilder::ExtensionFn Fn) {
+ auto ExtensionID = GlobalExtensionsCounter++;
+ GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID));
+ return ExtensionID;
+}
+
+void PassManagerBuilder::removeGlobalExtension(
+ PassManagerBuilder::GlobalExtensionID ExtensionID) {
+ // RegisterStandardPasses may try to call this function after GlobalExtensions
+ // has already been destroyed; doing so should not generate an error.
+ if (!GlobalExtensions.isConstructed())
+ return;
+
+ auto GlobalExtension =
+ llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) {
+ return std::get<2>(elem) == ExtensionID;
+ });
+ assert(GlobalExtension != GlobalExtensions->end() &&
+ "The extension ID to be removed should always be valid.");
+
+ GlobalExtensions->erase(GlobalExtension);
}
void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
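
The ID-based registration above lets a plugin unregister its extension when it is unloaded. A usage sketch, assuming a hypothetical plugin pass; the addGlobalExtension/removeGlobalExtension signatures are the ones introduced in this hunk.

    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    static PassManagerBuilder::GlobalExtensionID PluginExtID =
        PassManagerBuilder::addGlobalExtension(
            PassManagerBuilder::EP_EarlyAsPossible,
            [](const PassManagerBuilder &B, legacy::PassManagerBase &PM) {
              // PM.add(createMyPluginPass()); // hypothetical plugin pass
            });

    void unloadPlugin() {
      // Safe during shutdown: removal is a no-op once the ManagedStatic
      // list has been destroyed, per the guard above.
      PassManagerBuilder::removeGlobalExtension(PluginExtID);
    }
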
@@ -211,8 +252,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
legacy::PassManagerBase &PM) const {
if (GlobalExtensionsNotEmpty()) {
for (auto &Ext : *GlobalExtensions) {
- if (Ext.first == ETy)
- Ext.second(*this, PM);
+ if (std::get<0>(Ext) == ETy)
+ std::get<1>(Ext)(*this, PM);
}
}
for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
@@ -244,12 +285,6 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses(
PM.add(createScopedNoAliasAAWrapperPass());
}
-void PassManagerBuilder::addInstructionCombiningPass(
- legacy::PassManagerBase &PM) const {
- bool ExpensiveCombines = OptLevel > 2;
- PM.add(createInstructionCombiningPass(ExpensiveCombines));
-}
-
void PassManagerBuilder::populateFunctionPassManager(
legacy::FunctionPassManager &FPM) {
addExtensionsToPM(EP_EarlyAsPossible, FPM);
@@ -327,6 +362,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!");
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
+ if (EnableKnowledgeRetention)
+ MPM.add(createAssumeSimplifyPass());
if (OptLevel > 1) {
if (EnableGVNHoist)
@@ -348,7 +385,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Combine silly seq's
if (OptLevel > 2)
MPM.add(createAggressiveInstCombinerPass());
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
MPM.add(createLibCallsShrinkWrapPass());
addExtensionsToPM(EP_Peephole, MPM);
@@ -383,7 +420,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
// need for this.
MPM.add(createCFGSimplificationPass());
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
// We resume loop passes creating a second loop pipeline here.
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
@@ -414,7 +451,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1) {
MPM.add(createJumpThreadingPass()); // Thread jumps
@@ -432,7 +469,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Clean up after everything.
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, MPM);
if (EnableCHR && OptLevel >= 3 &&
@@ -478,6 +515,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createBarrierNoopPass());
if (PerformThinLTO) {
+ MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
// Drop available_externally and unreferenced globals. This is necessary
// with ThinLTO in order to avoid leaving undefined references to dead
// globals in the object file.
@@ -511,9 +549,11 @@ void PassManagerBuilder::populateModulePassManager(
// inter-module indirect calls. For that we perform indirect call promotion
// earlier in the pass pipeline, here before globalopt. Otherwise imported
// available_externally functions look unreferenced and are removed.
- if (PerformThinLTO)
+ if (PerformThinLTO) {
MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
!PGOSampleUse.empty()));
+ MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
+ }
// For SamplePGO in ThinLTO compile phase, we do not want to unroll loops
// as it will change the CFG too much to make the 2nd profile annotation
@@ -526,6 +566,10 @@ void PassManagerBuilder::populateModulePassManager(
// Infer attributes about declarations if possible.
MPM.add(createInferFunctionAttrsLegacyPass());
+ // Infer attributes on declarations, call sites, arguments, etc.
+ if (AttributorRun & AttributorRunOption::MODULE)
+ MPM.add(createAttributorLegacyPass());
+
addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
if (OptLevel > 2)
@@ -534,16 +578,13 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createCalledValuePropagationPass());
- // Infer attributes on declarations, call sites, arguments, etc.
- MPM.add(createAttributorLegacyPass());
-
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
// Promote any localized global vars.
MPM.add(createPromoteMemoryToRegisterPass());
MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
- addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE
+ MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
@@ -574,6 +615,15 @@ void PassManagerBuilder::populateModulePassManager(
RunInliner = true;
}
+ // Infer attributes on declarations, call sites, arguments, etc. for an SCC.
+ if (AttributorRun & AttributorRunOption::CGSCC)
+ MPM.add(createAttributorCGSCCLegacyPass());
+
+ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+ // there are no OpenMP runtime calls present in the module.
+ if (OptLevel > 1)
+ MPM.add(createOpenMPOptLegacyPass());
+
MPM.add(createPostOrderFunctionAttrsLegacyPass());
if (OptLevel > 2)
MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -705,7 +755,7 @@ void PassManagerBuilder::populateModulePassManager(
// on -O1 and no #pragma is found). Would be good to have these two passes
// as function calls, so that we can only pass them when the vectorizer
// changed the code.
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
if (OptLevel > 1 && ExtraVectorizerPasses) {
// At higher optimization levels, try to clean up any runtime overlap and
    // alignment checks inserted by the vectorizer. We want to track correlated
@@ -715,11 +765,11 @@ void PassManagerBuilder::populateModulePassManager(
// dead (or speculatable) control flows or more combining opportunities.
MPM.add(createEarlyCSEPass());
MPM.add(createCorrelatedValuePropagationPass());
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
}
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
@@ -736,8 +786,11 @@ void PassManagerBuilder::populateModulePassManager(
}
}
+ // Enhance/cleanup vector code.
+ MPM.add(createVectorCombinePass());
+
addExtensionsToPM(EP_Peephole, MPM);
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
if (EnableUnrollAndJam && !DisableUnrollLoops) {
// Unroll and Jam. We do this before unroll but need to be in a separate
@@ -752,7 +805,7 @@ void PassManagerBuilder::populateModulePassManager(
if (!DisableUnrollLoops) {
// LoopUnroll may generate some redundency to cleanup.
- addInstructionCombiningPass(MPM);
+ MPM.add(createInstructionCombiningPass());
    // Runtime unrolling will introduce a runtime check in the loop prologue. If
    // the unrolled loop is an inner loop, then the prologue will be inside the
@@ -785,6 +838,10 @@ void PassManagerBuilder::populateModulePassManager(
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
+ // Add Module flag "CG Profile" based on Branch Frequency Information.
+ if (CallGraphProfile)
+ MPM.add(createCGProfileLegacyPass());
+
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
@@ -852,7 +909,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createCalledValuePropagationPass());
// Infer attributes on declarations, call sites, arguments, etc.
- PM.add(createAttributorLegacyPass());
+ if (AttributorRun & AttributorRunOption::MODULE)
+ PM.add(createAttributorLegacyPass());
}
// Infer attributes about definitions. The readnone attribute in particular is
@@ -890,7 +948,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// calls, etc, so let instcombine do this.
if (OptLevel > 2)
PM.add(createAggressiveInstCombinerPass());
- addInstructionCombiningPass(PM);
+ PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
// Inline small functions
@@ -905,6 +963,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// CSFDO instrumentation and use pass.
addPGOInstrPasses(PM, /* IsCS */ true);
+ // Infer attributes on declarations, call sites, arguments, etc. for an SCC.
+ if (AttributorRun & AttributorRunOption::CGSCC)
+ PM.add(createAttributorCGSCCLegacyPass());
+
+ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+ // there are no OpenMP runtime calls present in the module.
+ if (OptLevel > 1)
+ PM.add(createOpenMPOptLegacyPass());
+
// Optimize globals again if we ran the inliner.
if (RunInliner)
PM.add(createGlobalOptimizerPass());
@@ -915,7 +982,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createArgumentPromotionPass());
// The IPO passes may leave cruft around. Clean up after them.
- addInstructionCombiningPass(PM);
+ PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
@@ -960,22 +1027,24 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Now that we've optimized loops (in particular loop induction variables),
// we may have exposed more scalar opportunities. Run parts of the scalar
// optimizer again at this point.
- addInstructionCombiningPass(PM); // Initial cleanup
+ PM.add(createInstructionCombiningPass()); // Initial cleanup
PM.add(createCFGSimplificationPass()); // if-convert
PM.add(createSCCPPass()); // Propagate exposed constants
- addInstructionCombiningPass(PM); // Clean up again
+ PM.add(createInstructionCombiningPass()); // Clean up again
PM.add(createBitTrackingDCEPass());
// More scalar chains could be vectorized due to more alias information
if (SLPVectorize)
PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ PM.add(createVectorCombinePass()); // Clean up partial vectorization.
+
// After vectorization, assume intrinsics may tell us more about pointer
// alignments.
PM.add(createAlignmentFromAssumptionsPass());
// Cleanup and simplify the code after the scalar optimizations.
- addInstructionCombiningPass(PM);
+ PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
@@ -1013,8 +1082,8 @@ void PassManagerBuilder::populateThinLTOPassManager(
PM.add(createVerifierPass());
if (ImportSummary) {
- // These passes import type identifier resolutions for whole-program
- // devirtualization and CFI. They must run early because other passes may
+ // This pass imports type identifier resolutions for whole-program
+ // devirtualization and CFI. It must run early because other passes may
    // disturb the specific instruction patterns that this pass looks for,
// creating dependencies on resolutions that may not appear in the summary.
//
@@ -1062,6 +1131,9 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
// control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO pipeline).
+ PM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);
@@ -1072,14 +1144,6 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
PM.add(createVerifierPass());
}
-inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
- return reinterpret_cast<PassManagerBuilder*>(P);
-}
-
-inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
- return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
-}
-
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
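
The file-local unwrap/wrap helpers deleted above are presumably supplied by the shared C-binding machinery now, so the stable C API is unchanged. A minimal usage sketch of that API (module creation elided):

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/PassManagerBuilder.h"

    // Run the legacy -O2 module pipeline over an existing module.
    void optimizeModuleAtO2(LLVMModuleRef M) {
      LLVMPassManagerBuilderRef PMB = LLVMPassManagerBuilderCreate();
      LLVMPassManagerBuilderSetOptLevel(PMB, 2);
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMPassManagerBuilderPopulateModulePassManager(PMB, PM);
      LLVMRunPassManager(PM, M);
      LLVMDisposePassManager(PM);
      LLVMPassManagerBuilderDispose(PMB);
    }
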
diff --git a/llvm/lib/Transforms/IPO/PruneEH.cpp b/llvm/lib/Transforms/IPO/PruneEH.cpp
index 45a0ce20eb175..a16dc664db64d 100644
--- a/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -135,8 +135,8 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
SCCMightUnwind |= InstMightUnwind;
}
if (CheckReturnViaAsm && !SCCMightReturn)
- if (auto ICS = ImmutableCallSite(&I))
- if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue()))
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ if (const auto *IA = dyn_cast<InlineAsm>(CB->getCalledOperand()))
if (IA->hasSideEffects())
SCCMightReturn = true;
}
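
This two-line change is one instance of the mechanical CallSite-to-CallBase rewrite that recurs throughout the patch. A distilled sketch of the idiom, with a hypothetical helper name:

    #include "llvm/IR/InlineAsm.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // dyn_cast<CallBase> replaces the ImmutableCallSite constructor-as-test,
    // and getCalledOperand() replaces the removed getCalledValue().
    static const InlineAsm *calledInlineAsm(const Instruction &I) {
      if (const auto *CB = dyn_cast<CallBase>(&I))
        return dyn_cast<InlineAsm>(CB->getCalledOperand());
      return nullptr;
    }
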
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a1fbb1adc412c..b6871e260532d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -37,15 +37,16 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -148,14 +149,17 @@ static cl::opt<bool> ProfileAccurateForSymsInList(
"be accurate. It may be overriden by profile-sample-accurate. "));
static cl::opt<bool> ProfileMergeInlinee(
- "sample-profile-merge-inlinee", cl::Hidden, cl::init(false),
+ "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
cl::desc("Merge past inlinee's profile to outline version if sample "
- "profile loader decided not to inline a call site."));
+ "profile loader decided not to inline a call site. It will "
+ "only be enabled when top-down order of profile loading is "
+ "enabled. "));
static cl::opt<bool> ProfileTopDownLoad(
- "sample-profile-top-down-load", cl::Hidden, cl::init(false),
+ "sample-profile-top-down-load", cl::Hidden, cl::init(true),
cl::desc("Do profile annotation and inlining for functions in top-down "
- "order of call graph during sample profile loading."));
+ "order of call graph during sample profile loading. It only "
+      "works for the new pass manager. "));
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
@@ -235,7 +239,7 @@ public:
DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
: CurrentReader(Reader), CurrentModule(M),
CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
- if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ if (!CurrentReader.useMD5())
return;
for (const auto &F : CurrentModule) {
@@ -261,7 +265,7 @@ public:
}
~GUIDToFuncNameMapper() {
- if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ if (!CurrentReader.useMD5())
return;
CurrentGUIDToFuncNameMap.clear();
@@ -307,10 +311,12 @@ public:
SampleProfileLoader(
StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
- std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
+ std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI)
: GetAC(std::move(GetAssumptionCache)),
- GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this),
- Filename(Name), RemappingFilename(RemapName),
+ GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
+ CoverageTracker(*this), Filename(std::string(Name)),
+ RemappingFilename(std::string(RemapName)),
IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
@@ -327,18 +333,19 @@ protected:
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
- const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
+ const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineCallInstruction(Instruction *I);
+ bool inlineCallInstruction(CallBase &CB);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
// Inline cold/small functions in addition to hot ones
- bool shouldInlineColdCallee(Instruction &CallInst);
+ bool shouldInlineColdCallee(CallBase &CallInst);
void emitOptimizationRemarksForInlineCandidates(
- const SmallVector<Instruction *, 10> &Candidates, const Function &F, bool Hot);
+ const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
+ bool Hot);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -397,6 +404,7 @@ protected:
std::function<AssumptionCache &(Function &)> GetAC;
std::function<TargetTransformInfo &(Function &)> GetTTI;
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI;
/// Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -474,14 +482,17 @@ public:
SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
bool IsThinLTOPreLink = false)
- : ModulePass(ID),
- SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
- [&](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- },
- [&](Function &F) -> TargetTransformInfo & {
- return TTIWP->getTTI(F);
- }) {
+ : ModulePass(ID), SampleLoader(
+ Name, SampleProfileRemappingFile, IsThinLTOPreLink,
+ [&](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ },
+ [&](Function &F) -> TargetTransformInfo & {
+ return TTIWP->getTTI(F);
+ },
+ [&](Function &F) -> TargetLibraryInfo & {
+ return TLIWP->getTLI(F);
+ }) {
initializeSampleProfileLoaderLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -498,6 +509,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
@@ -505,6 +517,7 @@ private:
SampleProfileLoader SampleLoader;
AssumptionCacheTracker *ACT = nullptr;
TargetTransformInfoWrapperPass *TTIWP = nullptr;
+ TargetLibraryInfoWrapperPass *TLIWP = nullptr;
};
} // end anonymous namespace
@@ -710,10 +723,9 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// (findCalleeFunctionSamples returns non-empty result), but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
- if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
- !ImmutableCallSite(&Inst).isIndirectCall() &&
- findCalleeFunctionSamples(Inst))
- return 0;
+ if (auto *CB = dyn_cast<CallBase>(&Inst))
+ if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
+ return 0;
const DILocation *DIL = DLoc;
uint32_t LineOffset = FunctionSamples::getOffset(DIL);
@@ -801,7 +813,7 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
///
/// \returns The FunctionSamples pointer to the inlined instance.
const FunctionSamples *
-SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
+SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL) {
return nullptr;
@@ -885,13 +897,11 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return it.first->second;
}
-bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
- assert(isa<CallInst>(I) || isa<InvokeInst>(I));
- CallSite CS(I);
- Function *CalledFunction = CS.getCalledFunction();
+bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
+ Function *CalledFunction = CB.getCalledFunction();
assert(CalledFunction);
- DebugLoc DLoc = I->getDebugLoc();
- BasicBlock *BB = I->getParent();
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *BB = CB.getParent();
InlineParams Params = getInlineParams();
Params.ComputeFullInlineCost = true;
// Checks if there is anything in the reachable portion of the callee at
@@ -901,46 +911,43 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
  // The actual cost does not matter because we only check isNever() to
// see if it is legal to inline the callsite.
InlineCost Cost =
- getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
- None, nullptr, nullptr);
+ getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
if (Cost.isNever()) {
ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
<< "incompatible inlining");
return false;
}
- InlineFunctionInfo IFI(nullptr, &GetAC);
- if (InlineFunction(CS, IFI)) {
+ InlineFunctionInfo IFI(nullptr, GetAC);
+ if (InlineFunction(CB, IFI).isSuccess()) {
// The call to InlineFunction erases I, so we can't pass it here.
- ORE->emit(OptimizationRemark(CSINLINE_DEBUG, "InlineSuccess", DLoc, BB)
- << "inlined callee '" << ore::NV("Callee", CalledFunction)
- << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
+ emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
+ true, CSINLINE_DEBUG);
return true;
}
return false;
}
-bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) {
+bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
if (!ProfileSizeInline)
return false;
- Function *Callee = CallSite(&CallInst).getCalledFunction();
+ Function *Callee = CallInst.getCalledFunction();
if (Callee == nullptr)
return false;
- InlineCost Cost =
- getInlineCost(cast<CallBase>(CallInst), getInlineParams(),
- GetTTI(*Callee), GetAC, None, nullptr, nullptr);
+ InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
+ GetAC, GetTLI);
return Cost.getCost() <= SampleColdCallSiteThreshold;
}
void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
- const SmallVector<Instruction *, 10> &Candidates, const Function &F,
+ const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
bool Hot) {
for (auto I : Candidates) {
- Function *CalledFunction = CallSite(I).getCalledFunction();
+ Function *CalledFunction = I->getCalledFunction();
if (CalledFunction) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
I->getDebugLoc(), I->getParent())
<< "previous inlining reattempted for "
<< (Hot ? "hotness: '" : "size: '")
@@ -975,43 +982,43 @@ bool SampleProfileLoader::inlineHotFunctions(
"ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled");
- DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
+ DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
bool LocalChanged = false;
- SmallVector<Instruction *, 10> CIS;
+ SmallVector<CallBase *, 10> CIS;
for (auto &BB : F) {
bool Hot = false;
- SmallVector<Instruction *, 10> AllCandidates;
- SmallVector<Instruction *, 10> ColdCandidates;
+ SmallVector<CallBase *, 10> AllCandidates;
+ SmallVector<CallBase *, 10> ColdCandidates;
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
- if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
- !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
- AllCandidates.push_back(&I);
- if (FS->getEntrySamples() > 0)
- localNotInlinedCallSites.try_emplace(&I, FS);
- if (callsiteIsHot(FS, PSI))
- Hot = true;
- else if (shouldInlineColdCallee(I))
- ColdCandidates.push_back(&I);
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
+ AllCandidates.push_back(CB);
+ if (FS->getEntrySamples() > 0)
+ localNotInlinedCallSites.try_emplace(CB, FS);
+ if (callsiteIsHot(FS, PSI))
+ Hot = true;
+ else if (shouldInlineColdCallee(*CB))
+ ColdCandidates.push_back(CB);
+ }
}
}
if (Hot) {
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
- }
- else {
+ } else {
CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
}
}
- for (auto I : CIS) {
- Function *CalledFunction = CallSite(I).getCalledFunction();
+ for (CallBase *I : CIS) {
+ Function *CalledFunction = I->getCalledFunction();
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
- if (CallSite(I).isIndirectCall()) {
+ if (I->isIndirectCall()) {
if (PromotedInsns.count(I))
continue;
uint64_t Sum;
@@ -1021,7 +1028,7 @@ bool SampleProfileLoader::inlineHotFunctions(
PSI->getOrCompHotCountThreshold());
continue;
}
- auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
+ auto CalleeFunctionName = FS->getFuncName();
// If it is a recursive call, we do not inline it as it could bloat
      // the code exponentially. There are better ways to handle this, e.g.
// clone the caller first, and inline the cloned caller if it is
@@ -1038,15 +1045,16 @@ bool SampleProfileLoader::inlineHotFunctions(
if (R != SymbolMap.end() && R->getValue() &&
!R->getValue()->isDeclaration() &&
R->getValue()->getSubprogram() &&
- isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
+ R->getValue()->hasFnAttribute("use-sample-profile") &&
+ isLegalToPromote(*I, R->getValue(), &Reason)) {
uint64_t C = FS->getEntrySamples();
- Instruction *DI =
- pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
+ auto &DI =
+ pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE);
Sum -= C;
PromotedInsns.insert(I);
// If profile mismatches, we should not attempt to inline DI.
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
- inlineCallInstruction(DI)) {
+ inlineCallInstruction(cast<CallBase>(DI))) {
localNotInlinedCallSites.erase(I);
LocalChanged = true;
++NumCSInlined;
@@ -1059,7 +1067,7 @@ bool SampleProfileLoader::inlineHotFunctions(
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
- if (inlineCallInstruction(I)) {
+ if (inlineCallInstruction(*I)) {
localNotInlinedCallSites.erase(I);
LocalChanged = true;
++NumCSInlined;
@@ -1078,8 +1086,8 @@ bool SampleProfileLoader::inlineHotFunctions(
// Accumulate not inlined callsite information into notInlinedSamples
for (const auto &Pair : localNotInlinedCallSites) {
- Instruction *I = Pair.getFirst();
- Function *Callee = CallSite(I).getCalledFunction();
+ CallBase *I = Pair.getFirst();
+ Function *Callee = I->getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
@@ -1525,8 +1533,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
for (auto &I : BB->getInstList()) {
if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
continue;
- CallSite CS(&I);
- if (!CS.getCalledFunction()) {
+ if (!cast<CallBase>(I).getCalledFunction()) {
const DebugLoc &DLoc = I.getDebugLoc();
if (!DLoc)
continue;
@@ -1770,6 +1777,7 @@ INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
"Sample Profile loader", false, false)
@@ -1780,8 +1788,17 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
FunctionOrderList.reserve(M.size());
if (!ProfileTopDownLoad || CG == nullptr) {
+ if (ProfileMergeInlinee) {
+ // Disable ProfileMergeInlinee if profile is not loaded in top down order,
+ // because the profile for a function may be used for the profile
+ // annotation of its outline copy before the profile merging of its
+ // non-inlined inline instances, and that is not the way how
+ // ProfileMergeInlinee is supposed to work.
+ ProfileMergeInlinee = false;
+ }
+
for (Function &F : M)
- if (!F.isDeclaration())
+ if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(&F);
return FunctionOrderList;
}
@@ -1791,7 +1808,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
while (!CGI.isAtEnd()) {
for (CallGraphNode *node : *CGI) {
auto F = node->getFunction();
- if (F && !F->isDeclaration())
+ if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(F);
}
++CGI;
@@ -1839,15 +1856,16 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI, CallGraph *CG) {
- GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
if (!ProfileIsValid)
return false;
+ GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
PSI = _PSI;
- if (M.getProfileSummary(/* IsCS */ false) == nullptr)
+ if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
ProfileSummary::PSK_Sample);
-
+ PSI->refresh();
+ }
// Compute the total number of samples collected in this profile.
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
@@ -1890,6 +1908,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
ACT = &getAnalysis<AssumptionCacheTracker>();
TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
+ TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
@@ -1966,12 +1985,15 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
+ auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
SampleProfileLoader SampleLoader(
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
: ProfileRemappingFileName,
- IsThinLTOPreLink, GetAssumptionCache, GetTTI);
+ IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI);
if (!SampleLoader.doInitialization(M))
return PreservedAnalyses::all();
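
Two behavioral gates appear in this file's changes: functions must carry the "use-sample-profile" attribute to be processed at all, and inlinee profile merging is tied to top-down loading. A sketch of the attribute gate in isolation; the helper name is hypothetical and mirrors the bottom-up branch of buildFunctionOrder above.

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    #include <vector>
    using namespace llvm;

    // Only definitions explicitly tagged "use-sample-profile" are admitted
    // to the loader's work list after this change.
    static std::vector<Function *> sampledFunctions(Module &M) {
      std::vector<Function *> Order;
      Order.reserve(M.size());
      for (Function &F : M)
        if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
          Order.push_back(&F);
      return Order;
    }
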
diff --git a/llvm/lib/Transforms/IPO/StripSymbols.cpp b/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 6ce00714523b3..088091df770f9 100644
--- a/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -147,10 +147,12 @@ static void RemoveDeadConstant(Constant *C) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals.
GV->eraseFromParent();
- }
- else if (!isa<Function>(C))
- if (isa<CompositeType>(C->getType()))
+ } else if (!isa<Function>(C)) {
+ // FIXME: Why does the type of the constant matter here?
+ if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType()) ||
+ isa<VectorType>(C->getType()))
C->destroyConstant();
+ }
// If the constant referenced anything, see if we can delete it as well.
for (Constant *O : Operands)
diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index 45fd432fd721e..1b1e91cafa651 100644
--- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -31,7 +31,6 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/SyntheticCountsUtils.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -110,14 +109,13 @@ PreservedAnalyses SyntheticCountsPropagation::run(Module &M,
Optional<Scaled64> Res = None;
if (!Edge.first)
return Res;
- assert(isa<Instruction>(Edge.first));
- CallSite CS(cast<Instruction>(Edge.first));
- Function *Caller = CS.getCaller();
+ CallBase &CB = *cast<CallBase>(*Edge.first);
+ Function *Caller = CB.getCaller();
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
// Now compute the callsite count from relative frequency and
// entry count:
- BasicBlock *CSBB = CS.getInstruction()->getParent();
+ BasicBlock *CSBB = CB.getParent();
Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
BBCount /= EntryFreq;
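
A worked example of the scaling above with made-up numbers (Scaled64 is ScaledNumber<uint64_t> from llvm/Support/ScaledNumber.h): a call site in a block that runs half as often as the entry receives half of the caller's count.

    #include "llvm/Support/ScaledNumber.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      using Scaled64 = llvm::ScaledNumber<uint64_t>;
      Scaled64 EntryFreq(1024, 0); // caller entry frequency (BFI.getEntryFreq)
      Scaled64 BBCount(512, 0);    // frequency of the call site's block
      BBCount /= EntryFreq;        // relative frequency = 0.5
      Scaled64 CallerCount(2000, 0);
      BBCount *= CallerCount;      // synthetic call-site count = 1000
      assert(BBCount.toInt<uint64_t>() == 1000);
      return 0;
    }
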
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 5ccfb29b01a13..5a25f9857665c 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -57,12 +57,14 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
@@ -83,11 +85,12 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
@@ -115,12 +118,15 @@ static cl::opt<PassSummaryAction> ClSummaryAction(
static cl::opt<std::string> ClReadSummary(
"wholeprogramdevirt-read-summary",
- cl::desc("Read summary from given YAML file before running pass"),
+ cl::desc(
+ "Read summary from given bitcode or YAML file before running pass"),
cl::Hidden);
static cl::opt<std::string> ClWriteSummary(
"wholeprogramdevirt-write-summary",
- cl::desc("Write summary to given YAML file after running pass"),
+ cl::desc("Write summary to given bitcode or YAML file after running pass. "
+ "Output file format is deduced from extension: *.bc means writing "
+ "bitcode, otherwise YAML"),
cl::Hidden);
static cl::opt<unsigned>
@@ -134,6 +140,45 @@ static cl::opt<bool>
cl::init(false), cl::ZeroOrMore,
cl::desc("Print index-based devirtualization messages"));
+/// Provide a way to force enable whole program visibility in tests.
+/// This is needed to support legacy tests that don't contain
+/// !vcall_visibility metadata (the mere presence of type tests
+/// previously implied hidden visibility).
+cl::opt<bool>
+ WholeProgramVisibility("whole-program-visibility", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable whole program visibility"));
+
+/// Provide a way to force disable whole program visibility for debugging or
+/// workarounds, when enabled via the linker.
+cl::opt<bool> DisableWholeProgramVisibility(
+ "disable-whole-program-visibility", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc("Disable whole program visibility (overrides enabling options)"));
+
+/// Provide a way to prevent certain functions from being devirtualized.
+cl::list<std::string>
+ SkipFunctionNames("wholeprogramdevirt-skip",
+ cl::desc("Prevent function(s) from being devirtualized"),
+ cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated);
+
+namespace {
+struct PatternList {
+ std::vector<GlobPattern> Patterns;
+ template <class T> void init(const T &StringList) {
+ for (const auto &S : StringList)
+ if (Expected<GlobPattern> Pat = GlobPattern::create(S))
+ Patterns.push_back(std::move(*Pat));
+ }
+ bool match(StringRef S) {
+ for (const GlobPattern &P : Patterns)
+ if (P.match(S))
+ return true;
+ return false;
+ }
+};
+} // namespace
+
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
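
The skip list compiles each -wholeprogramdevirt-skip entry into a GlobPattern, so shell-style wildcards work. A behavior sketch using the real GlobPattern API; the mangled names are illustrative.

    #include "llvm/Support/GlobPattern.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      Expected<GlobPattern> Pat = GlobPattern::create("_ZN4Base*");
      assert(Pat && "pattern is a valid glob");
      assert(Pat->match("_ZN4Base3fooEv"));     // would be skipped
      assert(!Pat->match("_ZN7Derived3fooEv")); // still devirtualizable
      return 0;
    }
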
@@ -308,20 +353,20 @@ namespace {
// A virtual call site. VTable is the loaded virtual table pointer, and CB is
// the indirect virtual call.
struct VirtualCallSite {
- Value *VTable;
- CallSite CS;
+ Value *VTable = nullptr;
+ CallBase &CB;
// If non-null, this field points to the associated unsafe use count stored in
// the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
// of that field for details.
- unsigned *NumUnsafeUses;
+ unsigned *NumUnsafeUses = nullptr;
void
emitRemark(const StringRef OptName, const StringRef TargetName,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
- Function *F = CS.getCaller();
- DebugLoc DLoc = CS->getDebugLoc();
- BasicBlock *Block = CS.getParent();
+ Function *F = CB.getCaller();
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *Block = CB.getParent();
using namespace ore;
OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
@@ -336,12 +381,12 @@ struct VirtualCallSite {
Value *New) {
if (RemarksEnabled)
emitRemark(OptName, TargetName, OREGetter);
- CS->replaceAllUsesWith(New);
- if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) {
- BranchInst::Create(II->getNormalDest(), CS.getInstruction());
+ CB.replaceAllUsesWith(New);
+ if (auto *II = dyn_cast<InvokeInst>(&CB)) {
+ BranchInst::Create(II->getNormalDest(), &CB);
II->getUnwindDest()->removePredecessor(II->getParent());
}
- CS->eraseFromParent();
+ CB.eraseFromParent();
// This use is no longer unsafe.
if (NumUnsafeUses)
--*NumUnsafeUses;
@@ -414,18 +459,18 @@ struct VTableSlotInfo {
// "this"), grouped by argument list.
std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
- void addCallSite(Value *VTable, CallSite CS, unsigned *NumUnsafeUses);
+ void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);
private:
- CallSiteInfo &findCallSiteInfo(CallSite CS);
+ CallSiteInfo &findCallSiteInfo(CallBase &CB);
};
-CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) {
+CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
std::vector<uint64_t> Args;
- auto *CI = dyn_cast<IntegerType>(CS.getType());
- if (!CI || CI->getBitWidth() > 64 || CS.arg_empty())
+ auto *CBType = dyn_cast<IntegerType>(CB.getType());
+ if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
return CSInfo;
- for (auto &&Arg : make_range(CS.arg_begin() + 1, CS.arg_end())) {
+ for (auto &&Arg : make_range(CB.arg_begin() + 1, CB.arg_end())) {
auto *CI = dyn_cast<ConstantInt>(Arg);
if (!CI || CI->getBitWidth() > 64)
return CSInfo;
@@ -434,11 +479,11 @@ CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) {
return ConstCSInfo[Args];
}
-void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
+void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
unsigned *NumUnsafeUses) {
- auto &CSI = findCallSiteInfo(CS);
+ auto &CSI = findCallSiteInfo(CB);
CSI.AllCallSitesDevirted = false;
- CSI.CallSites.push_back({VTable, CS, NumUnsafeUses});
+ CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
}
struct DevirtModule {
@@ -454,6 +499,10 @@ struct DevirtModule {
IntegerType *Int32Ty;
IntegerType *Int64Ty;
IntegerType *IntPtrTy;
+ /// Sizeless array type, used for imported vtables. This provides a signal
+ /// to analyzers that these imports may alias, as they do for example
+ /// when multiple unique return values occur in the same vtable.
+ ArrayType *Int8Arr0Ty;
bool RemarksEnabled;
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
@@ -469,6 +518,7 @@ struct DevirtModule {
// eliminate the type check by RAUWing the associated llvm.type.test call with
// true.
std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
+ PatternList FunctionsToSkip;
DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
@@ -482,13 +532,17 @@ struct DevirtModule {
Int32Ty(Type::getInt32Ty(M.getContext())),
Int64Ty(Type::getInt64Ty(M.getContext())),
IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
+ Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
assert(!(ExportSummary && ImportSummary));
+ FunctionsToSkip.init(SkipFunctionNames);
}
bool areRemarksEnabled();
- void scanTypeTestUsers(Function *TypeTestFunc);
+ void
+ scanTypeTestUsers(Function *TypeTestFunc,
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
void buildTypeIdentifierMap(
@@ -592,12 +646,16 @@ struct DevirtIndex {
MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
+ PatternList FunctionsToSkip;
+
DevirtIndex(
ModuleSummaryIndex &ExportSummary,
std::set<GlobalValue::GUID> &ExportedGUIDs,
std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
: ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
- LocalWPDTargetsMap(LocalWPDTargetsMap) {}
+ LocalWPDTargetsMap(LocalWPDTargetsMap) {
+ FunctionsToSkip.init(SkipFunctionNames);
+ }
bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
const TypeIdCompatibleVtableInfo TIdInfo,
@@ -702,7 +760,49 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::none();
}
+// Enable whole program visibility if enabled by the client (e.g. the linker)
+// or an internal option, and not force-disabled.
+static bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
+ return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
+ !DisableWholeProgramVisibility;
+}
+
namespace llvm {
+
+/// If whole program visibility asserted, then upgrade all public vcall
+/// visibility metadata on vtable definitions to linkage unit visibility in
+/// Module IR (for regular or hybrid LTO).
+void updateVCallVisibilityInModule(Module &M,
+ bool WholeProgramVisibilityEnabledInLTO) {
+ if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
+ return;
+ for (GlobalVariable &GV : M.globals())
+    // Add linkage unit visibility to all variables with type metadata, which are
+ // the vtable definitions. We won't have an existing vcall_visibility
+ // metadata on vtable definitions with public visibility.
+ if (GV.hasMetadata(LLVMContext::MD_type) &&
+ GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
+ GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
+}
+
+/// If whole program visibility asserted, then upgrade all public vcall
+/// visibility metadata on vtable definition summaries to linkage unit
+/// visibility in Module summary index (for ThinLTO).
+void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index,
+ bool WholeProgramVisibilityEnabledInLTO) {
+ if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
+ return;
+ for (auto &P : Index) {
+ for (auto &S : P.second.SummaryList) {
+ auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
+ if (!GVar || GVar->vTableFuncs().empty() ||
+ GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
+ continue;
+ GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
+ }
+ }
+}
+
void runWholeProgramDevirtOnIndex(
ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
@@ -737,11 +837,27 @@ void updateIndexWPDForExports(
} // end namespace llvm
+static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) {
+  // Check that the summary index contains a regular LTO module when performing
+  // export, to prevent accidental use of an index from a pure ThinLTO compilation
+ // (-fno-split-lto-module). This kind of summary index is passed to
+ // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
+ const auto &ModPaths = Summary->modulePaths();
+ if (ClSummaryAction != PassSummaryAction::Import &&
+ ModPaths.find(ModuleSummaryIndex::getRegularLTOModuleName()) ==
+ ModPaths.end())
+ return createStringError(
+ errc::invalid_argument,
+ "combined summary should contain Regular LTO module");
+ return ErrorSuccess();
+}
+
bool DevirtModule::runForTesting(
Module &M, function_ref<AAResults &(Function &)> AARGetter,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
- ModuleSummaryIndex Summary(/*HaveGVs=*/false);
+ std::unique_ptr<ModuleSummaryIndex> Summary =
+ std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
@@ -750,28 +866,41 @@ bool DevirtModule::runForTesting(
": ");
auto ReadSummaryFile =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
-
- yaml::Input In(ReadSummaryFile->getBuffer());
- In >> Summary;
- ExitOnErr(errorCodeToError(In.error()));
+ if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
+ getModuleSummaryIndex(*ReadSummaryFile)) {
+ Summary = std::move(*SummaryOrErr);
+ ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
+ } else {
+ // Try YAML if we've failed with bitcode.
+ consumeError(SummaryOrErr.takeError());
+ yaml::Input In(ReadSummaryFile->getBuffer());
+ In >> *Summary;
+ ExitOnErr(errorCodeToError(In.error()));
+ }
}
bool Changed =
- DevirtModule(
- M, AARGetter, OREGetter, LookupDomTree,
- ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
- ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
+ DevirtModule(M, AARGetter, OREGetter, LookupDomTree,
+ ClSummaryAction == PassSummaryAction::Export ? Summary.get()
+ : nullptr,
+ ClSummaryAction == PassSummaryAction::Import ? Summary.get()
+ : nullptr)
.run();
if (!ClWriteSummary.empty()) {
ExitOnError ExitOnErr(
"-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
std::error_code EC;
- raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
- ExitOnErr(errorCodeToError(EC));
-
- yaml::Output Out(OS);
- Out << Summary;
+ if (StringRef(ClWriteSummary).endswith(".bc")) {
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
+ ExitOnErr(errorCodeToError(EC));
+ WriteIndexToFile(*Summary, OS);
+ } else {
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
+ ExitOnErr(errorCodeToError(EC));
+ yaml::Output Out(OS);
+ Out << *Summary;
+ }
}
return Changed;
@@ -818,6 +947,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!TM.Bits->GV->isConstant())
return false;
+ // We cannot perform whole program devirtualization analysis on a vtable
+ // with public LTO visibility.
+ if (TM.Bits->GV->getVCallVisibility() ==
+ GlobalObject::VCallVisibilityPublic)
+ return false;
+
Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
TM.Offset + ByteOffset, M);
if (!Ptr)
@@ -827,6 +962,9 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!Fn)
return false;
+ if (FunctionsToSkip.match(Fn->getName()))
+ return false;
+
// We can disregard __cxa_pure_virtual as a possible call target, as
// calls to pure virtuals are UB.
if (Fn->getName() == "__cxa_pure_virtual")
@@ -863,8 +1001,13 @@ bool DevirtIndex::tryFindVirtualCallTargets(
return false;
LocalFound = true;
}
- if (!GlobalValue::isAvailableExternallyLinkage(S->linkage()))
+ if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) {
VS = cast<GlobalVarSummary>(S->getBaseObject());
+ // We cannot perform whole program devirtualization analysis on a vtable
+ // with public LTO visibility.
+ if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
+ return false;
+ }
}
if (!VS->isLive())
continue;
@@ -887,8 +1030,8 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
if (RemarksEnabled)
VCallSite.emitRemark("single-impl",
TheFn->stripPointerCasts()->getName(), OREGetter);
- VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
- TheFn, VCallSite.CS.getCalledValue()->getType()));
+ VCallSite.CB.setCalledOperand(ConstantExpr::getBitCast(
+ TheFn, VCallSite.CB.getCalledOperand()->getType()));
// This use is no longer unsafe.
if (VCallSite.NumUnsafeUses)
--*VCallSite.NumUnsafeUses;
@@ -979,7 +1122,7 @@ bool DevirtModule::trySingleImplDevirt(
AddCalls(SlotInfo, TheFnVI);
Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
- Res->SingleImplName = TheFn->getName();
+ Res->SingleImplName = std::string(TheFn->getName());
return true;
}
@@ -1001,6 +1144,11 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
if (!Size)
return false;
+  // Don't devirtualize the function if we're told to skip it
+ // in -wholeprogramdevirt-skip.
+ if (FunctionsToSkip.match(TheFn.name()))
+ return false;
+
// If the summary list contains multiple summaries where at least one is
// a local, give up, as we won't know which (possibly promoted) name to use.
for (auto &S : TheFn.getSummaryList())
@@ -1028,10 +1176,10 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
else {
LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
- Res->SingleImplName = TheFn.name();
+ Res->SingleImplName = std::string(TheFn.name());
}
} else
- Res->SingleImplName = TheFn.name();
+ Res->SingleImplName = std::string(TheFn.name());
// Name will be empty if this thin link driven off of serialized combined
// index (e.g. llvm-lto). However, WPD is not supported/invoked for the
@@ -1106,10 +1254,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
if (CSInfo.AllCallSitesDevirted)
return;
for (auto &&VCallSite : CSInfo.CallSites) {
- CallSite CS = VCallSite.CS;
+ CallBase &CB = VCallSite.CB;
// Jump tables are only profitable if the retpoline mitigation is enabled.
- Attribute FSAttr = CS.getCaller()->getFnAttribute("target-features");
+ Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
if (FSAttr.hasAttribute(Attribute::None) ||
!FSAttr.getValueAsString().contains("+retpoline"))
continue;
@@ -1122,42 +1270,40 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
// x86_64.
std::vector<Type *> NewArgs;
NewArgs.push_back(Int8PtrTy);
- for (Type *T : CS.getFunctionType()->params())
+ for (Type *T : CB.getFunctionType()->params())
NewArgs.push_back(T);
FunctionType *NewFT =
- FunctionType::get(CS.getFunctionType()->getReturnType(), NewArgs,
- CS.getFunctionType()->isVarArg());
+ FunctionType::get(CB.getFunctionType()->getReturnType(), NewArgs,
+ CB.getFunctionType()->isVarArg());
PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
- IRBuilder<> IRB(CS.getInstruction());
+ IRBuilder<> IRB(&CB);
std::vector<Value *> Args;
Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
- for (unsigned I = 0; I != CS.getNumArgOperands(); ++I)
- Args.push_back(CS.getArgOperand(I));
+ Args.insert(Args.end(), CB.arg_begin(), CB.arg_end());
- CallSite NewCS;
- if (CS.isCall())
+ CallBase *NewCS = nullptr;
+ if (isa<CallInst>(CB))
NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
else
- NewCS = IRB.CreateInvoke(
- NewFT, IRB.CreateBitCast(JT, NewFTPtr),
- cast<InvokeInst>(CS.getInstruction())->getNormalDest(),
- cast<InvokeInst>(CS.getInstruction())->getUnwindDest(), Args);
- NewCS.setCallingConv(CS.getCallingConv());
+ NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),
+ cast<InvokeInst>(CB).getNormalDest(),
+ cast<InvokeInst>(CB).getUnwindDest(), Args);
+ NewCS->setCallingConv(CB.getCallingConv());
- AttributeList Attrs = CS.getAttributes();
+ AttributeList Attrs = CB.getAttributes();
std::vector<AttributeSet> NewArgAttrs;
NewArgAttrs.push_back(AttributeSet::get(
M.getContext(), ArrayRef<Attribute>{Attribute::get(
M.getContext(), Attribute::Nest)}));
for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
NewArgAttrs.push_back(Attrs.getParamAttributes(I));
- NewCS.setAttributes(
+ NewCS->setAttributes(
AttributeList::get(M.getContext(), Attrs.getFnAttributes(),
Attrs.getRetAttributes(), NewArgAttrs));
- CS->replaceAllUsesWith(NewCS.getInstruction());
- CS->eraseFromParent();
+ CB.replaceAllUsesWith(NewCS);
+ CB.eraseFromParent();
// This use is no longer unsafe.
if (VCallSite.NumUnsafeUses)
@@ -1208,7 +1354,7 @@ void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
for (auto Call : CSInfo.CallSites)
Call.replaceAndErase(
"uniform-ret-val", FnName, RemarksEnabled, OREGetter,
- ConstantInt::get(cast<IntegerType>(Call.CS.getType()), TheRetVal));
+ ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
CSInfo.markDevirt();
}
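For readers new to this transform, a hedged source-level illustration of what applyUniformRetValOpt exploits (hypothetical types, not taken from the test suite):

    // If whole-program analysis proves every reachable override returns the
    // same constant, the virtual call itself folds to that constant.
    struct Base { virtual int tag() { return 7; } };
    struct D1 : Base { int tag() override { return 7; } };
    struct D2 : Base { int tag() override { return 7; } };
    int use(Base *B) { return B->tag(); } // WPD can fold this to `return 7;`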
@@ -1273,7 +1419,8 @@ void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
StringRef Name) {
- Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty);
+ Constant *C =
+ M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
auto *GV = dyn_cast<GlobalVariable>(C);
if (GV)
GV->setVisibility(GlobalValue::HiddenVisibility);
@@ -1313,11 +1460,11 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
bool IsOne,
Constant *UniqueMemberAddr) {
for (auto &&Call : CSInfo.CallSites) {
- IRBuilder<> B(Call.CS.getInstruction());
+ IRBuilder<> B(&Call.CB);
Value *Cmp =
- B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
- B.CreateBitCast(Call.VTable, Int8PtrTy), UniqueMemberAddr);
- Cmp = B.CreateZExt(Cmp, Call.CS->getType());
+ B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
+ B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
+ Cmp = B.CreateZExt(Cmp, Call.CB.getType());
Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
Cmp);
}
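The rewritten compare above now bitcasts UniqueMemberAddr to the vtable pointer's type instead of casting the vtable to i8*. Conceptually (hypothetical types; a sketch, not pass output):

    // When exactly one implementation returns true, WPD replaces the call
    // with a vtable-pointer comparison against that unique vtable address.
    struct Base { virtual bool isSpecial() { return false; } };
    struct Special : Base { bool isSpecial() override { return true; } };
    bool query(Base *B) {
      return B->isSpecial(); // lowered to roughly: vptr(B) == &vtable(Special)
    }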
@@ -1381,8 +1528,8 @@ bool DevirtModule::tryUniqueRetValOpt(
void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
Constant *Byte, Constant *Bit) {
for (auto Call : CSInfo.CallSites) {
- auto *RetType = cast<IntegerType>(Call.CS.getType());
- IRBuilder<> B(Call.CS.getInstruction());
+ auto *RetType = cast<IntegerType>(Call.CB.getType());
+ IRBuilder<> B(&Call.CB);
Value *Addr =
B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
if (RetType->getBitWidth() == 1) {
@@ -1507,10 +1654,8 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
// Align the before byte array to the global's minimum alignment so that we
// don't break any alignment requirements on the global.
- MaybeAlign Alignment(B.GV->getAlignment());
- if (!Alignment)
- Alignment =
- Align(M.getDataLayout().getABITypeAlignment(B.GV->getValueType()));
+ Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
+ B.GV->getAlign(), B.GV->getValueType());
B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));
// Before was stored in reverse order; flip it now.
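The hunk above folds the open-coded MaybeAlign fallback into DataLayout::getValueOrABITypeAlignment. The same pattern as a standalone sketch (globalAlign is a hypothetical helper; assumes LLVM headers of this era):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GlobalVariable.h"
    using namespace llvm;

    Align globalAlign(const DataLayout &DL, const GlobalVariable &GV) {
      // Prefer the global's explicit alignment; otherwise fall back to the
      // ABI alignment of its value type, exactly as rebuildGlobal now does.
      return DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
    }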
@@ -1562,13 +1707,14 @@ bool DevirtModule::areRemarksEnabled() {
return false;
}
-void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) {
+void DevirtModule::scanTypeTestUsers(
+ Function *TypeTestFunc,
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
// Find all virtual calls via a virtual table pointer %p under an assumption
// of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
// points to a member of the type identifier %md. Group calls by (type ID,
// offset) pair (effectively the identity of the virtual function) and store
// to CallSlots.
- DenseSet<CallSite> SeenCallSites;
for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
I != E;) {
auto CI = dyn_cast<CallInst>(I->getUser());
@@ -1582,29 +1728,59 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc) {
auto &DT = LookupDomTree(*CI->getFunction());
findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
+ Metadata *TypeId =
+ cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
// If we found any, add them to CallSlots.
if (!Assumes.empty()) {
- Metadata *TypeId =
- cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
- for (DevirtCallSite Call : DevirtCalls) {
- // Only add this CallSite if we haven't seen it before. The vtable
- // pointer may have been CSE'd with pointers from other call sites,
- // and we don't want to process call sites multiple times. We can't
- // just skip the vtable Ptr if it has been seen before, however, since
- // it may be shared by type tests that dominate different calls.
- if (SeenCallSites.insert(Call.CS).second)
- CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS, nullptr);
- }
+ for (DevirtCallSite Call : DevirtCalls)
+ CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
}
- // We no longer need the assumes or the type test.
- for (auto Assume : Assumes)
- Assume->eraseFromParent();
- // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
- // may use the vtable argument later.
- if (CI->use_empty())
- CI->eraseFromParent();
+ auto RemoveTypeTestAssumes = [&]() {
+ // We no longer need the assumes or the type test.
+ for (auto Assume : Assumes)
+ Assume->eraseFromParent();
+ // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
+ // may use the vtable argument later.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+ };
+
+ // At this point we could remove all type test assume sequences, as they
+ // were originally inserted for WPD. However, we can keep these in the
+ // code stream for later analysis (e.g. to help drive more efficient ICP
+ // sequences). They will eventually be removed by a second LowerTypeTests
+ // invocation that cleans them up. In order to do this correctly, the first
+ // LowerTypeTests invocation needs to know that they have "Unknown" type
+ // test resolution, so that they aren't treated as Unsat and lowered to
+ // False, which will break any uses on assumes. Below we remove any type
+ // test assumes that will not be treated as Unknown by LTT.
+
+ // The type test assumes will be treated by LTT as Unsat if the type id is
+ // not used on a global (in which case it has no entry in the TypeIdMap).
+ if (!TypeIdMap.count(TypeId))
+ RemoveTypeTestAssumes();
+
+ // For ThinLTO importing, we need to remove the type test assumes if this is
+ // an MDString type id without a corresponding TypeIdSummary. Any
+ // non-MDString type ids are ignored and treated as Unknown by LTT, so their
+ // type test assumes can be kept. If the MDString type id is missing a
+ // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
+ // exporting phase of WPD from analyzing it), then it would be treated as
+ // Unsat by LTT and we need to remove its type test assumes here. If not
+ // used on a vcall we don't need them for later optimization use in any
+ // case.
+ else if (ImportSummary && isa<MDString>(TypeId)) {
+ const TypeIdSummary *TidSummary =
+ ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
+ if (!TidSummary)
+ RemoveTypeTestAssumes();
+ else
+ // If one was created it should not be Unsat, because if we reached here
+ // the type id was used on a global.
+ assert(TidSummary->TTRes.TheKind != TypeTestResolution::Unsat);
+ }
}
}
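The new logic above decides, per type id, whether the assume sequences may stay in the IR for later ICP use. Condensed into a hypothetical predicate (mirrors the control flow above, not the pass's exact code):

    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    bool shouldDropTypeTestAssumes(Metadata *TypeId, bool TypeIdUsedOnGlobal,
                                   bool HaveImportSummary,
                                   bool HasTypeIdSummary) {
      if (!TypeIdUsedOnGlobal)
        return true; // LTT would treat the test as Unsat and fold it to false.
      if (HaveImportSummary && isa<MDString>(TypeId) && !HasTypeIdSummary)
        return true; // ThinLTO import with no TypeIdSummary: also Unsat.
      return false;  // Kept; LTT resolves the remaining tests as Unknown.
    }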
@@ -1680,7 +1856,7 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
if (HasNonCallUses)
++NumUnsafeUses;
for (DevirtCallSite Call : DevirtCalls) {
- CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS,
+ CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
&NumUnsafeUses);
}
@@ -1796,8 +1972,13 @@ bool DevirtModule::run() {
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
return false;
+ // Rebuild type metadata into a map for easy lookup.
+ std::vector<VTableBits> Bits;
+ DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
+ buildTypeIdentifierMap(Bits, TypeIdMap);
+
if (TypeTestFunc && AssumeFunc)
- scanTypeTestUsers(TypeTestFunc);
+ scanTypeTestUsers(TypeTestFunc, TypeIdMap);
if (TypeCheckedLoadFunc)
scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
@@ -1808,15 +1989,17 @@ bool DevirtModule::run() {
removeRedundantTypeTests();
+  // We have lowered or deleted the type intrinsics, so we will no
+ // longer have enough information to reason about the liveness of virtual
+ // function pointers in GlobalDCE.
+ for (GlobalVariable &GV : M.globals())
+ GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
+
// The rest of the code is only necessary when exporting or during regular
// LTO, so we are done.
return true;
}
- // Rebuild type metadata into a map for easy lookup.
- std::vector<VTableBits> Bits;
- DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
- buildTypeIdentifierMap(Bits, TypeIdMap);
if (TypeIdMap.empty())
return true;
@@ -1873,14 +2056,22 @@ bool DevirtModule::run() {
// function implementation at offset S.first.ByteOffset, and add to
// TargetsForSlot.
std::vector<VirtualCallTarget> TargetsForSlot;
- if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
+ WholeProgramDevirtResolution *Res = nullptr;
+ const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
+ if (ExportSummary && isa<MDString>(S.first.TypeID) &&
+ TypeMemberInfos.size())
+ // For any type id used on a global's type metadata, create the type id
+ // summary resolution regardless of whether we can devirtualize, so that
+ // lower type tests knows the type id is not Unsat. If it was not used on
+ // a global's type metadata, the TypeIdMap entry set will be empty, and
+ // we don't want to create an entry (with the default Unknown type
+ // resolution), which can prevent detection of the Unsat.
+ Res = &ExportSummary
+ ->getOrInsertTypeIdSummary(
+ cast<MDString>(S.first.TypeID)->getString())
+ .WPDRes[S.first.ByteOffset];
+ if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
S.first.ByteOffset)) {
- WholeProgramDevirtResolution *Res = nullptr;
- if (ExportSummary && isa<MDString>(S.first.TypeID))
- Res = &ExportSummary
- ->getOrInsertTypeIdSummary(
- cast<MDString>(S.first.TypeID)->getString())
- .WPDRes[S.first.ByteOffset];
if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
DidVirtualConstProp |=
@@ -1893,7 +2084,7 @@ bool DevirtModule::run() {
if (RemarksEnabled)
for (const auto &T : TargetsForSlot)
if (T.WasDevirt)
- DevirtTargets[T.Fn->getName()] = T.Fn;
+ DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
}
// CFI-specific: if we are exporting and any llvm.type.checked.load
@@ -1931,7 +2122,7 @@ bool DevirtModule::run() {
for (VTableBits &B : Bits)
rebuildGlobal(B);
- // We have lowered or deleted the type checked load intrinsics, so we no
+  // We have lowered or deleted the type intrinsics, so we will no
// longer have enough information to reason about the liveness of virtual
// function pointers in GlobalDCE.
for (GlobalVariable &GV : M.globals())
@@ -1994,11 +2185,14 @@ void DevirtIndex::run() {
std::vector<ValueInfo> TargetsForSlot;
auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
assert(TidSummary);
+  // Create the type id summary resolution regardless of whether we can
+ // devirtualize, so that lower type tests knows the type id is used on
+ // a global and not Unsat.
+ WholeProgramDevirtResolution *Res =
+ &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
+ .WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
S.first.ByteOffset)) {
- WholeProgramDevirtResolution *Res =
- &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
- .WPDRes[S.first.ByteOffset];
if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
DevirtTargets))