aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
commit145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Transforms/IPO
parentecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Diffstat (limited to 'llvm/lib/Transforms/IPO')
-rw-r--r--llvm/lib/Transforms/IPO/AlwaysInliner.cpp47
-rw-r--r--llvm/lib/Transforms/IPO/ArgumentPromotion.cpp1121
-rw-r--r--llvm/lib/Transforms/IPO/Attributor.cpp462
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp2006
-rw-r--r--llvm/lib/Transforms/IPO/BlockExtractor.cpp11
-rw-r--r--llvm/lib/Transforms/IPO/CalledValuePropagation.cpp6
-rw-r--r--llvm/lib/Transforms/IPO/ConstantMerge.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/CrossDSOCFI.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp574
-rw-r--r--llvm/lib/Transforms/IPO/ExtractGV.cpp1
-rw-r--r--llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/FunctionAttrs.cpp173
-rw-r--r--llvm/lib/Transforms/IPO/FunctionImport.cpp19
-rw-r--r--llvm/lib/Transforms/IPO/FunctionSpecialization.cpp495
-rw-r--r--llvm/lib/Transforms/IPO/GlobalDCE.cpp34
-rw-r--r--llvm/lib/Transforms/IPO/GlobalOpt.cpp177
-rw-r--r--llvm/lib/Transforms/IPO/GlobalSplit.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/HotColdSplitting.cpp26
-rw-r--r--llvm/lib/Transforms/IPO/IPO.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/IROutliner.cpp326
-rw-r--r--llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/InlineSimple.cpp8
-rw-r--r--llvm/lib/Transforms/IPO/Inliner.cpp111
-rw-r--r--llvm/lib/Transforms/IPO/Internalize.cpp3
-rw-r--r--llvm/lib/Transforms/IPO/LoopExtractor.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/LowerTypeTests.cpp21
-rw-r--r--llvm/lib/Transforms/IPO/MergeFunctions.cpp48
-rw-r--r--llvm/lib/Transforms/IPO/ModuleInliner.cpp25
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp255
-rw-r--r--llvm/lib/Transforms/IPO/PartialInlining.cpp16
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp295
-rw-r--r--llvm/lib/Transforms/IPO/PruneEH.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/SCCP.cpp1
-rw-r--r--llvm/lib/Transforms/IPO/SampleContextTracker.cpp123
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp287
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfileProbe.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp8
-rw-r--r--llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp132
39 files changed, 3672 insertions, 3192 deletions
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index a6d9ce1033f3..58cea7ebb749 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -1,4 +1,4 @@
-//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
+//===- AlwaysInliner.cpp - Code to inline always_inline functions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -16,15 +16,10 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -60,31 +55,38 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
for (User *U : F.users())
if (auto *CB = dyn_cast<CallBase>(U))
if (CB->getCalledFunction() == &F &&
- CB->hasFnAttr(Attribute::AlwaysInline))
- Calls.insert(CB);
+ CB->hasFnAttr(Attribute::AlwaysInline) &&
+ !CB->getAttributes().hasFnAttr(Attribute::NoInline))
+ Calls.insert(CB);
for (CallBase *CB : Calls) {
Function *Caller = CB->getCaller();
OptimizationRemarkEmitter ORE(Caller);
- auto OIC = shouldInline(
- *CB,
- [&](CallBase &CB) {
- return InlineCost::getAlways("always inline attribute");
- },
- ORE);
- assert(OIC);
- emitInlinedIntoBasedOnCost(ORE, CB->getDebugLoc(), CB->getParent(), F,
- *Caller, *OIC, false, DEBUG_TYPE);
+ DebugLoc DLoc = CB->getDebugLoc();
+ BasicBlock *Block = CB->getParent();
InlineFunctionInfo IFI(
/*cg=*/nullptr, GetAssumptionCache, &PSI,
- &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(*Caller),
&FAM.getResult<BlockFrequencyAnalysis>(F));
InlineResult Res = InlineFunction(
*CB, IFI, &FAM.getResult<AAManager>(F), InsertLifetime);
- assert(Res.isSuccess() && "unexpected failure to inline");
- (void)Res;
+ if (!Res.isSuccess()) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
+ Block)
+ << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
+ << ore::NV("Caller", Caller)
+ << "': " << ore::NV("Reason", Res.getFailureReason());
+ });
+ continue;
+ }
+
+ emitInlinedIntoBasedOnCost(
+ ORE, DLoc, Block, F, *Caller,
+ InlineCost::getAlways("always inline attribute"),
+ /*ForProfileContext=*/false, DEBUG_TYPE);
// Merge the attributes based on the inlining.
AttributeFuncs::mergeAttributesForInlining(*Caller, F);
@@ -210,6 +212,9 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) {
if (!CB.hasFnAttr(Attribute::AlwaysInline))
return InlineCost::getNever("no alwaysinline attribute");
+ if (Callee->hasFnAttribute(Attribute::AlwaysInline) && CB.isNoInline())
+ return InlineCost::getNever("noinline call site attribute");
+
auto IsViable = isInlineViable(*Callee);
if (!IsViable.isSuccess())
return InlineCost::getNever(IsViable.getFailureReason());
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index e6a542385662..62cfc3294968 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,9 +29,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -40,15 +39,11 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -56,33 +51,26 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <functional>
-#include <iterator>
-#include <map>
-#include <set>
#include <utility>
#include <vector>
@@ -91,43 +79,81 @@ using namespace llvm;
#define DEBUG_TYPE "argpromotion"
STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
-STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
-STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
-/// A vector used to hold the indices of a single GEP instruction
-using IndicesVector = std::vector<uint64_t>;
+namespace {
+
+struct ArgPart {
+ Type *Ty;
+ Align Alignment;
+ /// A representative guaranteed-executed load or store instruction for use by
+ /// metadata transfer.
+ Instruction *MustExecInstr;
+};
+
+using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
+
+} // end anonymous namespace
+
+static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
+ Value *Ptr, Type *ResElemTy, int64_t Offset) {
+ // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise
+ // fall back to an i8 GEP to a specific offset.
+ unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
+ APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
+ if (!Ptr->getType()->isOpaquePointerTy()) {
+ Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType();
+ if (OrigOffset == 0 && OrigElemTy == ResElemTy)
+ return Ptr;
+
+ if (OrigElemTy->isSized()) {
+ APInt TmpOffset = OrigOffset;
+ Type *TmpTy = OrigElemTy;
+ SmallVector<APInt> IntIndices =
+ DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
+ if (TmpOffset == 0) {
+ // Try to add trailing zero indices to reach the right type.
+ while (TmpTy != ResElemTy) {
+ Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0);
+ if (!NextTy)
+ break;
+
+ IntIndices.push_back(APInt::getZero(
+ isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth()));
+ TmpTy = NextTy;
+ }
+
+ SmallVector<Value *> Indices;
+ for (const APInt &Index : IntIndices)
+ Indices.push_back(IRB.getInt(Index));
+
+ if (OrigOffset != 0 || TmpTy == ResElemTy) {
+ Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices);
+ return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
+ }
+ }
+ }
+ }
+
+ if (OrigOffset != 0) {
+ Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace));
+ Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset));
+ }
+ return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
+}
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *
-doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
- Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
- ReplaceCallSite) {
+doPromotion(Function *F, FunctionAnalysisManager &FAM,
+ const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
+ &ArgsToPromote) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
FunctionType *FTy = F->getFunctionType();
std::vector<Type *> Params;
- using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>;
-
- // ScalarizedElements - If we are promoting a pointer that has elements
- // accessed out of it, keep track of which elements are accessed so that we
- // can add one argument for each.
- //
- // Arguments that are directly loaded will have a zero element value here, to
- // handle cases where there are both a direct load and GEP accesses.
- std::map<Argument *, ScalarizeTable> ScalarizedElements;
-
- // OriginalLoads - Keep track of a representative load instruction from the
- // original function so that we can tell the alias analysis implementation
- // what the new GEP/Load instructions we are inserting look like.
- // We need to keep the original loads for each argument and the elements
- // of the argument that are accessed.
- std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;
-
// Attribute - Keep track of the parameter attributes for the arguments
// that we are *not* promoting. For the ones that we do promote, the parameter
// attributes are lost
@@ -138,15 +164,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
unsigned ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgNo) {
- if (ByValArgsToTransform.count(&*I)) {
- // Simple byval argument? Just add all the struct element types.
- Type *AgTy = I->getParamByValType();
- StructType *STy = cast<StructType>(AgTy);
- llvm::append_range(Params, STy->elements());
- ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
- AttributeSet());
- ++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(&*I)) {
+ if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
@@ -154,58 +172,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
} else {
- // Okay, this is being promoted. This means that the only uses are loads
- // or GEPs which are only used by loads
-
- // In this table, we will track which indices are loaded from the argument
- // (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- for (User *U : make_early_inc_range(I->users())) {
- Instruction *UI = cast<Instruction>(U);
- Type *SrcTy;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- SrcTy = L->getType();
- else
- SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
- // Skip dead GEPs and remove them.
- if (isa<GetElementPtrInst>(UI) && UI->use_empty()) {
- UI->eraseFromParent();
- continue;
- }
-
- IndicesVector Indices;
- Indices.reserve(UI->getNumOperands() - 1);
- // Since loads will only have a single operand, and GEPs only a single
- // non-index operand, this will record direct loads without any indices,
- // and gep+loads with the GEP indices.
- for (const Use &I : llvm::drop_begin(UI->operands()))
- Indices.push_back(cast<ConstantInt>(I)->getSExtValue());
- // GEPs with a single 0 index can be merged with direct loads
- if (Indices.size() == 1 && Indices.front() == 0)
- Indices.clear();
- ArgIndices.insert(std::make_pair(SrcTy, Indices));
- LoadInst *OrigLoad;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- OrigLoad = L;
- else
- // Take any load, we will use it only to update Alias Analysis
- OrigLoad = cast<LoadInst>(UI->user_back());
- OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
- }
-
- // Add a parameter to the function for each element passed in.
- for (const auto &ArgIndex : ArgIndices) {
- // not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(
- I->getType()->getPointerElementType(), ArgIndex.second));
+ const auto &ArgParts = ArgsToPromote.find(&*I)->second;
+ for (const auto &Pair : ArgParts) {
+ Params.push_back(Pair.second.Ty);
ArgAttrVec.push_back(AttributeSet());
- assert(Params.back());
}
-
- if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
- ++NumArgumentsPromoted;
- else
- ++NumAggregatesPromoted;
+ ++NumArgumentsPromoted;
}
}
@@ -222,24 +194,30 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// The new function will have the !dbg metadata copied from the original
// function. The original function may not be deleted, and dbg metadata need
- // to be unique so we need to drop it.
+ // to be unique, so we need to drop it.
F->setSubprogram(nullptr);
LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
+ uint64_t LargestVectorWidth = 0;
+ for (auto *I : Params)
+ if (auto *VT = dyn_cast<llvm::VectorType>(I))
+ LargestVectorWidth = std::max(
+ LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());
+
// Recompute the parameter attributes list based on the new arguments for
// the function.
NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
PAL.getRetAttrs(), ArgAttrVec));
+ AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
ArgAttrVec.clear();
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
- // Loop over all of the callers of the function, transforming the call sites
- // to pass in the loaded pointers.
- //
+ // Loop over all the callers of the function, transforming the call sites to
+ // pass in the loaded pointers.
SmallVector<Value *, 16> Args;
const DataLayout &DL = F->getParent()->getDataLayout();
while (!F->use_empty()) {
@@ -250,74 +228,34 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
- auto AI = CB.arg_begin();
+ auto *AI = CB.arg_begin();
ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++AI, ++ArgNo)
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ ++I, ++AI, ++ArgNo) {
+ if (!ArgsToPromote.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
- } else if (ByValArgsToTransform.count(&*I)) {
- // Emit a GEP and load for each element of the struct.
- Type *AgTy = I->getParamByValType();
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
- const StructLayout *SL = DL.getStructLayout(STy);
- Align StructAlign = *I->getParamAlign();
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- auto *Idx =
- IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i));
- // TODO: Tell AA about the new values?
- Align Alignment =
- commonAlignment(StructAlign, SL->getElementOffset(i));
- Args.push_back(IRB.CreateAlignedLoad(
- STy->getElementType(i), Idx, Alignment, Idx->getName() + ".val"));
- ArgAttrVec.push_back(AttributeSet());
- }
} else if (!I->use_empty()) {
- // Non-dead argument: insert GEPs and loads as appropriate.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- // Store the Value* version of the indices in here, but declare it now
- // for reuse.
- std::vector<Value *> Ops;
- for (const auto &ArgIndex : ArgIndices) {
- Value *V = *AI;
- LoadInst *OrigLoad =
- OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
- if (!ArgIndex.second.empty()) {
- Ops.reserve(ArgIndex.second.size());
- Type *ElTy = V->getType();
- for (auto II : ArgIndex.second) {
- // Use i32 to index structs, and i64 for others (pointers/arrays).
- // This satisfies GEP constraints.
- Type *IdxTy =
- (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
- : Type::getInt64Ty(F->getContext()));
- Ops.push_back(ConstantInt::get(IdxTy, II));
- // Keep track of the type we're currently indexing.
- if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
- ElTy = ElPTy->getPointerElementType();
- else
- ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II);
- }
- // And create a GEP to extract those indices.
- V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx");
- Ops.clear();
+ Value *V = *AI;
+ const auto &ArgParts = ArgsToPromote.find(&*I)->second;
+ for (const auto &Pair : ArgParts) {
+ LoadInst *LI = IRB.CreateAlignedLoad(
+ Pair.second.Ty,
+ createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
+ Pair.second.Alignment, V->getName() + ".val");
+ if (Pair.second.MustExecInstr) {
+ LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
+ LI->copyMetadata(*Pair.second.MustExecInstr,
+ {LLVMContext::MD_range, LLVMContext::MD_nonnull,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_align, LLVMContext::MD_noundef});
}
- // Since we're replacing a load make sure we take the alignment
- // of the previous load.
- LoadInst *newLoad =
- IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
- newLoad->setAlignment(OrigLoad->getAlign());
- // Transfer the AA info too.
- newLoad->setAAMetadata(OrigLoad->getAAMetadata());
-
- Args.push_back(newLoad);
+ Args.push_back(LI);
ArgAttrVec.push_back(AttributeSet());
}
}
+ }
// Push any varargs arguments on the list.
for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
@@ -345,9 +283,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
Args.clear();
ArgAttrVec.clear();
- // Update the callgraph to know that the callsite has been transformed.
- if (ReplaceCallSite)
- (*ReplaceCallSite)(CB, *NewCS);
+ AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
+ LargestVectorWidth);
if (!CB.use_empty()) {
CB.replaceAllUsesWith(NewCS);
@@ -364,11 +301,15 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// function empty.
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+ // We will collect all the new created allocas to promote them into registers
+ // after the following loop
+ SmallVector<AllocaInst *, 4> Allocas;
+
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well.
Function::arg_iterator I2 = NF->arg_begin();
for (Argument &Arg : F->args()) {
- if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
+ if (!ArgsToPromote.count(&Arg)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
Arg.replaceAllUsesWith(&*I2);
@@ -377,37 +318,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
continue;
}
- if (ByValArgsToTransform.count(&Arg)) {
- // In the callee, we create an alloca, and store each of the new incoming
- // arguments into the alloca.
- Instruction *InsertPt = &NF->begin()->front();
-
- // Just add all the struct element types.
- Type *AgTy = Arg.getParamByValType();
- Align StructAlign = *Arg.getParamAlign();
- Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
- StructAlign, "", InsertPt);
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
- nullptr};
- const StructLayout *SL = DL.getStructLayout(STy);
-
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
- InsertPt);
- I2->setName(Arg.getName() + "." + Twine(i));
- Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
- new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
- }
-
- // Anything that used the arg should now use the alloca.
- Arg.replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&Arg);
- continue;
- }
-
// There potentially are metadata uses for things like llvm.dbg.value.
// Replace them with undef, after handling the other regular uses.
auto RauwUndefMetadata = make_scope_exit(
@@ -416,57 +326,95 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
if (Arg.use_empty())
continue;
- // Otherwise, if we promoted this argument, then all users are load
- // instructions (or GEPs with only load users), and all loads should be
- // using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[&Arg];
+ // Otherwise, if we promoted this argument, we have to create an alloca in
+ // the callee for every promotable part and store each of the new incoming
+ // arguments into the corresponding alloca, what lets the old code (the
+ // store instructions if they are allowed especially) a chance to work as
+ // before.
+ assert(Arg.getType()->isPointerTy() &&
+ "Only arguments with a pointer type are promotable");
- while (!Arg.use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) {
- assert(ArgIndices.begin()->second.empty() &&
- "Load element should sort to front!");
- I2->setName(Arg.getName() + ".val");
- LI->replaceAllUsesWith(&*I2);
- LI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName()
- << "' in function '" << F->getName() << "'\n");
- } else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back());
- assert(!GEP->use_empty() &&
- "GEPs without uses should be cleaned up already");
- IndicesVector Operands;
- Operands.reserve(GEP->getNumIndices());
- for (const Use &Idx : GEP->indices())
- Operands.push_back(cast<ConstantInt>(Idx)->getSExtValue());
+ IRBuilder<NoFolder> IRB(&NF->begin()->front());
- // GEPs with a single 0 index can be merged with direct loads
- if (Operands.size() == 1 && Operands.front() == 0)
- Operands.clear();
+ // Add only the promoted elements, so parts from ArgsToPromote
+ SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
+ for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
+ int64_t Offset = Pair.first;
+ const ArgPart &Part = Pair.second;
- Function::arg_iterator TheArg = I2;
- for (ScalarizeTable::iterator It = ArgIndices.begin();
- It->second != Operands; ++It, ++TheArg) {
- assert(It != ArgIndices.end() && "GEP not handled??");
- }
+ Argument *NewArg = I2++;
+ NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");
+
+ AllocaInst *NewAlloca = IRB.CreateAlloca(
+ Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
+ NewAlloca->setAlignment(Pair.second.Alignment);
+ IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);
- TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(),
- make_range(Operands.begin(), Operands.end())));
+ // Collect the alloca to retarget the users to
+ OffsetToAlloca.insert({Offset, NewAlloca});
+ }
- LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
- << "' of function '" << NF->getName() << "'\n");
+ auto GetAlloca = [&](Value *Ptr) {
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
+ /* AllowNonInbounds */ true);
+ assert(Ptr == &Arg && "Not constant offset from arg?");
+ return OffsetToAlloca.lookup(Offset.getSExtValue());
+ };
- // All of the uses must be load instructions. Replace them all with
- // the argument specified by ArgNo.
- while (!GEP->use_empty()) {
- LoadInst *L = cast<LoadInst>(GEP->user_back());
- L->replaceAllUsesWith(&*TheArg);
- L->eraseFromParent();
- }
- GEP->eraseFromParent();
+ // Cleanup the code from the dead instructions: GEPs and BitCasts in between
+ // the original argument and its users: loads and stores. Retarget every
+ // user to the new created alloca.
+ SmallVector<Value *, 16> Worklist;
+ SmallVector<Instruction *, 16> DeadInsts;
+ append_range(Worklist, Arg.users());
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
+ DeadInsts.push_back(cast<Instruction>(V));
+ append_range(Worklist, V->users());
+ continue;
+ }
+
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ Value *Ptr = LI->getPointerOperand();
+ LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
+ continue;
+ }
+
+ if (auto *SI = dyn_cast<StoreInst>(V)) {
+ assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
+ Value *Ptr = SI->getPointerOperand();
+ SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
+ continue;
}
+
+ llvm_unreachable("Unexpected user");
+ }
+
+ for (Instruction *I : DeadInsts) {
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+
+ // Collect the allocas for promotion
+ for (const auto &Pair : OffsetToAlloca) {
+ assert(isAllocaPromotable(Pair.second) &&
+ "By design, only promotable allocas should be produced.");
+ Allocas.push_back(Pair.second);
}
- // Increment I2 past all of the arguments added for this promoted pointer.
- std::advance(I2, ArgIndices.size());
+ }
+
+ LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
+ << " alloca(s) are promotable by Mem2Reg\n");
+
+ if (!Allocas.empty()) {
+ // And we are able to call the `promoteMemoryToRegister()` function.
+ // Our earlier checks have ensured that PromoteMemToReg() will
+ // succeed.
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
+ auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
+ PromoteMemToReg(Allocas, DT, &AC);
}
return NF;
@@ -474,100 +422,37 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
/// Return true if we can prove that all callees pass in a valid pointer for the
/// specified function argument.
-static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) {
+static bool allCallersPassValidPointerForArgument(Argument *Arg,
+ Align NeededAlign,
+ uint64_t NeededDerefBytes) {
Function *Callee = Arg->getParent();
const DataLayout &DL = Callee->getParent()->getDataLayout();
+ APInt Bytes(64, NeededDerefBytes);
- unsigned ArgNo = Arg->getArgNo();
+ // Check if the argument itself is marked dereferenceable and aligned.
+ if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
+ return true;
// Look at all call sites of the function. At this point we know we only have
// direct callees.
- for (User *U : Callee->users()) {
+ return all_of(Callee->users(), [&](User *U) {
CallBase &CB = cast<CallBase>(*U);
-
- if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL))
- return false;
- }
- return true;
-}
-
-/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
-/// that is greater than or equal to the size of prefix, and each of the
-/// elements in Prefix is the same as the corresponding elements in Longer.
-///
-/// This means it also returns true when Prefix and Longer are equal!
-static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
- if (Prefix.size() > Longer.size())
- return false;
- return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
-}
-
-/// Checks if Indices, or a prefix of Indices, is in Set.
-static bool prefixIn(const IndicesVector &Indices,
- std::set<IndicesVector> &Set) {
- std::set<IndicesVector>::iterator Low;
- Low = Set.upper_bound(Indices);
- if (Low != Set.begin())
- Low--;
- // Low is now the last element smaller than or equal to Indices. This means
- // it points to a prefix of Indices (possibly Indices itself), if such
- // prefix exists.
- //
- // This load is safe if any prefix of its operands is safe to load.
- return Low != Set.end() && isPrefix(*Low, Indices);
+ return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
+ NeededAlign, Bytes, DL);
+ });
}
-/// Mark the given indices (ToMark) as safe in the given set of indices
-/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
-/// is already a prefix of Indices in Safe, Indices are implicitely marked safe
-/// already. Furthermore, any indices that Indices is itself a prefix of, are
-/// removed from Safe (since they are implicitely safe because of Indices now).
-static void markIndicesSafe(const IndicesVector &ToMark,
- std::set<IndicesVector> &Safe) {
- std::set<IndicesVector>::iterator Low;
- Low = Safe.upper_bound(ToMark);
- // Guard against the case where Safe is empty
- if (Low != Safe.begin())
- Low--;
- // Low is now the last element smaller than or equal to Indices. This
- // means it points to a prefix of Indices (possibly Indices itself), if
- // such prefix exists.
- if (Low != Safe.end()) {
- if (isPrefix(*Low, ToMark))
- // If there is already a prefix of these indices (or exactly these
- // indices) marked a safe, don't bother adding these indices
- return;
-
- // Increment Low, so we can use it as a "insert before" hint
- ++Low;
- }
- // Insert
- Low = Safe.insert(Low, ToMark);
- ++Low;
- // If there we're a prefix of longer index list(s), remove those
- std::set<IndicesVector>::iterator End = Safe.end();
- while (Low != End && isPrefix(ToMark, *Low)) {
- std::set<IndicesVector>::iterator Remove = Low;
- ++Low;
- Safe.erase(Remove);
- }
-}
-
-/// isSafeToPromoteArgument - As you might guess from the name of this method,
-/// it checks to see if it is both safe and useful to promote the argument.
-/// This method limits promotion of aggregates to only promote up to three
-/// elements of the aggregate in order to avoid exploding the number of
-/// arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR,
- unsigned MaxElements) {
- using GEPIndicesSet = std::set<IndicesVector>;
-
+/// Determine that this argument is safe to promote, and find the argument
+/// parts it can be promoted into.
+static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
+ unsigned MaxElements, bool IsRecursive,
+ SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
// Quick exit for unused arguments
if (Arg->use_empty())
return true;
- // We can only promote this argument if all of the uses are loads, or are GEP
- // instructions (with constant indices) that are subsequently loaded.
+ // We can only promote this argument if all the uses are loads at known
+ // offsets.
//
// Promoting the argument causes it to be loaded in the caller
// unconditionally. This is only safe if we can prove that either the load
@@ -578,157 +463,193 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
// anyway, in the latter case, invalid loads won't happen. This prevents us
// from introducing an invalid load that wouldn't have happened in the
// original code.
- //
- // This set will contain all sets of indices that are loaded in the entry
- // block, and thus are safe to unconditionally load in the caller.
- GEPIndicesSet SafeToUnconditionallyLoad;
- // This set contains all the sets of indices that we are planning to promote.
- // This makes it possible to limit the number of arguments added.
- GEPIndicesSet ToPromote;
+ SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
+ Align NeededAlign(1);
+ uint64_t NeededDerefBytes = 0;
- // If the pointer is always valid, any load with first index 0 is valid.
+ // And if this is a byval argument we also allow to have store instructions.
+ // Only handle in such way arguments with specified alignment;
+ // if it's unspecified, the actual alignment of the argument is
+ // target-specific.
+ bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
- if (ByValTy)
- SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+ // An end user of a pointer argument is a load or store instruction.
+ // Returns None if this load or store is not based on the argument. Return
+ // true if we can promote the instruction, false otherwise.
+ auto HandleEndUser = [&](auto *I, Type *Ty,
+ bool GuaranteedToExecute) -> Optional<bool> {
+ // Don't promote volatile or atomic instructions.
+ if (!I->isSimple())
+ return false;
- // Whenever a new underlying type for the operand is found, make sure it's
- // consistent with the GEPs and loads we've already seen and, if necessary,
- // use it to see if all incoming pointers are valid (which implies the 0-index
- // is safe).
- Type *BaseTy = ByValTy;
- auto UpdateBaseTy = [&](Type *NewBaseTy) {
- if (BaseTy)
- return BaseTy == NewBaseTy;
+ Value *Ptr = I->getPointerOperand();
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
+ /* AllowNonInbounds */ true);
+ if (Ptr != Arg)
+ return None;
- BaseTy = NewBaseTy;
- if (allCallersPassValidPointerForArgument(Arg, BaseTy)) {
- assert(SafeToUnconditionallyLoad.empty());
- SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
- }
+ if (Offset.getSignificantBits() >= 64)
+ return false;
- return true;
- };
+ TypeSize Size = DL.getTypeStoreSize(Ty);
+ // Don't try to promote scalable types.
+ if (Size.isScalable())
+ return false;
- // First, iterate functions that are guaranteed to execution on function
- // entry and mark loads of (geps of) arguments as safe.
- BasicBlock &EntryBlock = Arg->getParent()->front();
- // Declare this here so we can reuse it
- IndicesVector Indices;
- for (Instruction &I : EntryBlock) {
- if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- Value *V = LI->getPointerOperand();
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
- V = GEP->getPointerOperand();
- if (V == Arg) {
- // This load actually loads (part of) Arg? Check the indices then.
- Indices.reserve(GEP->getNumIndices());
- for (Use &Idx : GEP->indices())
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
- Indices.push_back(CI->getSExtValue());
- else
- // We found a non-constant GEP index for this argument? Bail out
- // right away, can't promote this argument at all.
- return false;
+ // If this is a recursive function and one of the types is a pointer,
+ // then promoting it might lead to recursive promotion.
+ if (IsRecursive && Ty->isPointerTy())
+ return false;
- if (!UpdateBaseTy(GEP->getSourceElementType()))
- return false;
+ int64_t Off = Offset.getSExtValue();
+ auto Pair = ArgParts.try_emplace(
+ Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
+ ArgPart &Part = Pair.first->second;
+ bool OffsetNotSeenBefore = Pair.second;
- // Indices checked out, mark them as safe
- markIndicesSafe(Indices, SafeToUnconditionallyLoad);
- Indices.clear();
- }
- } else if (V == Arg) {
- // Direct loads are equivalent to a GEP with a single 0 index.
- markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ // We limit promotion to only promoting up to a fixed number of elements of
+ // the aggregate.
+ if (MaxElements > 0 && ArgParts.size() > MaxElements) {
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+ << "more than " << MaxElements << " parts\n");
+ return false;
+ }
- if (BaseTy && LI->getType() != BaseTy)
- return false;
+ // For now, we only support loading/storing one specific type at a given
+ // offset.
+ if (Part.Ty != Ty) {
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+ << "accessed as both " << *Part.Ty << " and " << *Ty
+ << " at offset " << Off << "\n");
+ return false;
+ }
- BaseTy = LI->getType();
- }
+ // If this instruction is not guaranteed to execute, and we haven't seen a
+ // load or store at this offset before (or it had lower alignment), then we
+ // need to remember that requirement.
+ // Note that skipping instructions of previously seen offsets is only
+ // correct because we only allow a single type for a given offset, which
+ // also means that the number of accessed bytes will be the same.
+ if (!GuaranteedToExecute &&
+ (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
+ // We won't be able to prove dereferenceability for negative offsets.
+ if (Off < 0)
+ return false;
+
+ // If the offset is not aligned, an aligned base pointer won't help.
+ if (!isAligned(I->getAlign(), Off))
+ return false;
+
+ NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
+ NeededAlign = std::max(NeededAlign, I->getAlign());
}
+ Part.Alignment = std::max(Part.Alignment, I->getAlign());
+ return true;
+ };
+
+ // Look for loads and stores that are guaranteed to execute on entry.
+ for (Instruction &I : Arg->getParent()->getEntryBlock()) {
+ Optional<bool> Res{};
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+ Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
+ /* GuaranteedToExecute */ true);
+ if (Res && !*Res)
+ return false;
+
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
}
- // Now, iterate all uses of the argument to see if there are any uses that are
- // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+ // Now look at all loads of the argument. Remember the load instructions
+ // for the aliasing check below.
+ SmallVector<const Use *, 16> Worklist;
+ SmallPtrSet<const Use *, 16> Visited;
SmallVector<LoadInst *, 16> Loads;
- IndicesVector Operands;
- for (Use &U : Arg->uses()) {
- User *UR = U.getUser();
- Operands.clear();
- if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
- // Don't hack volatile/atomic loads
- if (!LI->isSimple())
- return false;
- Loads.push_back(LI);
- // Direct loads are equivalent to a GEP with a zero index and then a load.
- Operands.push_back(0);
+ auto AppendUses = [&](const Value *V) {
+ for (const Use &U : V->uses())
+ if (Visited.insert(&U).second)
+ Worklist.push_back(&U);
+ };
+ AppendUses(Arg);
+ while (!Worklist.empty()) {
+ const Use *U = Worklist.pop_back_val();
+ Value *V = U->getUser();
+ if (isa<BitCastInst>(V)) {
+ AppendUses(V);
+ continue;
+ }
- if (!UpdateBaseTy(LI->getType()))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ if (!GEP->hasAllConstantIndices())
return false;
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
- if (GEP->use_empty()) {
- // Dead GEP's cause trouble later. Just remove them if we run into
- // them.
- continue;
- }
+ AppendUses(V);
+ continue;
+ }
- if (!UpdateBaseTy(GEP->getSourceElementType()))
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
return false;
+ Loads.push_back(LI);
+ continue;
+ }
- // Ensure that all of the indices are constants.
- for (Use &Idx : GEP->indices())
- if (ConstantInt *C = dyn_cast<ConstantInt>(Idx))
- Operands.push_back(C->getSExtValue());
- else
- return false; // Not a constant operand GEP!
-
- // Ensure that the only users of the GEP are load instructions.
- for (User *GEPU : GEP->users())
- if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
- // Don't hack volatile/atomic loads
- if (!LI->isSimple())
- return false;
- Loads.push_back(LI);
- } else {
- // Other uses than load?
- return false;
- }
- } else {
- return false; // Not a load or a GEP.
+ // Stores are allowed for byval arguments
+ auto *SI = dyn_cast<StoreInst>(V);
+ if (AreStoresAllowed && SI &&
+ U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
+ if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
+ /* GuaranteedToExecute */ false))
+ return false;
+ continue;
+ // Only stores TO the argument is allowed, all the other stores are
+ // unknown users
}
- // Now, see if it is safe to promote this load / loads of this GEP. Loading
- // is safe if Operands, or a prefix of Operands, is marked as safe.
- if (!prefixIn(Operands, SafeToUnconditionallyLoad))
- return false;
+ // Unknown user.
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+ << "unknown user " << *V << "\n");
+ return false;
+ }
- // See if we are already promoting a load with these indices. If not, check
- // to make sure that we aren't promoting too many elements. If so, nothing
- // to do.
- if (ToPromote.find(Operands) == ToPromote.end()) {
- if (MaxElements > 0 && ToPromote.size() == MaxElements) {
- LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '"
- << Arg->getName()
- << "' because it would require adding more "
- << "than " << MaxElements
- << " arguments to the function.\n");
- // We limit aggregate promotion to only promoting up to a fixed number
- // of elements of the aggregate.
- return false;
- }
- ToPromote.insert(std::move(Operands));
+ if (NeededDerefBytes || NeededAlign > 1) {
+ // Try to prove a required deref / aligned requirement.
+ if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
+ NeededDerefBytes)) {
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+ << "not dereferenceable or aligned\n");
+ return false;
}
}
- if (Loads.empty())
+ if (ArgParts.empty())
return true; // No users, this is a dead argument.
- // Okay, now we know that the argument is only used by load instructions and
+ // Sort parts by offset.
+ append_range(ArgPartsVec, ArgParts);
+ sort(ArgPartsVec,
+ [](const auto &A, const auto &B) { return A.first < B.first; });
+
+ // Make sure the parts are non-overlapping.
+ int64_t Offset = ArgPartsVec[0].first;
+ for (const auto &Pair : ArgPartsVec) {
+ if (Pair.first < Offset)
+ return false; // Overlap with previous part.
+
+ Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
+ }
+
+ // If store instructions are allowed, the path from the entry of the function
+ // to each load may be not free of instructions that potentially invalidate
+ // the load, and this is an admissible situation.
+ if (AreStoresAllowed)
+ return true;
+
+ // Okay, now we know that the argument is only used by load instructions, and
// it is safe to unconditionally perform all of them. Use alias analysis to
// check to see if the pointer is guaranteed to not be modified from entry of
// the function to each of the load instructions.
@@ -762,118 +683,31 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
return true;
}
-bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
- // There is no size information, so be conservative.
- if (!type->isSized())
- return false;
-
- // If the alloc size is not equal to the storage size, then there are padding
- // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
- return false;
-
- // FIXME: This isn't the right way to check for padding in vectors with
- // non-byte-size elements.
- if (VectorType *seqTy = dyn_cast<VectorType>(type))
- return isDenselyPacked(seqTy->getElementType(), DL);
-
- // For array types, check for padding within members.
- if (ArrayType *seqTy = dyn_cast<ArrayType>(type))
- return isDenselyPacked(seqTy->getElementType(), DL);
-
- if (!isa<StructType>(type))
- return true;
-
- // Check for padding within and between elements of a struct.
- StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL.getStructLayout(StructTy);
- uint64_t StartPos = 0;
- for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
- Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy, DL))
- return false;
- if (StartPos != Layout->getElementOffsetInBits(i))
- return false;
- StartPos += DL.getTypeAllocSizeInBits(ElTy);
- }
-
- return true;
-}
-
-/// Checks if the padding bytes of an argument could be accessed.
-static bool canPaddingBeAccessed(Argument *arg) {
- assert(arg->hasByValAttr());
-
- // Track all the pointers to the argument to make sure they are not captured.
- SmallPtrSet<Value *, 16> PtrValues;
- PtrValues.insert(arg);
-
- // Track all of the stores.
- SmallVector<StoreInst *, 16> Stores;
-
- // Scan through the uses recursively to make sure the pointer is always used
- // sanely.
- SmallVector<Value *, 16> WorkList(arg->users());
- while (!WorkList.empty()) {
- Value *V = WorkList.pop_back_val();
- if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
- if (PtrValues.insert(V).second)
- llvm::append_range(WorkList, V->users());
- } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
- Stores.push_back(Store);
- } else if (!isa<LoadInst>(V)) {
- return true;
- }
- }
-
- // Check to make sure the pointers aren't captured
- for (StoreInst *Store : Stores)
- if (PtrValues.count(Store->getValueOperand()))
- return true;
-
- return false;
-}
-
-/// Check if callers and the callee \p F agree how promoted arguments would be
-/// passed. The ones that they do not agree on are eliminated from the sets but
-/// the return value has to be observed as well.
-static bool areFunctionArgsABICompatible(
- const Function &F, const TargetTransformInfo &TTI,
- SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
- // TODO: Check individual arguments so we can promote a subset?
- SmallVector<Type *, 32> Types;
- for (Argument *Arg : ArgsToPromote)
- Types.push_back(Arg->getType()->getPointerElementType());
- for (Argument *Arg : ByValArgsToTransform)
- Types.push_back(Arg->getParamByValType());
-
- for (const Use &U : F.uses()) {
+/// Check if callers and callee agree on how promoted arguments would be
+/// passed.
+static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
+ const TargetTransformInfo &TTI) {
+ return all_of(F.uses(), [&](const Use &U) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
if (!CB)
return false;
+
const Function *Caller = CB->getCaller();
const Function *Callee = CB->getCalledFunction();
- if (!TTI.areTypesABICompatible(Caller, Callee, Types))
- return false;
- }
- return true;
+ return TTI.areTypesABICompatible(Caller, Callee, Types);
+ });
}
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
-static Function *
-promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements,
- Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
- ReplaceCallSite,
- const TargetTransformInfo &TTI) {
+static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
+ unsigned MaxElements, bool IsRecursive) {
// Don't perform argument promotion for naked functions; otherwise we can end
// up removing parameters that are seemingly 'not used' as they are referred
// to in the assembly.
- if(F->hasFnAttribute(Attribute::Naked))
+ if (F->hasFnAttribute(Attribute::Naked))
return nullptr;
// Make sure that it is local to this module.
@@ -903,20 +737,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// Second check: make sure that all callers are direct callers. We can't
// transform functions that have indirect callers. Also see if the function
- // is self-recursive and check that target features are compatible.
- bool isSelfRecursive = false;
+ // is self-recursive.
for (Use &U : F->uses()) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
// Must be a direct call.
- if (CB == nullptr || !CB->isCallee(&U))
+ if (CB == nullptr || !CB->isCallee(&U) ||
+ CB->getFunctionType() != F->getFunctionType())
return nullptr;
// Can't change signature of musttail callee
if (CB->isMustTailCall())
return nullptr;
- if (CB->getParent()->getParent() == F)
- isSelfRecursive = true;
+ if (CB->getFunction() == F)
+ IsRecursive = true;
}
// Can't change signature of musttail caller
@@ -926,16 +760,13 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
return nullptr;
const DataLayout &DL = F->getParent()->getDataLayout();
-
- AAResults &AAR = AARGetter(*F);
+ auto &AAR = FAM.getResult<AAManager>(*F);
+ const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
- SmallPtrSet<Argument *, 8> ArgsToPromote;
- SmallPtrSet<Argument *, 8> ByValArgsToTransform;
+ DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
for (Argument *PtrArg : PointerArgs) {
- Type *AgTy = PtrArg->getType()->getPointerElementType();
-
// Replace sret attribute with noalias. This reduces register pressure by
// avoiding a register copy.
if (PtrArg->hasStructRetAttr()) {
@@ -949,72 +780,25 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
}
}
- // If this is a byval argument, and if the aggregate type is small, just
- // pass the elements, which is always safe, if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed.
- //
- // Only handle arguments with specified alignment; if it's unspecified, the
- // actual alignment of the argument is target-specific.
- bool isSafeToPromote = PtrArg->hasByValAttr() && PtrArg->getParamAlign() &&
- (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
- !canPaddingBeAccessed(PtrArg));
- if (isSafeToPromote) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
- LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '"
- << PtrArg->getName()
- << "' because it would require adding more"
- << " than " << MaxElements
- << " arguments to the function.\n");
- continue;
- }
+ // If we can promote the pointer to its value.
+ SmallVector<OffsetAndArgPart, 4> ArgParts;
- // If all the elements are single-value types, we can promote it.
- bool AllSimple = true;
- for (const auto *EltTy : STy->elements()) {
- if (!EltTy->isSingleValueType()) {
- AllSimple = false;
- break;
- }
- }
-
- // Safe to transform, don't even bother trying to "promote" it.
- // Passing the elements as a scalar will allow sroa to hack on
- // the new alloca we introduce.
- if (AllSimple) {
- ByValArgsToTransform.insert(PtrArg);
- continue;
- }
- }
- }
+ if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
+ SmallVector<Type *, 4> Types;
+ for (const auto &Pair : ArgParts)
+ Types.push_back(Pair.second.Ty);
- // If the argument is a recursive type and we're in a recursive
- // function, we could end up infinitely peeling the function argument.
- if (isSelfRecursive) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- bool RecursiveType =
- llvm::is_contained(STy->elements(), PtrArg->getType());
- if (RecursiveType)
- continue;
+ if (areTypesABICompatible(Types, *F, TTI)) {
+ ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
}
}
-
- // Otherwise, see if we can promote the pointer to its value.
- Type *ByValTy =
- PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr;
- if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements))
- ArgsToPromote.insert(PtrArg);
}
// No promotable pointer arguments.
- if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ if (ArgsToPromote.empty())
return nullptr;
- if (!areFunctionArgsABICompatible(
- *F, TTI, ArgsToPromote, ByValArgsToTransform))
- return nullptr;
-
- return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
+ return doPromotion(F, FAM, ArgsToPromote);
}
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
@@ -1030,19 +814,10 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ bool IsRecursive = C.size() > 1;
for (LazyCallGraph::Node &N : C) {
Function &OldF = N.getFunction();
-
- // FIXME: This lambda must only be used with this function. We should
- // skip the lambda and just get the AA results directly.
- auto AARGetter = [&](Function &F) -> AAResults & {
- assert(&F == &OldF && "Called with an unexpected function!");
- return FAM.getResult<AAManager>(F);
- };
-
- const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
- Function *NewF =
- promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
+ Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
if (!NewF)
continue;
LocalChange = true;
@@ -1077,111 +852,3 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
PA.preserveSet<AllAnalysesOn<Function>>();
return PA;
}
-
-namespace {
-
-/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
-struct ArgPromotion : public CallGraphSCCPass {
- // Pass identification, replacement for typeid
- static char ID;
-
- explicit ArgPromotion(unsigned MaxElements = 3)
- : CallGraphSCCPass(ID), MaxElements(MaxElements) {
- initializeArgPromotionPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- getAAResultsAnalysisUsage(AU);
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
-
- bool runOnSCC(CallGraphSCC &SCC) override;
-
-private:
- using llvm::Pass::doInitialization;
-
- bool doInitialization(CallGraph &CG) override;
-
- /// The maximum number of elements to expand, or 0 for unlimited.
- unsigned MaxElements;
-};
-
-} // end anonymous namespace
-
-char ArgPromotion::ID = 0;
-
-INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false)
-
-Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
- return new ArgPromotion(MaxElements);
-}
-
-bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
- if (skipSCC(SCC))
- return false;
-
- // Get the callgraph information that we need to update to reflect our
- // changes.
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
-
- LegacyAARGetter AARGetter(*this);
-
- bool Changed = false, LocalChange;
-
- // Iterate until we stop promoting from this SCC.
- do {
- LocalChange = false;
- // Attempt to promote arguments from all functions in this SCC.
- for (CallGraphNode *OldNode : SCC) {
- Function *OldF = OldNode->getFunction();
- if (!OldF)
- continue;
-
- auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) {
- Function *Caller = OldCS.getParent()->getParent();
- CallGraphNode *NewCalleeNode =
- CG.getOrInsertFunction(NewCS.getCalledFunction());
- CallGraphNode *CallerNode = CG[Caller];
- CallerNode->replaceCallEdge(cast<CallBase>(OldCS),
- cast<CallBase>(NewCS), NewCalleeNode);
- };
-
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF);
- if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
- {ReplaceCallSite}, TTI)) {
- LocalChange = true;
-
- // Update the call graph for the newly promoted function.
- CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
- NewNode->stealCalledFunctionsFrom(OldNode);
- if (OldNode->getNumReferences() == 0)
- delete CG.removeFunctionFromModule(OldNode);
- else
- OldF->setLinkage(Function::ExternalLinkage);
-
- // And updat ethe SCC we're iterating as well.
- SCC.ReplaceNode(OldNode, NewNode);
- }
- }
- // Remember that we changed something.
- Changed |= LocalChange;
- } while (LocalChange);
-
- return Changed;
-}
-
-bool ArgPromotion::doInitialization(CallGraph &CG) {
- return CallGraphSCCPass::doInitialization(CG);
-}
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index d66140a726f6..b05b7990e3f0 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -15,29 +15,25 @@
#include "llvm/Transforms/IPO/Attributor.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -50,6 +46,10 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/Verifier.h"
+#endif
+
#include <cassert>
#include <string>
@@ -123,13 +123,13 @@ static cl::list<std::string>
SeedAllowList("attributor-seed-allow-list", cl::Hidden,
cl::desc("Comma seperated list of attribute names that are "
"allowed to be seeded."),
- cl::ZeroOrMore, cl::CommaSeparated);
+ cl::CommaSeparated);
static cl::list<std::string> FunctionSeedAllowList(
"attributor-function-seed-allow-list", cl::Hidden,
cl::desc("Comma seperated list of function names that are "
"allowed to be seeded."),
- cl::ZeroOrMore, cl::CommaSeparated);
+ cl::CommaSeparated);
#endif
static cl::opt<bool>
@@ -209,33 +209,25 @@ bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
}
bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
- const Value &V) {
- if (auto *C = dyn_cast<Constant>(&V))
- return !C->isThreadDependent();
- // TODO: Inspect and cache more complex instructions.
- if (auto *CB = dyn_cast<CallBase>(&V))
- return CB->getNumOperands() == 0 && !CB->mayHaveSideEffects() &&
- !CB->mayReadFromMemory();
- const Function *Scope = nullptr;
- if (auto *I = dyn_cast<Instruction>(&V))
- Scope = I->getFunction();
- if (auto *A = dyn_cast<Argument>(&V))
- Scope = A->getParent();
- if (!Scope)
+ const Value &V, bool ForAnalysisOnly) {
+ // TODO: See the AAInstanceInfo class comment.
+ if (!ForAnalysisOnly)
return false;
- auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- QueryingAA, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
- return NoRecurseAA.isAssumedNoRecurse();
+ auto &InstanceInfoAA = A.getAAFor<AAInstanceInfo>(
+ QueryingAA, IRPosition::value(V), DepClassTy::OPTIONAL);
+ return InstanceInfoAA.isAssumedUniqueForAnalysis();
}
Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty,
const TargetLibraryInfo *TLI) {
if (isa<AllocaInst>(Obj))
return UndefValue::get(&Ty);
- if (isAllocationFn(&Obj, TLI))
- return getInitialValueOfAllocation(&cast<CallBase>(Obj), TLI, &Ty);
+ if (Constant *Init = getInitialValueOfAllocation(&Obj, TLI, &Ty))
+ return Init;
auto *GV = dyn_cast<GlobalVariable>(&Obj);
- if (!GV || !GV->hasLocalLinkage())
+ if (!GV)
+ return nullptr;
+ if (!GV->hasLocalLinkage() && !(GV->isConstant() && GV->hasInitializer()))
return nullptr;
if (!GV->hasInitializer())
return UndefValue::get(&Ty);
@@ -252,19 +244,29 @@ bool AA::isValidInScope(const Value &V, const Function *Scope) {
return false;
}
-bool AA::isValidAtPosition(const Value &V, const Instruction &CtxI,
+bool AA::isValidAtPosition(const AA::ValueAndContext &VAC,
InformationCache &InfoCache) {
- if (isa<Constant>(V))
+ if (isa<Constant>(VAC.getValue()) || VAC.getValue() == VAC.getCtxI())
return true;
- const Function *Scope = CtxI.getFunction();
- if (auto *A = dyn_cast<Argument>(&V))
+ const Function *Scope = nullptr;
+ const Instruction *CtxI = VAC.getCtxI();
+ if (CtxI)
+ Scope = CtxI->getFunction();
+ if (auto *A = dyn_cast<Argument>(VAC.getValue()))
return A->getParent() == Scope;
- if (auto *I = dyn_cast<Instruction>(&V))
+ if (auto *I = dyn_cast<Instruction>(VAC.getValue())) {
if (I->getFunction() == Scope) {
- const DominatorTree *DT =
- InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Scope);
- return DT && DT->dominates(I, &CtxI);
+ if (const DominatorTree *DT =
+ InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
+ *Scope))
+ return DT->dominates(I, CtxI);
+ // Local dominance check mostly for the old PM passes.
+ if (CtxI && I->getParent() == CtxI->getParent())
+ return llvm::any_of(
+ make_range(I->getIterator(), I->getParent()->end()),
+ [&](const Instruction &AfterI) { return &AfterI == CtxI; });
}
+ }
return false;
}
@@ -295,11 +297,11 @@ AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
const Optional<Value *> &B, Type *Ty) {
if (A == B)
return A;
- if (!B.hasValue())
+ if (!B)
return A;
if (*B == nullptr)
return nullptr;
- if (!A.hasValue())
+ if (!A)
return Ty ? getWithType(**B, *Ty) : nullptr;
if (*A == nullptr)
return nullptr;
@@ -314,21 +316,33 @@ AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
return nullptr;
}
-bool AA::getPotentialCopiesOfStoredValue(
- Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
- const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) {
+template <bool IsLoad, typename Ty>
+static bool getPotentialCopiesOfMemoryValue(
+ Attributor &A, Ty &I, SmallSetVector<Value *, 4> &PotentialCopies,
+ SmallSetVector<Instruction *, 4> &PotentialValueOrigins,
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact) {
+ LLVM_DEBUG(dbgs() << "Trying to determine the potential copies of " << I
+ << " (only exact: " << OnlyExact << ")\n";);
- Value &Ptr = *SI.getPointerOperand();
+ Value &Ptr = *I.getPointerOperand();
SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI)) {
+ if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I,
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs() << "Underlying objects stored into could not be determined\n";);
return false;
}
+ // Containers to remember the pointer infos and new copies while we are not
+ // sure that we can find all of them. If we abort we want to avoid spurious
+ // dependences and potential copies in the provided container.
SmallVector<const AAPointerInfo *> PIs;
SmallVector<Value *> NewCopies;
+ SmallVector<Instruction *> NewCopyOrigins;
+ const auto *TLI =
+ A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction());
for (Value *Obj : Objects) {
LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
if (isa<UndefValue>(Obj))
@@ -336,7 +350,7 @@ bool AA::getPotentialCopiesOfStoredValue(
if (isa<ConstantPointerNull>(Obj)) {
// A null pointer access can be undefined but any offset from null may
// be OK. We do not try to optimize the latter.
- if (!NullPointerIsDefined(SI.getFunction(),
+ if (!NullPointerIsDefined(I.getFunction(),
Ptr.getType()->getPointerAddressSpace()) &&
A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) ==
Obj)
@@ -345,37 +359,74 @@ bool AA::getPotentialCopiesOfStoredValue(
dbgs() << "Underlying object is a valid nullptr, giving up.\n";);
return false;
}
+ // TODO: Use assumed noalias return.
if (!isa<AllocaInst>(Obj) && !isa<GlobalVariable>(Obj) &&
- !isNoAliasCall(Obj)) {
+ !(IsLoad ? isAllocationFn(Obj, TLI) : isNoAliasCall(Obj))) {
LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj
<< "\n";);
return false;
}
if (auto *GV = dyn_cast<GlobalVariable>(Obj))
- if (!GV->hasLocalLinkage()) {
+ if (!GV->hasLocalLinkage() &&
+ !(GV->isConstant() && GV->hasInitializer())) {
LLVM_DEBUG(dbgs() << "Underlying object is global with external "
"linkage, not supported yet: "
<< *Obj << "\n";);
return false;
}
+ if (IsLoad) {
+ Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI);
+ if (!InitialValue)
+ return false;
+ NewCopies.push_back(InitialValue);
+ NewCopyOrigins.push_back(nullptr);
+ }
+
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
- if (!Acc.isRead())
+ if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead()))
+ return true;
+ if (IsLoad && Acc.isWrittenValueYetUndetermined())
return true;
- auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
- if (!LI) {
- LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
- "instruction not supported yet: "
- << *Acc.getRemoteInst() << "\n";);
+ if (OnlyExact && !IsExact &&
+ !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) {
+ LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst()
+ << ", abort!\n");
return false;
}
- NewCopies.push_back(LI);
+ if (IsLoad) {
+ assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
+ if (!Acc.isWrittenValueUnknown()) {
+ NewCopies.push_back(Acc.getWrittenValue());
+ NewCopyOrigins.push_back(Acc.getRemoteInst());
+ return true;
+ }
+ auto *SI = dyn_cast<StoreInst>(Acc.getRemoteInst());
+ if (!SI) {
+ LLVM_DEBUG(dbgs() << "Underlying object written through a non-store "
+ "instruction not supported yet: "
+ << *Acc.getRemoteInst() << "\n";);
+ return false;
+ }
+ NewCopies.push_back(SI->getValueOperand());
+ NewCopyOrigins.push_back(SI);
+ } else {
+ assert(isa<StoreInst>(I) && "Expected load or store instruction only!");
+ auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
+ if (!LI && OnlyExact) {
+ LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
+ "instruction not supported yet: "
+ << *Acc.getRemoteInst() << "\n";);
+ return false;
+ }
+ NewCopies.push_back(Acc.getRemoteInst());
+ }
return true;
};
auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj),
DepClassTy::NONE);
- if (!PI.forallInterferingAccesses(SI, CheckAccess)) {
+ if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) {
LLVM_DEBUG(
dbgs()
<< "Failed to verify all interfering accesses for underlying object: "
@@ -385,16 +436,40 @@ bool AA::getPotentialCopiesOfStoredValue(
PIs.push_back(&PI);
}
+ // Only if we were successful collection all potential copies we record
+ // dependences (on non-fix AAPointerInfo AAs). We also only then modify the
+ // given PotentialCopies container.
for (auto *PI : PIs) {
if (!PI->getState().isAtFixpoint())
UsedAssumedInformation = true;
A.recordDependence(*PI, QueryingAA, DepClassTy::OPTIONAL);
}
PotentialCopies.insert(NewCopies.begin(), NewCopies.end());
+ PotentialValueOrigins.insert(NewCopyOrigins.begin(), NewCopyOrigins.end());
return true;
}
+bool AA::getPotentiallyLoadedValues(
+ Attributor &A, LoadInst &LI, SmallSetVector<Value *, 4> &PotentialValues,
+ SmallSetVector<Instruction *, 4> &PotentialValueOrigins,
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact) {
+ return getPotentialCopiesOfMemoryValue</* IsLoad */ true>(
+ A, LI, PotentialValues, PotentialValueOrigins, QueryingAA,
+ UsedAssumedInformation, OnlyExact);
+}
+
+bool AA::getPotentialCopiesOfStoredValue(
+ Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact) {
+ SmallSetVector<Instruction *, 4> PotentialValueOrigins;
+ return getPotentialCopiesOfMemoryValue</* IsLoad */ false>(
+ A, SI, PotentialCopies, PotentialValueOrigins, QueryingAA,
+ UsedAssumedInformation, OnlyExact);
+}
+
static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA,
bool RequireReadNone, bool &IsKnown) {
@@ -449,6 +524,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
SmallVector<const Instruction *> Worklist;
Worklist.push_back(&FromI);
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
while (!Worklist.empty()) {
const Instruction *CurFromI = Worklist.pop_back_val();
if (!Visited.insert(CurFromI).second)
@@ -468,7 +545,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
<< *ToI << " [Intra]\n");
if (Result)
return true;
- continue;
+ if (NoRecurseAA.isAssumedNoRecurse())
+ continue;
}
// TODO: If we can go arbitrarily backwards we will eventually reach an
@@ -514,10 +592,10 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
return true;
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
Result = !A.checkForAllCallSites(CheckCallSite, *FromFn,
/* RequireAllCallSites */ true,
- &QueryingAA, AllCallSitesKnown);
+ &QueryingAA, UsedAssumedInformation);
if (Result) {
LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI
<< " in @" << FromFn->getName()
@@ -631,7 +709,7 @@ Argument *IRPosition::getAssociatedArgument() const {
assert(ACS.getCalledFunction()->arg_size() > u &&
"ACS mapped into var-args arguments!");
- if (CBCandidateArg.hasValue()) {
+ if (CBCandidateArg) {
CBCandidateArg = nullptr;
break;
}
@@ -640,7 +718,7 @@ Argument *IRPosition::getAssociatedArgument() const {
}
// If we found a unique callback candidate argument, return it.
- if (CBCandidateArg.hasValue() && CBCandidateArg.getValue())
+ if (CBCandidateArg && CBCandidateArg.getValue())
return CBCandidateArg.getValue();
// If no callbacks were found, or none used the underlying call site operand
@@ -949,22 +1027,24 @@ Attributor::getAssumedConstant(const IRPosition &IRP,
bool &UsedAssumedInformation) {
// First check all callbacks provided by outside AAs. If any of them returns
// a non-null value that is different from the associated value, or None, we
- // assume it's simpliied.
+ // assume it's simplified.
for (auto &CB : SimplificationCallbacks.lookup(IRP)) {
Optional<Value *> SimplifiedV = CB(IRP, &AA, UsedAssumedInformation);
- if (!SimplifiedV.hasValue())
+ if (!SimplifiedV)
return llvm::None;
if (isa_and_nonnull<Constant>(*SimplifiedV))
return cast<Constant>(*SimplifiedV);
return nullptr;
}
+ if (auto *C = dyn_cast<Constant>(&IRP.getAssociatedValue()))
+ return C;
const auto &ValueSimplifyAA =
getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE);
Optional<Value *> SimplifiedV =
ValueSimplifyAA.getAssumedSimplifiedValue(*this);
bool IsKnown = ValueSimplifyAA.isAtFixpoint();
UsedAssumedInformation |= !IsKnown;
- if (!SimplifiedV.hasValue()) {
+ if (!SimplifiedV) {
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return llvm::None;
}
@@ -987,18 +1067,18 @@ Attributor::getAssumedSimplified(const IRPosition &IRP,
bool &UsedAssumedInformation) {
// First check all callbacks provided by outside AAs. If any of them returns
// a non-null value that is different from the associated value, or None, we
- // assume it's simpliied.
+ // assume it's simplified.
for (auto &CB : SimplificationCallbacks.lookup(IRP))
return CB(IRP, AA, UsedAssumedInformation);
- // If no high-level/outside simplification occured, use AAValueSimplify.
+ // If no high-level/outside simplification occurred, use AAValueSimplify.
const auto &ValueSimplifyAA =
getOrCreateAAFor<AAValueSimplify>(IRP, AA, DepClassTy::NONE);
Optional<Value *> SimplifiedV =
ValueSimplifyAA.getAssumedSimplifiedValue(*this);
bool IsKnown = ValueSimplifyAA.isAtFixpoint();
UsedAssumedInformation |= !IsKnown;
- if (!SimplifiedV.hasValue()) {
+ if (!SimplifiedV) {
if (AA)
recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL);
return llvm::None;
@@ -1017,7 +1097,7 @@ Attributor::getAssumedSimplified(const IRPosition &IRP,
Optional<Value *> Attributor::translateArgumentToCallSiteContent(
Optional<Value *> V, CallBase &CB, const AbstractAttribute &AA,
bool &UsedAssumedInformation) {
- if (!V.hasValue())
+ if (!V)
return V;
if (*V == nullptr || isa<Constant>(*V))
return V;
@@ -1078,6 +1158,19 @@ bool Attributor::isAssumedDead(const Use &U,
BasicBlock *IncomingBB = PHI->getIncomingBlock(U);
return isAssumedDead(*IncomingBB->getTerminator(), QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
+ if (!CheckBBLivenessOnly && SI->getPointerOperand() != U.get()) {
+ const IRPosition IRP = IRPosition::inst(*SI);
+ const AAIsDead &IsDeadAA =
+ getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
+ if (IsDeadAA.isRemovableStore()) {
+ if (QueryingAA)
+ recordDependence(IsDeadAA, *QueryingAA, DepClass);
+ if (!IsDeadAA.isKnown(AAIsDead::IS_REMOVABLE))
+ UsedAssumedInformation = true;
+ return true;
+ }
+ }
}
return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA,
@@ -1191,6 +1284,7 @@ bool Attributor::checkForAllUses(
function_ref<bool(const Use &, bool &)> Pred,
const AbstractAttribute &QueryingAA, const Value &V,
bool CheckBBLivenessOnly, DepClassTy LivenessDepClass,
+ bool IgnoreDroppableUses,
function_ref<bool(const Use &OldU, const Use &NewU)> EquivalentUseCB) {
// Check the trivial case first as it catches void values.
@@ -1231,7 +1325,7 @@ bool Attributor::checkForAllUses(
LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
continue;
}
- if (U->getUser()->isDroppable()) {
+ if (IgnoreDroppableUses && U->getUser()->isDroppable()) {
LLVM_DEBUG(dbgs() << "[Attributor] Droppable user, skip!\n");
continue;
}
@@ -1241,9 +1335,9 @@ bool Attributor::checkForAllUses(
if (!Visited.insert(U).second)
continue;
SmallSetVector<Value *, 4> PotentialCopies;
- if (AA::getPotentialCopiesOfStoredValue(*this, *SI, PotentialCopies,
- QueryingAA,
- UsedAssumedInformation)) {
+ if (AA::getPotentialCopiesOfStoredValue(
+ *this, *SI, PotentialCopies, QueryingAA, UsedAssumedInformation,
+ /* OnlyExact */ true)) {
LLVM_DEBUG(dbgs() << "[Attributor] Value is stored, continue with "
<< PotentialCopies.size()
<< " potential copies instead!\n");
@@ -1277,7 +1371,7 @@ bool Attributor::checkForAllUses(
bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const AbstractAttribute &QueryingAA,
bool RequireAllCallSites,
- bool &AllCallSitesKnown) {
+ bool &UsedAssumedInformation) {
// We can try to determine information from
// the call sites. However, this is only possible all call sites are known,
// hence the function has internal linkage.
@@ -1286,31 +1380,26 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
if (!AssociatedFunction) {
LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP
<< "\n");
- AllCallSitesKnown = false;
return false;
}
return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites,
- &QueryingAA, AllCallSitesKnown);
+ &QueryingAA, UsedAssumedInformation);
}
bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const Function &Fn,
bool RequireAllCallSites,
const AbstractAttribute *QueryingAA,
- bool &AllCallSitesKnown) {
+ bool &UsedAssumedInformation) {
if (RequireAllCallSites && !Fn.hasLocalLinkage()) {
LLVM_DEBUG(
dbgs()
<< "[Attributor] Function " << Fn.getName()
<< " has no internal linkage, hence not all call sites are known\n");
- AllCallSitesKnown = false;
return false;
}
- // If we do not require all call sites we might not see all.
- AllCallSitesKnown = RequireAllCallSites;
-
SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
for (unsigned u = 0; u < Uses.size(); ++u) {
const Use &U = *Uses[u];
@@ -1322,15 +1411,13 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser()
<< "\n";
});
- bool UsedAssumedInformation = false;
if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true)) {
LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
continue;
}
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
- if (CE->isCast() && CE->getType()->isPointerTy() &&
- CE->getType()->getPointerElementType()->isFunctionTy()) {
+ if (CE->isCast() && CE->getType()->isPointerTy()) {
LLVM_DEBUG(
dbgs() << "[Attributor] Use, is constant cast expression, add "
<< CE->getNumUses()
@@ -1477,30 +1564,24 @@ static bool checkForAllInstructionsImpl(
}
bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
+ const Function *Fn,
const AbstractAttribute &QueryingAA,
const ArrayRef<unsigned> &Opcodes,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly,
bool CheckPotentiallyDead) {
-
- const IRPosition &IRP = QueryingAA.getIRPosition();
// Since we need to provide instructions we have to have an exact definition.
- const Function *AssociatedFunction = IRP.getAssociatedFunction();
- if (!AssociatedFunction)
- return false;
-
- if (AssociatedFunction->isDeclaration())
+ if (!Fn || Fn->isDeclaration())
return false;
// TODO: use the function scope once we have call site AAReturnedValues.
- const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const IRPosition &QueryIRP = IRPosition::function(*Fn);
const auto *LivenessAA =
(CheckBBLivenessOnly || CheckPotentiallyDead)
? nullptr
: &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
- auto &OpcodeInstMap =
- InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
LivenessAA, Opcodes, UsedAssumedInformation,
CheckBBLivenessOnly, CheckPotentiallyDead))
@@ -1509,6 +1590,19 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
return true;
}
+bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
+ const AbstractAttribute &QueryingAA,
+ const ArrayRef<unsigned> &Opcodes,
+ bool &UsedAssumedInformation,
+ bool CheckBBLivenessOnly,
+ bool CheckPotentiallyDead) {
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ return checkForAllInstructions(Pred, AssociatedFunction, QueryingAA, Opcodes,
+ UsedAssumedInformation, CheckBBLivenessOnly,
+ CheckPotentiallyDead);
+}
+
bool Attributor::checkForAllReadWriteInstructions(
function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA,
bool &UsedAssumedInformation) {
@@ -1547,11 +1641,8 @@ void Attributor::runTillFixpoint() {
// the abstract analysis.
unsigned IterationCounter = 1;
- unsigned MaxFixedPointIterations;
- if (MaxFixpointIterations)
- MaxFixedPointIterations = MaxFixpointIterations.getValue();
- else
- MaxFixedPointIterations = SetFixpointIterations;
+ unsigned MaxIterations =
+ Configuration.MaxFixpointIterations.value_or(SetFixpointIterations);
SmallVector<AbstractAttribute *, 32> ChangedAAs;
SetVector<AbstractAttribute *> Worklist, InvalidAAs;
@@ -1636,21 +1727,20 @@ void Attributor::runTillFixpoint() {
QueryAAsAwaitingUpdate.end());
QueryAAsAwaitingUpdate.clear();
- } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
- VerifyMaxFixpointIterations));
+ } while (!Worklist.empty() &&
+ (IterationCounter++ < MaxIterations || VerifyMaxFixpointIterations));
- if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
+ if (IterationCounter > MaxIterations && !Functions.empty()) {
auto Remark = [&](OptimizationRemarkMissed ORM) {
return ORM << "Attributor did not reach a fixpoint after "
- << ore::NV("Iterations", MaxFixedPointIterations)
- << " iterations.";
+ << ore::NV("Iterations", MaxIterations) << " iterations.";
};
- Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
+ Function *F = Functions.front();
emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
}
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
- << IterationCounter << "/" << MaxFixpointIterations
+ << IterationCounter << "/" << MaxIterations
<< " iterations\n");
// Reset abstract arguments not settled in a sound fixpoint by now. This
@@ -1684,11 +1774,9 @@ void Attributor::runTillFixpoint() {
<< " abstract attributes.\n";
});
- if (VerifyMaxFixpointIterations &&
- IterationCounter != MaxFixedPointIterations) {
+ if (VerifyMaxFixpointIterations && IterationCounter != MaxIterations) {
errs() << "\n[Attributor] Fixpoint iteration done after: "
- << IterationCounter << "/" << MaxFixedPointIterations
- << " iterations\n";
+ << IterationCounter << "/" << MaxIterations << " iterations\n";
llvm_unreachable("The fixpoint was not reached with exactly the number of "
"specified iterations!");
}
@@ -1725,6 +1813,9 @@ ChangeStatus Attributor::manifestAttributes() {
if (!State.isValidState())
continue;
+ if (AA->getCtxI() && !isRunOn(*AA->getAnchorScope()))
+ continue;
+
// Skip dead code.
bool UsedAssumedInformation = false;
if (isAssumedDead(*AA, nullptr, UsedAssumedInformation,
@@ -1774,7 +1865,7 @@ ChangeStatus Attributor::manifestAttributes() {
void Attributor::identifyDeadInternalFunctions() {
// Early exit if we don't intend to delete functions.
- if (!DeleteFns)
+ if (!Configuration.DeleteFns)
return;
// Identify dead internal functions and delete them. This happens outside
@@ -1795,7 +1886,7 @@ void Attributor::identifyDeadInternalFunctions() {
if (!F)
continue;
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (checkForAllCallSites(
[&](AbstractCallSite ACS) {
Function *Callee = ACS.getInstruction()->getFunction();
@@ -1803,7 +1894,7 @@ void Attributor::identifyDeadInternalFunctions() {
(Functions.count(Callee) && Callee->hasLocalLinkage() &&
!LiveInternalFns.count(Callee));
},
- *F, true, nullptr, AllCallSitesKnown)) {
+ *F, true, nullptr, UsedAssumedInformation)) {
continue;
}
@@ -1826,7 +1917,8 @@ ChangeStatus Attributor::cleanupIR() {
<< ToBeDeletedBlocks.size() << " blocks and "
<< ToBeDeletedInsts.size() << " instructions and "
<< ToBeChangedValues.size() << " values and "
- << ToBeChangedUses.size() << " uses. "
+ << ToBeChangedUses.size() << " uses. To insert "
+ << ToBeChangedToUnreachableInsts.size() << " unreachables."
<< "Preserve manifest added " << ManifestAddedBlocks.size()
<< " blocks\n");
@@ -1844,12 +1936,15 @@ ChangeStatus Attributor::cleanupIR() {
NewV = Entry.first;
} while (true);
+ Instruction *I = dyn_cast<Instruction>(U->getUser());
+ assert((!I || isRunOn(*I->getFunction())) &&
+ "Cannot replace an instruction outside the current SCC!");
+
// Do not replace uses in returns if the value is a must-tail call we will
// not delete.
- if (auto *RI = dyn_cast<ReturnInst>(U->getUser())) {
+ if (auto *RI = dyn_cast_or_null<ReturnInst>(I)) {
if (auto *CI = dyn_cast<CallInst>(OldV->stripPointerCasts()))
- if (CI->isMustTailCall() &&
- (!ToBeDeletedInsts.count(CI) || !isRunOn(*CI->getCaller())))
+ if (CI->isMustTailCall() && !ToBeDeletedInsts.count(CI))
return;
// If we rewrite a return and the new value is not an argument, strip the
// `returned` attribute as it is wrong now.
@@ -1859,8 +1954,8 @@ ChangeStatus Attributor::cleanupIR() {
}
// Do not perform call graph altering changes outside the SCC.
- if (auto *CB = dyn_cast<CallBase>(U->getUser()))
- if (CB->isCallee(U) && !isRunOn(*CB->getCaller()))
+ if (auto *CB = dyn_cast_or_null<CallBase>(I))
+ if (CB->isCallee(U))
return;
LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser()
@@ -1908,8 +2003,12 @@ ChangeStatus Attributor::cleanupIR() {
for (auto &U : OldV->uses())
if (Entry.second || !U.getUser()->isDroppable())
Uses.push_back(&U);
- for (Use *U : Uses)
+ for (Use *U : Uses) {
+ if (auto *I = dyn_cast<Instruction>(U->getUser()))
+ if (!isRunOn(*I->getFunction()))
+ continue;
ReplaceUse(U, NewV);
+ }
}
for (auto &V : InvokeWithDeadSuccessor)
@@ -1940,15 +2039,15 @@ ChangeStatus Attributor::cleanupIR() {
}
}
for (Instruction *I : TerminatorsToFold) {
- if (!isRunOn(*I->getFunction()))
- continue;
+ assert(isRunOn(*I->getFunction()) &&
+ "Cannot replace a terminator outside the current SCC!");
CGModifiedFunctions.insert(I->getFunction());
ConstantFoldTerminator(I->getParent());
}
for (auto &V : ToBeChangedToUnreachableInsts)
if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
- if (!isRunOn(*I->getFunction()))
- continue;
+ assert(isRunOn(*I->getFunction()) &&
+ "Cannot replace an instruction outside the current SCC!");
CGModifiedFunctions.insert(I->getFunction());
changeToUnreachable(I);
}
@@ -1956,10 +2055,10 @@ ChangeStatus Attributor::cleanupIR() {
for (auto &V : ToBeDeletedInsts) {
if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
if (auto *CB = dyn_cast<CallBase>(I)) {
- if (!isRunOn(*I->getFunction()))
- continue;
+ assert(isRunOn(*I->getFunction()) &&
+ "Cannot delete an instruction outside the current SCC!");
if (!isa<IntrinsicInst>(CB))
- CGUpdater.removeCallSite(*CB);
+ Configuration.CGUpdater.removeCallSite(*CB);
}
I->dropDroppableUses();
CGModifiedFunctions.insert(I->getFunction());
@@ -1972,9 +2071,7 @@ ChangeStatus Attributor::cleanupIR() {
}
}
- llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) {
- return !I || !isRunOn(*cast<Instruction>(I)->getFunction());
- });
+ llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) { return !I; });
LLVM_DEBUG({
dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size() << "\n";
@@ -2010,12 +2107,12 @@ ChangeStatus Attributor::cleanupIR() {
for (Function *Fn : CGModifiedFunctions)
if (!ToBeDeletedFunctions.count(Fn) && Functions.count(Fn))
- CGUpdater.reanalyzeFunction(*Fn);
+ Configuration.CGUpdater.reanalyzeFunction(*Fn);
for (Function *Fn : ToBeDeletedFunctions) {
if (!Functions.count(Fn))
continue;
- CGUpdater.removeFunction(*Fn);
+ Configuration.CGUpdater.removeFunction(*Fn);
}
if (!ToBeChangedUses.empty())
@@ -2254,7 +2351,7 @@ bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
bool Attributor::isValidFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
- if (!RewriteSignatures)
+ if (!Configuration.RewriteSignatures)
return false;
Function *Fn = Arg.getParent();
@@ -2290,9 +2387,9 @@ bool Attributor::isValidFunctionSignatureRewrite(
}
// Avoid callbacks for now.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr,
- AllCallSitesKnown)) {
+ UsedAssumedInformation)) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n");
return false;
}
@@ -2305,7 +2402,6 @@ bool Attributor::isValidFunctionSignatureRewrite(
// Forbid must-tail calls for now.
// TODO:
- bool UsedAssumedInformation = false;
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr,
nullptr, {Instruction::Call},
@@ -2370,7 +2466,7 @@ bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
}
ChangeStatus Attributor::rewriteFunctionSignatures(
- SmallPtrSetImpl<Function *> &ModifiedFns) {
+ SmallSetVector<Function *, 8> &ModifiedFns) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
for (auto &It : ArgumentReplacementMap) {
@@ -2403,6 +2499,12 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
}
}
+ uint64_t LargestVectorWidth = 0;
+ for (auto *I : NewArgumentTypes)
+ if (auto *VT = dyn_cast<llvm::VectorType>(I))
+ LargestVectorWidth = std::max(
+ LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());
+
FunctionType *OldFnTy = OldFn->getFunctionType();
Type *RetTy = OldFnTy->getReturnType();
@@ -2432,6 +2534,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
NewFn->setAttributes(AttributeList::get(
Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(),
NewArgumentAttributes));
+ AttributeFuncs::updateMinLegalVectorWidthAttr(*NewFn, LargestVectorWidth);
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
@@ -2509,14 +2612,17 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
Ctx, OldCallAttributeList.getFnAttrs(),
OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes));
+ AttributeFuncs::updateMinLegalVectorWidthAttr(*NewCB->getCaller(),
+ LargestVectorWidth);
+
CallSitePairs.push_back({OldCB, NewCB});
return true;
};
// Use the CallSiteReplacementCreator to create replacement call sites.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
bool Success = checkForAllCallSites(CallSiteReplacementCreator, *OldFn,
- true, nullptr, AllCallSitesKnown);
+ true, nullptr, UsedAssumedInformation);
(void)Success;
assert(Success && "Assumed call site replacement to succeed!");
@@ -2529,6 +2635,9 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
ARIs[OldArgNum]) {
if (ARI->CalleeRepairCB)
ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt);
+ if (ARI->ReplacementTypes.empty())
+ OldFnArgIt->replaceAllUsesWith(
+ PoisonValue::get(OldFnArgIt->getType()));
NewFnArgIt += ARI->ReplacementTypes.size();
} else {
NewFnArgIt->takeName(&*OldFnArgIt);
@@ -2544,17 +2653,17 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
assert(OldCB.getType() == NewCB.getType() &&
"Cannot handle call sites with different types!");
ModifiedFns.insert(OldCB.getFunction());
- CGUpdater.replaceCallSite(OldCB, NewCB);
+ Configuration.CGUpdater.replaceCallSite(OldCB, NewCB);
OldCB.replaceAllUsesWith(&NewCB);
OldCB.eraseFromParent();
}
// Replace the function in the call graph (if any).
- CGUpdater.replaceFunctionWith(*OldFn, *NewFn);
+ Configuration.CGUpdater.replaceFunctionWith(*OldFn, *NewFn);
// If the old function was modified and needed to be reanalyzed, the new one
// does now.
- if (ModifiedFns.erase(OldFn))
+ if (ModifiedFns.remove(OldFn))
ModifiedFns.insert(NewFn);
Changed = ChangeStatus::CHANGED;
@@ -2574,6 +2683,30 @@ void InformationCache::initializeInformationCache(const Function &CF,
// queried by abstract attributes during their initialization or update.
// This has to happen before we create attributes.
+ DenseMap<const Value *, Optional<short>> AssumeUsesMap;
+
+ // Add \p V to the assume uses map which track the number of uses outside of
+ // "visited" assumes. If no outside uses are left the value is added to the
+ // assume only use vector.
+ auto AddToAssumeUsesMap = [&](const Value &V) -> void {
+ SmallVector<const Instruction *> Worklist;
+ if (auto *I = dyn_cast<Instruction>(&V))
+ Worklist.push_back(I);
+ while (!Worklist.empty()) {
+ const Instruction *I = Worklist.pop_back_val();
+ Optional<short> &NumUses = AssumeUsesMap[I];
+ if (!NumUses)
+ NumUses = I->getNumUses();
+ NumUses = NumUses.getValue() - /* this assume */ 1;
+ if (NumUses.getValue() != 0)
+ continue;
+ AssumeOnlyValues.insert(I);
+ for (const Value *Op : I->operands())
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ Worklist.push_back(OpI);
+ }
+ };
+
for (Instruction &I : instructions(&F)) {
bool IsInterestingOpcode = false;
@@ -2594,6 +2727,7 @@ void InformationCache::initializeInformationCache(const Function &CF,
// For `must-tail` calls we remember the caller and callee.
if (auto *Assume = dyn_cast<AssumeInst>(&I)) {
fillMapFromAssume(*Assume, KnowledgeMap);
+ AddToAssumeUsesMap(*Assume->getArgOperand(0));
} else if (cast<CallInst>(I).isMustTailCall()) {
FI.ContainsMustTailCall = true;
if (const Function *Callee = cast<CallInst>(I).getCalledFunction())
@@ -2742,7 +2876,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAIsDead>(RetPos);
// Every function might be simplified.
- getOrCreateAAFor<AAValueSimplify>(RetPos);
+ bool UsedAssumedInformation = false;
+ getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation);
// Every returned value might be marked noundef.
getOrCreateAAFor<AANoUndef>(RetPos);
@@ -2834,7 +2969,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) {
IRPosition CBRetPos = IRPosition::callsite_returned(CB);
- getOrCreateAAFor<AAValueSimplify>(CBRetPos);
+ bool UsedAssumedInformation = false;
+ getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation);
}
for (int I = 0, E = CB.arg_size(); I < E; ++I) {
@@ -2897,10 +3033,15 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAAlign>(
IRPosition::value(*cast<LoadInst>(I).getPointerOperand()));
if (SimplifyAllLoads)
- getOrCreateAAFor<AAValueSimplify>(IRPosition::value(I));
- } else
- getOrCreateAAFor<AAAlign>(
- IRPosition::value(*cast<StoreInst>(I).getPointerOperand()));
+ getAssumedSimplified(IRPosition::value(I), nullptr,
+ UsedAssumedInformation);
+ } else {
+ auto &SI = cast<StoreInst>(I);
+ getOrCreateAAFor<AAIsDead>(IRPosition::inst(I));
+ getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr,
+ UsedAssumedInformation);
+ getOrCreateAAFor<AAAlign>(IRPosition::value(*SI.getPointerOperand()));
+ }
return true;
};
Success = checkForAllInstructionsImpl(
@@ -2975,8 +3116,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS,
if (!S.isValidState())
OS << "full-set";
else {
- for (auto &it : S.getAssumedSet())
- OS << it << ", ";
+ for (auto &It : S.getAssumedSet())
+ OS << It << ", ";
if (S.undefIsContained())
OS << "undef ";
}
@@ -3018,8 +3159,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS,
OS << " [" << Acc.getKind() << "] " << *Acc.getRemoteInst();
if (Acc.getLocalInst() != Acc.getRemoteInst())
OS << " via " << *Acc.getLocalInst();
- if (Acc.getContent().hasValue())
- OS << " [" << *Acc.getContent() << "]";
+ if (Acc.getContent()) {
+ if (*Acc.getContent())
+ OS << " [" << **Acc.getContent() << "]";
+ else
+ OS << " [ <unknown> ]";
+ }
return OS;
}
///}
@@ -3032,7 +3177,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
SetVector<Function *> &Functions,
AnalysisGetter &AG,
CallGraphUpdater &CGUpdater,
- bool DeleteFns) {
+ bool DeleteFns, bool IsModulePass) {
if (Functions.empty())
return false;
@@ -3045,8 +3190,10 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
// Create an Attributor and initially empty information cache that is filled
// while we identify default attribute opportunities.
- Attributor A(Functions, InfoCache, CGUpdater, /* Allowed */ nullptr,
- DeleteFns);
+ AttributorConfig AC(CGUpdater);
+ AC.IsModulePass = IsModulePass;
+ AC.DeleteFns = DeleteFns;
+ Attributor A(Functions, InfoCache, AC);
// Create shallow wrappers for all functions that are not IPO amendable
if (AllowShallowWrappers)
@@ -3151,7 +3298,7 @@ PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns */ true)) {
+ /* DeleteFns */ true, /* IsModulePass */ true)) {
// FIXME: Think about passes we will preserve and add them here.
return PreservedAnalyses::none();
}
@@ -3179,7 +3326,8 @@ PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C,
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns */ false)) {
+ /* DeleteFns */ false,
+ /* IsModulePass */ false)) {
// FIXME: Think about passes we will preserve and add them here.
PreservedAnalyses PA;
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
@@ -3255,7 +3403,8 @@ struct AttributorLegacyPass : public ModulePass {
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns*/ true);
+ /* DeleteFns*/ true,
+ /* IsModulePass */ true);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -3292,7 +3441,8 @@ struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns */ false);
+ /* DeleteFns */ false,
+ /* IsModulePass */ false);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 2d88e329e093..4d99ce7e3175 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -14,9 +14,11 @@
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -30,21 +32,29 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
using namespace llvm;
@@ -69,11 +79,11 @@ static cl::opt<unsigned, true> MaxPotentialValues(
cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
cl::init(7));
-static cl::opt<unsigned>
- MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden,
- cl::desc("Maximum number of interfering writes to "
- "check before assuming all might interfere."),
- cl::init(6));
+static cl::opt<unsigned> MaxInterferingAccesses(
+ "attributor-max-interfering-accesses", cl::Hidden,
+ cl::desc("Maximum number of interfering accesses to "
+ "check before assuming all might interfere."),
+ cl::init(6));
STATISTIC(NumAAs, "Number of abstract attributes created");
@@ -140,6 +150,7 @@ PIPE_OPERATOR(AANonNull)
PIPE_OPERATOR(AANoAlias)
PIPE_OPERATOR(AADereferenceable)
PIPE_OPERATOR(AAAlign)
+PIPE_OPERATOR(AAInstanceInfo)
PIPE_OPERATOR(AANoCapture)
PIPE_OPERATOR(AAValueSimplify)
PIPE_OPERATOR(AANoFree)
@@ -150,7 +161,7 @@ PIPE_OPERATOR(AAMemoryLocation)
PIPE_OPERATOR(AAValueConstantRange)
PIPE_OPERATOR(AAPrivatizablePtr)
PIPE_OPERATOR(AAUndefinedBehavior)
-PIPE_OPERATOR(AAPotentialValues)
+PIPE_OPERATOR(AAPotentialConstantValues)
PIPE_OPERATOR(AANoUndef)
PIPE_OPERATOR(AACallEdges)
PIPE_OPERATOR(AAFunctionReachability)
@@ -170,6 +181,45 @@ ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
} // namespace llvm
+/// Checks if a type could have padding bytes.
+static bool isDenselyPacked(Type *Ty, const DataLayout &DL) {
+ // There is no size information, so be conservative.
+ if (!Ty->isSized())
+ return false;
+
+ // If the alloc size is not equal to the storage size, then there are padding
+ // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
+ if (DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty))
+ return false;
+
+ // FIXME: This isn't the right way to check for padding in vectors with
+ // non-byte-size elements.
+ if (VectorType *SeqTy = dyn_cast<VectorType>(Ty))
+ return isDenselyPacked(SeqTy->getElementType(), DL);
+
+ // For array types, check for padding within members.
+ if (ArrayType *SeqTy = dyn_cast<ArrayType>(Ty))
+ return isDenselyPacked(SeqTy->getElementType(), DL);
+
+ if (!isa<StructType>(Ty))
+ return true;
+
+ // Check for padding within and between elements of a struct.
+ StructType *StructTy = cast<StructType>(Ty);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
+ uint64_t StartPos = 0;
+ for (unsigned I = 0, E = StructTy->getNumElements(); I < E; ++I) {
+ Type *ElTy = StructTy->getElementType(I);
+ if (!isDenselyPacked(ElTy, DL))
+ return false;
+ if (StartPos != Layout->getElementOffsetInBits(I))
+ return false;
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
+ }
+
+ return true;
+}
+
/// Get pointer operand of memory accessing instruction. If \p I is
/// not a memory accessing instruction, return nullptr. If \p AllowVolatile,
/// is set to false and the instruction is volatile, return nullptr.
@@ -236,7 +286,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
}
// Ensure the result has the requested type.
- Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast");
+ Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, ResTy,
+ Ptr->getName() + ".cast");
LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
return Ptr;
@@ -251,25 +302,32 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
/// once. Note that the value used for the callback may still be the value
/// associated with \p IRP (due to PHIs). To limit how much effort is invested,
/// we will never visit more values than specified by \p MaxValues.
-/// If \p Intraprocedural is set to true only values valid in the scope of
-/// \p CtxI will be visited and simplification into other scopes is prevented.
+/// If \p VS does not contain the Interprocedural bit, only values valid in the
+/// scope of \p CtxI will be visited and simplification into other scopes is
+/// prevented.
template <typename StateTy>
static bool genericValueTraversal(
Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
StateTy &State,
function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
VisitValueCB,
- const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
+ const Instruction *CtxI, bool &UsedAssumedInformation,
+ bool UseValueSimplify = true, int MaxValues = 16,
function_ref<Value *(Value *)> StripCB = nullptr,
- bool Intraprocedural = false) {
+ AA::ValueScope VS = AA::Interprocedural) {
- const AAIsDead *LivenessAA = nullptr;
- if (IRP.getAnchorScope())
- LivenessAA = &A.getAAFor<AAIsDead>(
- QueryingAA,
- IRPosition::function(*IRP.getAnchorScope(), IRP.getCallBaseContext()),
- DepClassTy::NONE);
- bool AnyDead = false;
+ struct LivenessInfo {
+ const AAIsDead *LivenessAA = nullptr;
+ bool AnyDead = false;
+ };
+ SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs;
+ auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & {
+ LivenessInfo &LI = LivenessAAs[&F];
+ if (!LI.LivenessAA)
+ LI.LivenessAA = &A.getAAFor<AAIsDead>(QueryingAA, IRPosition::function(F),
+ DepClassTy::NONE);
+ return LI;
+ };
Value *InitialV = &IRP.getAssociatedValue();
using Item = std::pair<Value *, const Instruction *>;
@@ -319,10 +377,9 @@ static bool genericValueTraversal(
// Look through select instructions, visit assumed potential values.
if (auto *SI = dyn_cast<SelectInst>(V)) {
- bool UsedAssumedInformation = false;
Optional<Constant *> C = A.getAssumedConstant(
*SI->getCondition(), QueryingAA, UsedAssumedInformation);
- bool NoValueYet = !C.hasValue();
+ bool NoValueYet = !C;
if (NoValueYet || isa_and_nonnull<UndefValue>(*C))
continue;
if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) {
@@ -340,12 +397,12 @@ static bool genericValueTraversal(
// Look through phi nodes, visit all live operands.
if (auto *PHI = dyn_cast<PHINode>(V)) {
- assert(LivenessAA &&
- "Expected liveness in the presence of instructions!");
+ LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction());
for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
- if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
- AnyDead = true;
+ if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
+ LI.AnyDead = true;
+ UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint();
continue;
}
Worklist.push_back(
@@ -355,9 +412,9 @@ static bool genericValueTraversal(
}
if (auto *Arg = dyn_cast<Argument>(V)) {
- if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) {
+ if ((VS & AA::Interprocedural) && !Arg->hasPassPointeeByValueCopyAttr()) {
SmallVector<Item> CallSiteValues;
- bool AllCallSitesKnown = true;
+ bool UsedAssumedInformation = false;
if (A.checkForAllCallSites(
[&](AbstractCallSite ACS) {
// Callbacks might not have a corresponding call site operand,
@@ -368,7 +425,7 @@ static bool genericValueTraversal(
CallSiteValues.push_back({CSOp, ACS.getInstruction()});
return true;
},
- *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) {
+ *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) {
Worklist.append(CallSiteValues);
continue;
}
@@ -376,14 +433,13 @@ static bool genericValueTraversal(
}
if (UseValueSimplify && !isa<Constant>(V)) {
- bool UsedAssumedInformation = false;
Optional<Value *> SimpleV =
A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
- if (!SimpleV.hasValue())
+ if (!SimpleV)
continue;
Value *NewV = SimpleV.getValue();
if (NewV && NewV != V) {
- if (!Intraprocedural || !CtxI ||
+ if ((VS & AA::Interprocedural) || !CtxI ||
AA::isValidInScope(*NewV, CtxI->getFunction())) {
Worklist.push_back({NewV, CtxI});
continue;
@@ -391,6 +447,37 @@ static bool genericValueTraversal(
}
}
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ bool UsedAssumedInformation = false;
+ // If we ask for the potentially loaded values from the initial pointer we
+ // will simply end up here again. The load is as far as we can make it.
+ if (LI->getPointerOperand() != InitialV) {
+ SmallSetVector<Value *, 4> PotentialCopies;
+ SmallSetVector<Instruction *, 4> PotentialValueOrigins;
+ if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies,
+ PotentialValueOrigins, QueryingAA,
+ UsedAssumedInformation,
+ /* OnlyExact */ true)) {
+ // Values have to be dynamically unique or we lose the fact that a
+ // single llvm::Value might represent two runtime values (e.g., stack
+ // locations in different recursive calls).
+ bool DynamicallyUnique =
+ llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) {
+ return AA::isDynamicallyUnique(A, QueryingAA, *PC);
+ });
+ if (DynamicallyUnique &&
+ ((VS & AA::Interprocedural) || !CtxI ||
+ llvm::all_of(PotentialCopies, [CtxI](Value *PC) {
+ return AA::isValidInScope(*PC, CtxI->getFunction());
+ }))) {
+ for (auto *PotentialCopy : PotentialCopies)
+ Worklist.push_back({PotentialCopy, CtxI});
+ continue;
+ }
+ }
+ }
+ }
+
// Once a leaf is reached we inform the user through the callback.
if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
@@ -400,8 +487,10 @@ static bool genericValueTraversal(
} while (!Worklist.empty());
// If we actually used liveness information so we have to record a dependence.
- if (AnyDead)
- A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
+ for (auto &It : LivenessAAs)
+ if (It.second.AnyDead)
+ A.recordDependence(*It.second.LivenessAA, QueryingAA,
+ DepClassTy::OPTIONAL);
// All values have been visited.
return true;
@@ -411,7 +500,8 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
const Instruction *CtxI,
- bool Intraprocedural) {
+ bool &UsedAssumedInformation,
+ AA::ValueScope VS) {
auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
SmallPtrSet<Value *, 8> SeenObjects;
auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
@@ -423,15 +513,16 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
};
if (!genericValueTraversal<decltype(Objects)>(
A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
- true, 32, StripCB, Intraprocedural))
+ UsedAssumedInformation, true, 32, StripCB, VS))
return false;
return true;
}
-const Value *stripAndAccumulateMinimalOffsets(
- Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val,
- const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
- bool UseAssumed = false) {
+static const Value *
+stripAndAccumulateOffsets(Attributor &A, const AbstractAttribute &QueryingAA,
+ const Value *Val, const DataLayout &DL, APInt &Offset,
+ bool GetMinOffset, bool AllowNonInbounds,
+ bool UseAssumed = false) {
auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool {
const IRPosition &Pos = IRPosition::value(V);
@@ -442,14 +533,20 @@ const Value *stripAndAccumulateMinimalOffsets(
: DepClassTy::NONE);
ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed()
: ValueConstantRangeAA.getKnown();
+ if (Range.isFullSet())
+ return false;
+
// We can only use the lower part of the range because the upper part can
// be higher than what the value can really be.
- ROffset = Range.getSignedMin();
+ if (GetMinOffset)
+ ROffset = Range.getSignedMin();
+ else
+ ROffset = Range.getSignedMax();
return true;
};
return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
- /* AllowInvariant */ false,
+ /* AllowInvariant */ true,
AttributorAnalysis);
}
@@ -458,8 +555,9 @@ getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA,
const Value *Ptr, int64_t &BytesOffset,
const DataLayout &DL, bool AllowNonInbounds = false) {
APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
- const Value *Base = stripAndAccumulateMinimalOffsets(
- A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds);
+ const Value *Base =
+ stripAndAccumulateOffsets(A, QueryingAA, Ptr, DL, OffsetAPInt,
+ /* GetMinOffset */ true, AllowNonInbounds);
BytesOffset = OffsetAPInt.getSExtValue();
return Base;
@@ -493,10 +591,9 @@ static void clampReturnedValueStates(
LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
<< " @ " << RVPos << "\n");
const StateType &AAS = AA.getState();
- if (T.hasValue())
- *T &= AAS;
- else
- T = AAS;
+ if (!T)
+ T = StateType::getBestState(AAS);
+ *T &= AAS;
LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T
<< "\n");
return T->isValidState();
@@ -504,7 +601,7 @@ static void clampReturnedValueStates(
if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
S.indicatePessimisticFixpoint();
- else if (T.hasValue())
+ else if (T)
S ^= *T;
}
@@ -560,20 +657,19 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
<< " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
const StateType &AAS = AA.getState();
- if (T.hasValue())
- *T &= AAS;
- else
- T = AAS;
+ if (!T)
+ T = StateType::getBestState(AAS);
+ *T &= AAS;
LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T
<< "\n");
return T->isValidState();
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
S.indicatePessimisticFixpoint();
- else if (T.hasValue())
+ else if (T)
S ^= *T;
}
@@ -667,7 +763,6 @@ struct AACallSiteReturnedFromReturned : public BaseType {
return clampStateAndIndicateChange(S, AA.getState());
}
};
-} // namespace
/// Helper function to accumulate uses.
template <class AAType, typename StateType = typename AAType::StateType>
@@ -779,6 +874,7 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
S += ParentState;
}
}
+} // namespace
/// ------------------------ PointerInfo ---------------------------------------
@@ -786,9 +882,6 @@ namespace llvm {
namespace AA {
namespace PointerInfo {
-/// An access kind description as used by AAPointerInfo.
-struct OffsetAndSize;
-
struct State;
} // namespace PointerInfo
@@ -806,7 +899,7 @@ struct DenseMapInfo<AAPointerInfo::Access> : DenseMapInfo<Instruction *> {
/// Helper that allows OffsetAndSize as a key in a DenseMap.
template <>
-struct DenseMapInfo<AA::PointerInfo ::OffsetAndSize>
+struct DenseMapInfo<AAPointerInfo ::OffsetAndSize>
: DenseMapInfo<std::pair<int64_t, int64_t>> {};
/// Helper for AA::PointerInfo::Acccess DenseMap/Set usage ignoring everythign
@@ -822,90 +915,15 @@ struct AccessAsInstructionInfo : DenseMapInfo<Instruction *> {
} // namespace llvm
-/// Helper to represent an access offset and size, with logic to deal with
-/// uncertainty and check for overlapping accesses.
-struct AA::PointerInfo::OffsetAndSize : public std::pair<int64_t, int64_t> {
- using BaseTy = std::pair<int64_t, int64_t>;
- OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
- OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
- int64_t getOffset() const { return first; }
- int64_t getSize() const { return second; }
- static OffsetAndSize getUnknown() { return OffsetAndSize(Unknown, Unknown); }
-
- /// Return true if offset or size are unknown.
- bool offsetOrSizeAreUnknown() const {
- return getOffset() == OffsetAndSize::Unknown ||
- getSize() == OffsetAndSize::Unknown;
- }
-
- /// Return true if this offset and size pair might describe an address that
- /// overlaps with \p OAS.
- bool mayOverlap(const OffsetAndSize &OAS) const {
- // Any unknown value and we are giving up -> overlap.
- if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
- return true;
-
- // Check if one offset point is in the other interval [offset, offset+size].
- return OAS.getOffset() + OAS.getSize() > getOffset() &&
- OAS.getOffset() < getOffset() + getSize();
- }
-
- /// Constant used to represent unknown offset or sizes.
- static constexpr int64_t Unknown = 1 << 31;
-};
-
-/// Implementation of the DenseMapInfo.
-///
-///{
-inline llvm::AccessAsInstructionInfo::Access
-llvm::AccessAsInstructionInfo::getEmptyKey() {
- return Access(Base::getEmptyKey(), nullptr, AAPointerInfo::AK_READ, nullptr);
-}
-inline llvm::AccessAsInstructionInfo::Access
-llvm::AccessAsInstructionInfo::getTombstoneKey() {
- return Access(Base::getTombstoneKey(), nullptr, AAPointerInfo::AK_READ,
- nullptr);
-}
-unsigned llvm::AccessAsInstructionInfo::getHashValue(
- const llvm::AccessAsInstructionInfo::Access &A) {
- return Base::getHashValue(A.getRemoteInst());
-}
-bool llvm::AccessAsInstructionInfo::isEqual(
- const llvm::AccessAsInstructionInfo::Access &LHS,
- const llvm::AccessAsInstructionInfo::Access &RHS) {
- return LHS.getRemoteInst() == RHS.getRemoteInst();
-}
-inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
-llvm::DenseMapInfo<AAPointerInfo::Access>::getEmptyKey() {
- return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_READ,
- nullptr);
-}
-inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
-llvm::DenseMapInfo<AAPointerInfo::Access>::getTombstoneKey() {
- return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_WRITE,
- nullptr);
-}
-
-unsigned llvm::DenseMapInfo<AAPointerInfo::Access>::getHashValue(
- const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &A) {
- return detail::combineHashValue(
- DenseMapInfo<Instruction *>::getHashValue(A.getRemoteInst()),
- (A.isWrittenValueYetUndetermined()
- ? ~0
- : DenseMapInfo<Value *>::getHashValue(A.getWrittenValue()))) +
- A.getKind();
-}
-
-bool llvm::DenseMapInfo<AAPointerInfo::Access>::isEqual(
- const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &LHS,
- const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &RHS) {
- return LHS == RHS;
-}
-///}
-
/// A type to track pointer/struct usage and accesses for AAPointerInfo.
struct AA::PointerInfo::State : public AbstractState {
+ ~State() {
+ // We do not delete the Accesses objects but need to destroy them still.
+ for (auto &It : AccessBins)
+ It.second->~Accesses();
+ }
+
/// Return the best possible representable state.
static State getBestState(const State &SIS) { return State(); }
@@ -916,9 +934,10 @@ struct AA::PointerInfo::State : public AbstractState {
return R;
}
- State() {}
- State(const State &SIS) : AccessBins(SIS.AccessBins) {}
- State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {}
+ State() = default;
+ State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {
+ SIS.AccessBins.clear();
+ }
const State &getAssumed() const { return *this; }
@@ -967,15 +986,11 @@ struct AA::PointerInfo::State : public AbstractState {
return false;
auto &Accs = It->getSecond();
auto &RAccs = RIt->getSecond();
- if (Accs.size() != RAccs.size())
+ if (Accs->size() != RAccs->size())
return false;
- auto AccIt = Accs.begin(), RAccIt = RAccs.begin(), AccE = Accs.end();
- while (AccIt != AccE) {
- if (*AccIt != *RAccIt)
+ for (const auto &ZipIt : llvm::zip(*Accs, *RAccs))
+ if (std::get<0>(ZipIt) != std::get<1>(ZipIt))
return false;
- ++AccIt;
- ++RAccIt;
- }
++It;
++RIt;
}
@@ -984,42 +999,88 @@ struct AA::PointerInfo::State : public AbstractState {
bool operator!=(const State &R) const { return !(*this == R); }
/// We store accesses in a set with the instruction as key.
- using Accesses = DenseSet<AAPointerInfo::Access, AccessAsInstructionInfo>;
+ struct Accesses {
+ SmallVector<AAPointerInfo::Access, 4> Accesses;
+ DenseMap<const Instruction *, unsigned> Map;
+
+ unsigned size() const { return Accesses.size(); }
+
+ using vec_iterator = decltype(Accesses)::iterator;
+ vec_iterator begin() { return Accesses.begin(); }
+ vec_iterator end() { return Accesses.end(); }
+
+ using iterator = decltype(Map)::const_iterator;
+ iterator find(AAPointerInfo::Access &Acc) {
+ return Map.find(Acc.getRemoteInst());
+ }
+ iterator find_end() { return Map.end(); }
+
+ AAPointerInfo::Access &get(iterator &It) {
+ return Accesses[It->getSecond()];
+ }
+
+ void insert(AAPointerInfo::Access &Acc) {
+ Map[Acc.getRemoteInst()] = Accesses.size();
+ Accesses.push_back(Acc);
+ }
+ };
/// We store all accesses in bins denoted by their offset and size.
- using AccessBinsTy = DenseMap<OffsetAndSize, Accesses>;
+ using AccessBinsTy = DenseMap<AAPointerInfo::OffsetAndSize, Accesses *>;
AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); }
AccessBinsTy::const_iterator end() const { return AccessBins.end(); }
protected:
/// The bins with all the accesses for the associated pointer.
- DenseMap<OffsetAndSize, Accesses> AccessBins;
+ AccessBinsTy AccessBins;
/// Add a new access to the state at offset \p Offset and with size \p Size.
/// The access is associated with \p I, writes \p Content (if anything), and
/// is of kind \p Kind.
/// \Returns CHANGED, if the state changed, UNCHANGED otherwise.
- ChangeStatus addAccess(int64_t Offset, int64_t Size, Instruction &I,
- Optional<Value *> Content,
+ ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size,
+ Instruction &I, Optional<Value *> Content,
AAPointerInfo::AccessKind Kind, Type *Ty,
Instruction *RemoteI = nullptr,
Accesses *BinPtr = nullptr) {
- OffsetAndSize Key{Offset, Size};
- Accesses &Bin = BinPtr ? *BinPtr : AccessBins[Key];
+ AAPointerInfo::OffsetAndSize Key{Offset, Size};
+ Accesses *&Bin = BinPtr ? BinPtr : AccessBins[Key];
+ if (!Bin)
+ Bin = new (A.Allocator) Accesses;
AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty);
// Check if we have an access for this instruction in this bin, if not,
// simply add it.
- auto It = Bin.find(Acc);
- if (It == Bin.end()) {
- Bin.insert(Acc);
+ auto It = Bin->find(Acc);
+ if (It == Bin->find_end()) {
+ Bin->insert(Acc);
return ChangeStatus::CHANGED;
}
// If the existing access is the same as then new one, nothing changed.
- AAPointerInfo::Access Before = *It;
+ AAPointerInfo::Access &Current = Bin->get(It);
+ AAPointerInfo::Access Before = Current;
// The new one will be combined with the existing one.
- *It &= Acc;
- return *It == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+ Current &= Acc;
+ return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+ }
+
+ /// See AAPointerInfo::forallInterferingAccesses.
+ bool forallInterferingAccesses(
+ AAPointerInfo::OffsetAndSize OAS,
+ function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
+ if (!isValidState())
+ return false;
+
+ for (auto &It : AccessBins) {
+ AAPointerInfo::OffsetAndSize ItOAS = It.getFirst();
+ if (!OAS.mayOverlap(ItOAS))
+ continue;
+ bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown();
+ for (auto &Access : *It.getSecond())
+ if (!CB(Access, IsExact))
+ return false;
+ }
+ return true;
}
/// See AAPointerInfo::forallInterferingAccesses.
@@ -1028,10 +1089,11 @@ protected:
function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
if (!isValidState())
return false;
+
// First find the offset and size of I.
- OffsetAndSize OAS(-1, -1);
+ AAPointerInfo::OffsetAndSize OAS(-1, -1);
for (auto &It : AccessBins) {
- for (auto &Access : It.getSecond()) {
+ for (auto &Access : *It.getSecond()) {
if (Access.getRemoteInst() == &I) {
OAS = It.getFirst();
break;
@@ -1040,21 +1102,13 @@ protected:
if (OAS.getSize() != -1)
break;
}
+ // No access for I was found, we are done.
if (OAS.getSize() == -1)
return true;
// Now that we have an offset and size, find all overlapping ones and use
// the callback on the accesses.
- for (auto &It : AccessBins) {
- OffsetAndSize ItOAS = It.getFirst();
- if (!OAS.mayOverlap(ItOAS))
- continue;
- bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown();
- for (auto &Access : It.getSecond())
- if (!CB(Access, IsExact))
- return false;
- }
- return true;
+ return forallInterferingAccesses(OAS, CB);
}
private:
@@ -1062,6 +1116,7 @@ private:
BooleanState BS;
};
+namespace {
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1084,22 +1139,18 @@ struct AAPointerInfoImpl
}
bool forallInterferingAccesses(
- LoadInst &LI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
+ OffsetAndSize OAS,
+ function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
const override {
- return State::forallInterferingAccesses(LI, CB);
+ return State::forallInterferingAccesses(OAS, CB);
}
bool forallInterferingAccesses(
- StoreInst &SI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
- const override {
- return State::forallInterferingAccesses(SI, CB);
- }
- bool forallInterferingWrites(
- Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+ Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
function_ref<bool(const Access &, bool)> UserCB) const override {
SmallPtrSet<const Access *, 8> DominatingWrites;
- SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites;
+ SmallVector<std::pair<const Access *, bool>, 8> InterferingAccesses;
- Function &Scope = *LI.getFunction();
+ Function &Scope = *I.getFunction();
const auto &NoSyncAA = A.getAAFor<AANoSync>(
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
@@ -1127,13 +1178,15 @@ struct AAPointerInfoImpl
// TODO: Use inter-procedural reachability and dominance.
const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- QueryingAA, IRPosition::function(*LI.getFunction()),
- DepClassTy::OPTIONAL);
+ QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
- const bool CanUseCFGResoning = CanIgnoreThreading(LI);
+ const bool FindInterferingWrites = I.mayReadFromMemory();
+ const bool FindInterferingReads = I.mayWriteToMemory();
+ const bool UseDominanceReasoning = FindInterferingWrites;
+ const bool CanUseCFGResoning = CanIgnoreThreading(I);
InformationCache &InfoCache = A.getInfoCache();
const DominatorTree *DT =
- NoRecurseAA.isKnownNoRecurse()
+ NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning
? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
Scope)
: nullptr;
@@ -1189,33 +1242,37 @@ struct AAPointerInfoImpl
}
auto AccessCB = [&](const Access &Acc, bool Exact) {
- if (!Acc.isWrite())
+ if ((!FindInterferingWrites || !Acc.isWrite()) &&
+ (!FindInterferingReads || !Acc.isRead()))
return true;
// For now we only filter accesses based on CFG reasoning which does not
// work yet if we have threading effects, or the access is complicated.
if (CanUseCFGResoning) {
- if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA,
- IsLiveInCalleeCB))
+ if ((!Acc.isWrite() ||
+ !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA,
+ IsLiveInCalleeCB)) &&
+ (!Acc.isRead() ||
+ !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA,
+ IsLiveInCalleeCB)))
return true;
- if (DT && Exact &&
- (Acc.getLocalInst()->getFunction() == LI.getFunction()) &&
+ if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) &&
IsSameThreadAsLoad(Acc)) {
- if (DT->dominates(Acc.getLocalInst(), &LI))
+ if (DT->dominates(Acc.getLocalInst(), &I))
DominatingWrites.insert(&Acc);
}
}
- InterferingWrites.push_back({&Acc, Exact});
+ InterferingAccesses.push_back({&Acc, Exact});
return true;
};
- if (!State::forallInterferingAccesses(LI, AccessCB))
+ if (!State::forallInterferingAccesses(I, AccessCB))
return false;
// If we cannot use CFG reasoning we only filter the non-write accesses
// and are done here.
if (!CanUseCFGResoning) {
- for (auto &It : InterferingWrites)
+ for (auto &It : InterferingAccesses)
if (!UserCB(*It.first, It.second))
return false;
return true;
@@ -1242,47 +1299,52 @@ struct AAPointerInfoImpl
return false;
};
- // Run the user callback on all writes we cannot skip and return if that
+ // Run the user callback on all accesses we cannot skip and return if that
// succeeded for all or not.
- unsigned NumInterferingWrites = InterferingWrites.size();
- for (auto &It : InterferingWrites)
- if (!DT || NumInterferingWrites > MaxInterferingWrites ||
- !CanSkipAccess(*It.first, It.second))
+ unsigned NumInterferingAccesses = InterferingAccesses.size();
+ for (auto &It : InterferingAccesses) {
+ if (!DT || NumInterferingAccesses > MaxInterferingAccesses ||
+ !CanSkipAccess(*It.first, It.second)) {
if (!UserCB(*It.first, It.second))
return false;
+ }
+ }
return true;
}
- ChangeStatus translateAndAddCalleeState(Attributor &A,
- const AAPointerInfo &CalleeAA,
- int64_t CallArgOffset, CallBase &CB) {
+ ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA,
+ int64_t Offset, CallBase &CB,
+ bool FromCallee = false) {
using namespace AA::PointerInfo;
- if (!CalleeAA.getState().isValidState() || !isValidState())
+ if (!OtherAA.getState().isValidState() || !isValidState())
return indicatePessimisticFixpoint();
- const auto &CalleeImplAA = static_cast<const AAPointerInfoImpl &>(CalleeAA);
- bool IsByval = CalleeImplAA.getAssociatedArgument()->hasByValAttr();
+ const auto &OtherAAImpl = static_cast<const AAPointerInfoImpl &>(OtherAA);
+ bool IsByval =
+ FromCallee && OtherAAImpl.getAssociatedArgument()->hasByValAttr();
// Combine the accesses bin by bin.
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- for (auto &It : CalleeImplAA.getState()) {
+ for (auto &It : OtherAAImpl.getState()) {
OffsetAndSize OAS = OffsetAndSize::getUnknown();
- if (CallArgOffset != OffsetAndSize::Unknown)
- OAS = OffsetAndSize(It.first.getOffset() + CallArgOffset,
- It.first.getSize());
- Accesses &Bin = AccessBins[OAS];
- for (const AAPointerInfo::Access &RAcc : It.second) {
+ if (Offset != OffsetAndSize::Unknown)
+ OAS = OffsetAndSize(It.first.getOffset() + Offset, It.first.getSize());
+ Accesses *Bin = AccessBins.lookup(OAS);
+ for (const AAPointerInfo::Access &RAcc : *It.second) {
if (IsByval && !RAcc.isRead())
continue;
bool UsedAssumedInformation = false;
- Optional<Value *> Content = A.translateArgumentToCallSiteContent(
- RAcc.getContent(), CB, *this, UsedAssumedInformation);
- AccessKind AK =
- AccessKind(RAcc.getKind() & (IsByval ? AccessKind::AK_READ
- : AccessKind::AK_READ_WRITE));
+ AccessKind AK = RAcc.getKind();
+ Optional<Value *> Content = RAcc.getContent();
+ if (FromCallee) {
+ Content = A.translateArgumentToCallSiteContent(
+ RAcc.getContent(), CB, *this, UsedAssumedInformation);
+ AK = AccessKind(
+ AK & (IsByval ? AccessKind::AK_READ : AccessKind::AK_READ_WRITE));
+ }
Changed =
- Changed | addAccess(OAS.getOffset(), OAS.getSize(), CB, Content, AK,
- RAcc.getType(), RAcc.getRemoteInst(), &Bin);
+ Changed | addAccess(A, OAS.getOffset(), OAS.getSize(), CB, Content,
+ AK, RAcc.getType(), RAcc.getRemoteInst(), Bin);
}
}
return Changed;
@@ -1305,7 +1367,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
bool handleAccess(Attributor &A, Instruction &I, Value &Ptr,
Optional<Value *> Content, AccessKind Kind, int64_t Offset,
ChangeStatus &Changed, Type *Ty,
- int64_t Size = AA::PointerInfo::OffsetAndSize::Unknown) {
+ int64_t Size = OffsetAndSize::Unknown) {
using namespace AA::PointerInfo;
// No need to find a size if one is given or the offset is unknown.
if (Offset != OffsetAndSize::Unknown && Size == OffsetAndSize::Unknown &&
@@ -1315,13 +1377,13 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
if (!AccessSize.isScalable())
Size = AccessSize.getFixedSize();
}
- Changed = Changed | addAccess(Offset, Size, I, Content, Kind, Ty);
+ Changed = Changed | addAccess(A, Offset, Size, I, Content, Kind, Ty);
return true;
};
/// Helper struct, will support ranges eventually.
struct OffsetInfo {
- int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown;
+ int64_t Offset = OffsetAndSize::Unknown;
bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
};
@@ -1329,7 +1391,6 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
using namespace AA::PointerInfo;
- State S = getState();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
Value &AssociatedValue = getAssociatedValue();
@@ -1337,7 +1398,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
DenseMap<Value *, OffsetInfo> OffsetInfoMap;
OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
- auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
+ auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo PtrOI,
bool &Follow) {
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
UsrOI = PtrOI;
@@ -1475,8 +1536,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
const auto &CSArgPI = A.getAAFor<AAPointerInfo>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::REQUIRED);
- Changed = translateAndAddCalleeState(
- A, CSArgPI, OffsetInfoMap[CurPtr].Offset, *CB) |
+ Changed = translateAndAddState(A, CSArgPI,
+ OffsetInfoMap[CurPtr].Offset, *CB) |
Changed;
return true;
}
@@ -1497,7 +1558,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
};
if (!A.checkForAllUses(UsePred, *this, AssociatedValue,
/* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL,
- EquivalentUseCB))
+ /* IgnoreDroppableUses */ true, EquivalentUseCB))
return indicatePessimisticFixpoint();
LLVM_DEBUG({
@@ -1505,15 +1566,19 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
for (auto &It : AccessBins) {
dbgs() << "[" << It.first.getOffset() << "-"
<< It.first.getOffset() + It.first.getSize()
- << "] : " << It.getSecond().size() << "\n";
- for (auto &Acc : It.getSecond()) {
+ << "] : " << It.getSecond()->size() << "\n";
+ for (auto &Acc : *It.getSecond()) {
dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst()
<< "\n";
if (Acc.getLocalInst() != Acc.getRemoteInst())
dbgs() << " --> "
<< *Acc.getRemoteInst() << "\n";
- if (!Acc.isWrittenValueYetUndetermined())
- dbgs() << " - " << Acc.getWrittenValue() << "\n";
+ if (!Acc.isWrittenValueYetUndetermined()) {
+ if (Acc.getWrittenValue())
+ dbgs() << " - c: " << *Acc.getWrittenValue() << "\n";
+ else
+ dbgs() << " - c: <unknown>\n";
+ }
}
}
});
@@ -1576,7 +1641,7 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
LengthVal = Length->getSExtValue();
Value &Ptr = getAssociatedValue();
unsigned ArgNo = getIRPosition().getCallSiteArgNo();
- ChangeStatus Changed;
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (ArgNo == 0) {
handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed,
nullptr, LengthVal);
@@ -1601,7 +1666,8 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA =
A.getAAFor<AAPointerInfo>(*this, ArgPos, DepClassTy::REQUIRED);
- return translateAndAddCalleeState(A, ArgAA, 0, *cast<CallBase>(getCtxI()));
+ return translateAndAddState(A, ArgAA, 0, *cast<CallBase>(getCtxI()),
+ /* FromCallee */ true);
}
/// See AbstractAttribute::trackStatistics()
@@ -1619,9 +1685,11 @@ struct AAPointerInfoCallSiteReturned final : AAPointerInfoFloating {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
}
};
+} // namespace
/// -----------------------NoUnwind Function Attribute--------------------------
+namespace {
struct AANoUnwindImpl : AANoUnwind {
AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {}
@@ -1693,9 +1761,11 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
};
+} // namespace
/// --------------------- Function Return Values -------------------------------
+namespace {
/// "Attribute" that collects all potential returned values and the return
/// instructions that they arise from.
///
@@ -1821,7 +1891,7 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
// Check if we have an assumed unique return value that we could manifest.
Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
- if (!UniqueRV.hasValue() || !UniqueRV.getValue())
+ if (!UniqueRV || !UniqueRV.getValue())
return Changed;
// Bookkeeping.
@@ -1893,17 +1963,18 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
return true;
};
+ bool UsedAssumedInformation = false;
auto ReturnInstCB = [&](Instruction &I) {
ReturnInst &Ret = cast<ReturnInst>(I);
return genericValueTraversal<ReturnInst>(
A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
- &I, /* UseValueSimplify */ true, /* MaxValues */ 16,
- /* StripCB */ nullptr, /* Intraprocedural */ true);
+ &I, UsedAssumedInformation, /* UseValueSimplify */ true,
+ /* MaxValues */ 16,
+ /* StripCB */ nullptr, AA::Intraprocedural);
};
// Discover returned values from all live returned instructions in the
// associated function.
- bool UsedAssumedInformation = false;
if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret},
UsedAssumedInformation))
return indicatePessimisticFixpoint();
@@ -1941,20 +2012,10 @@ struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
+} // namespace
/// ------------------------ NoSync Function Attribute -------------------------
-struct AANoSyncImpl : AANoSync {
- AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {}
-
- const std::string getAsStr() const override {
- return getAssumed() ? "nosync" : "may-sync";
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
-};
-
bool AANoSync::isNonRelaxedAtomic(const Instruction *I) {
if (!I->isAtomic())
return false;
@@ -1997,6 +2058,18 @@ bool AANoSync::isNoSyncIntrinsic(const Instruction *I) {
return false;
}
+namespace {
+struct AANoSyncImpl : AANoSync {
+ AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {}
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nosync" : "may-sync";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
auto CheckRWInstForNoSync = [&](Instruction &I) {
@@ -2059,9 +2132,11 @@ struct AANoSyncCallSite final : AANoSyncImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); }
};
+} // namespace
/// ------------------------ No-Free Attributes ----------------------------
+namespace {
struct AANoFreeImpl : public AANoFree {
AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {}
@@ -2243,8 +2318,10 @@ struct AANoFreeCallSiteReturned final : AANoFreeFloating {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) }
};
+} // namespace
/// ------------------------ NonNull Argument Attribute ------------------------
+namespace {
static int64_t getKnownNonNullAndDerefBytesForUse(
Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue,
const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) {
@@ -2332,7 +2409,7 @@ struct AANonNullImpl : AANonNull {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- Value &V = getAssociatedValue();
+ Value &V = *getAssociatedValue().stripPointerCasts();
if (!NullIsDefined &&
hasAttr({Attribute::NonNull, Attribute::Dereferenceable},
/* IgnoreSubsumingPositions */ false, &A)) {
@@ -2356,7 +2433,7 @@ struct AANonNullImpl : AANonNull {
}
}
- if (isa<GlobalValue>(&getAssociatedValue())) {
+ if (isa<GlobalValue>(V)) {
indicatePessimisticFixpoint();
return;
}
@@ -2419,8 +2496,10 @@ struct AANonNullFloating : public AANonNullImpl {
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
@@ -2472,9 +2551,11 @@ struct AANonNullCallSiteReturned final
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
};
+} // namespace
/// ------------------------ No-Recurse Attributes ----------------------------
+namespace {
struct AANoRecurseImpl : public AANoRecurse {
AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {}
@@ -2498,14 +2579,15 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
DepClassTy::NONE);
return NoRecurseAA.isKnownNoRecurse();
};
- bool AllCallSitesKnown;
- if (A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) {
+ bool UsedAssumedInformation = false;
+ if (A.checkForAllCallSites(CallSitePred, *this, true,
+ UsedAssumedInformation)) {
// If we know all call sites and all are known no-recurse, we are done.
// If all known call sites, which might not be all that exist, are known
// to be no-recurse, we are not done but we can continue to assume
// no-recurse. If one of the call sites we have not visited will become
// live, another update is triggered.
- if (AllCallSitesKnown)
+ if (!UsedAssumedInformation)
indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -2549,9 +2631,11 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
};
+} // namespace
/// -------------------- Undefined-Behavior Attributes ------------------------
+namespace {
struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
AAUndefinedBehaviorImpl(const IRPosition &IRP, Attributor &A)
: AAUndefinedBehavior(IRP, A) {}
@@ -2582,7 +2666,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// Either we stopped and the appropriate action was taken,
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I);
- if (!SimplifiedPtrOp.hasValue() || !SimplifiedPtrOp.getValue())
+ if (!SimplifiedPtrOp || !SimplifiedPtrOp.getValue())
return true;
const Value *PtrOpVal = SimplifiedPtrOp.getValue();
@@ -2627,7 +2711,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedCond =
stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst);
- if (!SimplifiedCond.hasValue() || !SimplifiedCond.getValue())
+ if (!SimplifiedCond || !*SimplifiedCond)
return true;
AssumedNoUBInsts.insert(&I);
return true;
@@ -2673,10 +2757,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
IRPosition::value(*ArgVal), *this, UsedAssumedInformation);
if (UsedAssumedInformation)
continue;
- if (SimplifiedVal.hasValue() && !SimplifiedVal.getValue())
+ if (SimplifiedVal && !SimplifiedVal.getValue())
return true;
- if (!SimplifiedVal.hasValue() ||
- isa<UndefValue>(*SimplifiedVal.getValue())) {
+ if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.getValue())) {
KnownUBInsts.insert(&I);
continue;
}
@@ -2691,40 +2774,38 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
return true;
};
- auto InspectReturnInstForUB =
- [&](Value &V, const SmallSetVector<ReturnInst *, 4> RetInsts) {
- // Check if a return instruction always cause UB or not
- // Note: It is guaranteed that the returned position of the anchor
- // scope has noundef attribute when this is called.
- // We also ensure the return position is not "assumed dead"
- // because the returned value was then potentially simplified to
- // `undef` in AAReturnedValues without removing the `noundef`
- // attribute yet.
+ auto InspectReturnInstForUB = [&](Instruction &I) {
+ auto &RI = cast<ReturnInst>(I);
+ // Either we stopped and the appropriate action was taken,
+ // or we got back a simplified return value to continue.
+ Optional<Value *> SimplifiedRetValue =
+ stopOnUndefOrAssumed(A, RI.getReturnValue(), &I);
+ if (!SimplifiedRetValue || !*SimplifiedRetValue)
+ return true;
- // When the returned position has noundef attriubte, UB occur in the
- // following cases.
- // (1) Returned value is known to be undef.
- // (2) The value is known to be a null pointer and the returned
- // position has nonnull attribute (because the returned value is
- // poison).
- bool FoundUB = false;
- if (isa<UndefValue>(V)) {
- FoundUB = true;
- } else {
- if (isa<ConstantPointerNull>(V)) {
- auto &NonNullAA = A.getAAFor<AANonNull>(
- *this, IRPosition::returned(*getAnchorScope()),
- DepClassTy::NONE);
- if (NonNullAA.isKnownNonNull())
- FoundUB = true;
- }
- }
+ // Check if a return instruction always cause UB or not
+ // Note: It is guaranteed that the returned position of the anchor
+ // scope has noundef attribute when this is called.
+ // We also ensure the return position is not "assumed dead"
+ // because the returned value was then potentially simplified to
+ // `undef` in AAReturnedValues without removing the `noundef`
+ // attribute yet.
- if (FoundUB)
- for (ReturnInst *RI : RetInsts)
- KnownUBInsts.insert(RI);
- return true;
- };
+ // When the returned position has noundef attriubte, UB occurs in the
+ // following cases.
+ // (1) Returned value is known to be undef.
+ // (2) The value is known to be a null pointer and the returned
+ // position has nonnull attribute (because the returned value is
+ // poison).
+ if (isa<ConstantPointerNull>(*SimplifiedRetValue)) {
+ auto &NonNullAA = A.getAAFor<AANonNull>(
+ *this, IRPosition::returned(*getAnchorScope()), DepClassTy::NONE);
+ if (NonNullAA.isKnownNonNull())
+ KnownUBInsts.insert(&I);
+ }
+
+ return true;
+ };
bool UsedAssumedInformation = false;
A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
@@ -2747,8 +2828,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
auto &RetPosNoUndefAA =
A.getAAFor<AANoUndef>(*this, ReturnIRP, DepClassTy::NONE);
if (RetPosNoUndefAA.isKnownNoUndef())
- A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB,
- *this);
+ A.checkForAllInstructions(InspectReturnInstForUB, *this,
+ {Instruction::Ret}, UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true);
}
}
@@ -2776,7 +2858,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
case Instruction::AtomicRMW:
return !AssumedNoUBInsts.count(I);
case Instruction::Br: {
- auto BrInst = cast<BranchInst>(I);
+ auto *BrInst = cast<BranchInst>(I);
if (BrInst->isUnconditional())
return false;
return !AssumedNoUBInsts.count(I);
@@ -2847,13 +2929,13 @@ private:
IRPosition::value(*V), *this, UsedAssumedInformation);
if (!UsedAssumedInformation) {
// Don't depend on assumed values.
- if (!SimplifiedV.hasValue()) {
+ if (!SimplifiedV) {
// If it is known (which we tested above) but it doesn't have a value,
// then we can assume `undef` and hence the instruction is UB.
KnownUBInsts.insert(I);
return llvm::None;
}
- if (!SimplifiedV.getValue())
+ if (!*SimplifiedV)
return nullptr;
V = *SimplifiedV;
}
@@ -2877,9 +2959,11 @@ struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
KnownUBInsts.size();
}
};
+} // namespace
/// ------------------------ Will-Return Attributes ----------------------------
+namespace {
// Helper function that checks whether a function has any cycle which we don't
// know if it is bounded or not.
// Loops with maximum trip count are considered bounded, any other cycle not.
@@ -3018,9 +3102,11 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); }
};
+} // namespace
/// -------------------AAReachability Attribute--------------------------
+namespace {
struct AAReachabilityImpl : AAReachability {
AAReachabilityImpl(const IRPosition &IRP, Attributor &A)
: AAReachability(IRP, A) {}
@@ -3032,10 +3118,6 @@ struct AAReachabilityImpl : AAReachability {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
- if (!NoRecurseAA.isAssumedNoRecurse())
- return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
@@ -3047,9 +3129,11 @@ struct AAReachabilityFunction final : public AAReachabilityImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); }
};
+} // namespace
/// ------------------------ NoAlias Argument Attribute ------------------------
+namespace {
struct AANoAliasImpl : AANoAlias {
AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) {
assert(getAssociatedType()->isPointerTy() &&
@@ -3146,10 +3230,10 @@ struct AANoAliasArgument final
// If the argument is never passed through callbacks, no-alias cannot break
// synchronization.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (A.checkForAllCallSites(
[](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this,
- true, AllCallSitesKnown))
+ true, UsedAssumedInformation))
return Base::updateImpl(A);
// TODO: add no-alias but make sure it doesn't break synchronization by
@@ -3246,14 +3330,20 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
return false;
}
+ auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) {
+ const auto &DerefAA = A.getAAFor<AADereferenceable>(
+ *this, IRPosition::value(*O), DepClassTy::OPTIONAL);
+ return DerefAA.getAssumedDereferenceableBytes();
+ };
+
A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL);
const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
const Function *ScopeFn = VIRP.getAnchorScope();
auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, VIRP, DepClassTy::NONE);
// Check whether the value is captured in the scope using AANoCapture.
- // Look at CFG and check only uses possibly executed before this
- // callsite.
+ // Look at CFG and check only uses possibly executed before this
+ // callsite.
auto UsePred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
@@ -3265,12 +3355,6 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
return true;
if (ScopeFn) {
- const auto &ReachabilityAA = A.getAAFor<AAReachability>(
- *this, IRPosition::function(*ScopeFn), DepClassTy::OPTIONAL);
-
- if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI()))
- return true;
-
if (auto *CB = dyn_cast<CallBase>(UserI)) {
if (CB->isArgOperand(&U)) {
@@ -3284,17 +3368,26 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
return true;
}
}
+
+ if (!AA::isPotentiallyReachable(A, *UserI, *getCtxI(), *this))
+ return true;
}
- // For cases which can potentially have more users
- if (isa<GetElementPtrInst>(U) || isa<BitCastInst>(U) || isa<PHINode>(U) ||
- isa<SelectInst>(U)) {
+ // TODO: We should track the capturing uses in AANoCapture but the problem
+ // is CGSCC runs. For those we would need to "allow" AANoCapture for
+ // a value in the module slice.
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
+ case UseCaptureKind::NO_CAPTURE:
+ return true;
+ case UseCaptureKind::MAY_CAPTURE:
+ LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *UserI
+ << "\n");
+ return false;
+ case UseCaptureKind::PASSTHROUGH:
Follow = true;
return true;
}
-
- LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *U << "\n");
- return false;
+ llvm_unreachable("unknown UseCaptureKind");
};
if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
@@ -3423,12 +3516,21 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
};
+} // namespace
/// -------------------AAIsDead Function Attribute-----------------------
+namespace {
struct AAIsDeadValueImpl : public AAIsDead {
AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (auto *Scope = getAnchorScope())
+ if (!A.isRunOn(*Scope))
+ indicatePessimisticFixpoint();
+ }
+
/// See AAIsDead::isAssumedDead().
bool isAssumedDead() const override { return isAssumed(IS_DEAD); }
@@ -3452,22 +3554,25 @@ struct AAIsDeadValueImpl : public AAIsDead {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ virtual const std::string getAsStr() const override {
return isAssumedDead() ? "assumed-dead" : "assumed-live";
}
/// Check if all uses are assumed dead.
bool areAllUsesAssumedDead(Attributor &A, Value &V) {
// Callers might not check the type, void has no uses.
- if (V.getType()->isVoidTy())
+ if (V.getType()->isVoidTy() || V.use_empty())
return true;
// If we replace a value with a constant there are no uses left afterwards.
if (!isa<Constant>(V)) {
+ if (auto *I = dyn_cast<Instruction>(&V))
+ if (!A.isRunOn(*I->getFunction()))
+ return false;
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(V, *this, UsedAssumedInformation);
- if (!C.hasValue() || *C)
+ if (!C || *C)
return true;
}
@@ -3477,7 +3582,8 @@ struct AAIsDeadValueImpl : public AAIsDead {
// without going through N update cycles. This is not required for
// correctness.
return A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ false,
- DepClassTy::REQUIRED);
+ DepClassTy::REQUIRED,
+ /* IgnoreDroppableUses */ false);
}
/// Determine if \p I is assumed to be side-effect free.
@@ -3508,6 +3614,8 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ AAIsDeadValueImpl::initialize(A);
+
if (isa<UndefValue>(getAssociatedValue())) {
indicatePessimisticFixpoint();
return;
@@ -3538,6 +3646,15 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
});
}
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
+ if (isa_and_nonnull<StoreInst>(I))
+ if (isValidState())
+ return "assumed-dead-store";
+ return AAIsDeadValueImpl::getAsStr();
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
@@ -3553,6 +3670,10 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
return ChangeStatus::UNCHANGED;
}
+ bool isRemovableStore() const override {
+ return isAssumed(IS_REMOVABLE) && isa<StoreInst>(&getAssociatedValue());
+ }
+
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
Value &V = getAssociatedValue();
@@ -3567,21 +3688,7 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
return ChangeStatus::CHANGED;
}
}
- if (V.use_empty())
- return ChangeStatus::UNCHANGED;
-
- bool UsedAssumedInformation = false;
- Optional<Constant *> C =
- A.getAssumedConstant(V, *this, UsedAssumedInformation);
- if (C.hasValue() && C.getValue())
- return ChangeStatus::UNCHANGED;
-
- // Replace the value with undef as it is dead but keep droppable uses around
- // as they provide information we don't want to give up on just yet.
- UndefValue &UV = *UndefValue::get(V.getType());
- bool AnyChange =
- A.changeValueAfterManifest(V, UV, /* ChangeDropppable */ false);
- return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -3596,23 +3703,22 @@ struct AAIsDeadArgument : public AAIsDeadFloating {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ AAIsDeadFloating::initialize(A);
if (!A.isFunctionIPOAmendable(*getAnchorScope()))
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = AAIsDeadFloating::manifest(A);
Argument &Arg = *getAssociatedArgument();
if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {}))
if (A.registerFunctionSignatureRewrite(
Arg, /* ReplacementTypes */ {},
Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{},
Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) {
- Arg.dropDroppableUses();
return ChangeStatus::CHANGED;
}
- return Changed;
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -3625,6 +3731,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ AAIsDeadValueImpl::initialize(A);
if (isa<UndefValue>(getAssociatedValue()))
indicatePessimisticFixpoint();
}
@@ -3661,7 +3768,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
AAIsDeadCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AAIsDeadFloating(IRP, A), IsAssumedSideEffectFree(true) {}
+ : AAIsDeadFloating(IRP, A) {}
/// See AAIsDead::isAssumedDead().
bool isAssumedDead() const override {
@@ -3670,6 +3777,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ AAIsDeadFloating::initialize(A);
if (isa<UndefValue>(getAssociatedValue())) {
indicatePessimisticFixpoint();
return;
@@ -3707,7 +3815,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
}
private:
- bool IsAssumedSideEffectFree;
+ bool IsAssumedSideEffectFree = true;
};
struct AAIsDeadReturned : public AAIsDeadValueImpl {
@@ -3727,9 +3835,8 @@ struct AAIsDeadReturned : public AAIsDeadValueImpl {
return areAllUsesAssumedDead(A, *ACS.getInstruction());
};
- bool AllCallSitesKnown;
if (!A.checkForAllCallSites(PredForCallSite, *this, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
@@ -3761,17 +3868,13 @@ struct AAIsDeadFunction : public AAIsDead {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- const Function *F = getAnchorScope();
- if (F && !F->isDeclaration()) {
- // We only want to compute liveness once. If the function is not part of
- // the SCC, skip it.
- if (A.isRunOn(*const_cast<Function *>(F))) {
- ToBeExploredFrom.insert(&F->getEntryBlock().front());
- assumeLive(A, F->getEntryBlock());
- } else {
- indicatePessimisticFixpoint();
- }
+ Function *F = getAnchorScope();
+ if (!F || F->isDeclaration() || !A.isRunOn(*F)) {
+ indicatePessimisticFixpoint();
+ return;
}
+ ToBeExploredFrom.insert(&F->getEntryBlock().front());
+ assumeLive(A, F->getEntryBlock());
}
/// See AbstractAttribute::getAsStr().
@@ -3834,6 +3937,9 @@ struct AAIsDeadFunction : public AAIsDead {
ChangeStatus updateImpl(Attributor &A) override;
bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
+ assert(From->getParent() == getAnchorScope() &&
+ To->getParent() == getAnchorScope() &&
+ "Used AAIsDead of the wrong function");
return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To));
}
@@ -3973,7 +4079,7 @@ identifyAliveSuccessors(Attributor &A, const BranchInst &BI,
} else {
Optional<Constant *> C =
A.getAssumedConstant(*BI.getCondition(), AA, UsedAssumedInformation);
- if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) {
+ if (!C || isa_and_nonnull<UndefValue>(*C)) {
// No value yet, assume both edges are dead.
} else if (isa_and_nonnull<ConstantInt>(*C)) {
const BasicBlock *SuccBB =
@@ -3995,7 +4101,7 @@ identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation);
- if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) {
+ if (!C || isa_and_nonnull<UndefValue>(C.getValue())) {
// No value yet, assume all edges are dead.
} else if (isa_and_nonnull<ConstantInt>(C.getValue())) {
for (auto &CaseIt : SI.cases()) {
@@ -4142,9 +4248,11 @@ struct AAIsDeadCallSite final : AAIsDeadFunction {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
+} // namespace
/// -------------------- Dereferenceable Argument Attribute --------------------
+namespace {
struct AADereferenceableImpl : AADereferenceable {
AADereferenceableImpl(const IRPosition &IRP, Attributor &A)
: AADereferenceable(IRP, A) {}
@@ -4152,6 +4260,7 @@ struct AADereferenceableImpl : AADereferenceable {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ Value &V = *getAssociatedValue().stripPointerCasts();
SmallVector<Attribute, 4> Attrs;
getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
Attrs, /* IgnoreSubsumingPositions */ false, &A);
@@ -4162,9 +4271,8 @@ struct AADereferenceableImpl : AADereferenceable {
NonNullAA = &A.getAAFor<AANonNull>(*this, IRP, DepClassTy::NONE);
bool CanBeNull, CanBeFreed;
- takeKnownDerefBytesMaximum(
- IRP.getAssociatedValue().getPointerDereferenceableBytes(
- A.getDataLayout(), CanBeNull, CanBeFreed));
+ takeKnownDerefBytesMaximum(V.getPointerDereferenceableBytes(
+ A.getDataLayout(), CanBeNull, CanBeFreed));
bool IsFnInterface = IRP.isFnInterfaceKind();
Function *FnScope = IRP.getAnchorScope();
@@ -4263,8 +4371,9 @@ struct AADereferenceableFloating : AADereferenceableImpl {
unsigned IdxWidth =
DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
APInt Offset(IdxWidth, 0);
- const Value *Base =
- stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false);
+ const Value *Base = stripAndAccumulateOffsets(
+ A, *this, &V, DL, Offset, /* GetMinOffset */ false,
+ /* AllowNonInbounds */ true);
const auto &AA = A.getAAFor<AADereferenceable>(
*this, IRPosition::value(*Base), DepClassTy::REQUIRED);
@@ -4312,8 +4421,10 @@ struct AADereferenceableFloating : AADereferenceableImpl {
};
DerefState T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
@@ -4377,9 +4488,11 @@ struct AADereferenceableCallSiteReturned final
STATS_DECLTRACK_CS_ATTR(dereferenceable);
}
};
+} // namespace
// ------------------------ Align Argument Attribute ------------------------
+namespace {
static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
Value &AssociatedValue, const Use *U,
const Instruction *I, bool &TrackUse) {
@@ -4450,14 +4563,8 @@ struct AAAlignImpl : AAAlign {
for (const Attribute &Attr : Attrs)
takeKnownMaximum(Attr.getValueAsInt());
- Value &V = getAssociatedValue();
- // TODO: This is a HACK to avoid getPointerAlignment to introduce a ptr2int
- // use of the function pointer. This was caused by D73131. We want to
- // avoid this for function pointers especially because we iterate
- // their uses and int2ptr is not handled. It is not a correctness
- // problem though!
- if (!V.getType()->getPointerElementType()->isFunctionTy())
- takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value());
+ Value &V = *getAssociatedValue().stripPointerCasts();
+ takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value());
if (getIRPosition().isFnInterfaceKind() &&
(!getAnchorScope() ||
@@ -4479,16 +4586,16 @@ struct AAAlignImpl : AAAlign {
for (const Use &U : AssociatedValue.uses()) {
if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
if (SI->getPointerOperand() == &AssociatedValue)
- if (SI->getAlignment() < getAssumedAlign()) {
+ if (SI->getAlign() < getAssumedAlign()) {
STATS_DECLTRACK(AAAlign, Store,
"Number of times alignment added to a store");
- SI->setAlignment(Align(getAssumedAlign()));
+ SI->setAlignment(getAssumedAlign());
LoadStoreChanged = ChangeStatus::CHANGED;
}
} else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
if (LI->getPointerOperand() == &AssociatedValue)
- if (LI->getAlignment() < getAssumedAlign()) {
- LI->setAlignment(Align(getAssumedAlign()));
+ if (LI->getAlign() < getAssumedAlign()) {
+ LI->setAlignment(getAssumedAlign());
STATS_DECLTRACK(AAAlign, Load,
"Number of times alignment added to a load");
LoadStoreChanged = ChangeStatus::CHANGED;
@@ -4532,9 +4639,8 @@ struct AAAlignImpl : AAAlign {
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
- return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) +
- "-" + std::to_string(getAssumedAlign()) + ">")
- : "unknown-align";
+ return "align<" + std::to_string(getKnownAlign().value()) + "-" +
+ std::to_string(getAssumedAlign().value()) + ">";
}
};
@@ -4548,6 +4654,8 @@ struct AAAlignFloating : AAAlignImpl {
auto VisitValueCB = [&](Value &V, const Instruction *,
AAAlign::StateType &T, bool Stripped) -> bool {
+ if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V))
+ return true;
const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
if (!Stripped && this == &AA) {
@@ -4555,6 +4663,7 @@ struct AAAlignFloating : AAAlignImpl {
unsigned Alignment = 1;
if (const Value *Base =
GetPointerBaseWithConstantOffset(&V, Offset, DL)) {
+ // TODO: Use AAAlign for the base too.
Align PA = Base->getPointerAlignment(DL);
// BasePointerAddr + Offset = Alignment * Q for some integer Q.
// So we can say that the maximum power of two which is a divisor of
@@ -4578,8 +4687,10 @@ struct AAAlignFloating : AAAlignImpl {
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
// TODO: If we know we visited all incoming values, thus no are assumed
@@ -4657,7 +4768,7 @@ struct AAAlignCallSiteArgument final : AAAlignFloating {
// so we do not need to track a dependence.
const auto &ArgAlignAA = A.getAAFor<AAAlign>(
*this, IRPosition::argument(*Arg), DepClassTy::NONE);
- takeKnownMaximum(ArgAlignAA.getKnownAlign());
+ takeKnownMaximum(ArgAlignAA.getKnownAlign().value());
}
return Changed;
}
@@ -4684,8 +4795,10 @@ struct AAAlignCallSiteReturned final
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
};
+} // namespace
/// ------------------ Function No-Return Attribute ----------------------------
+namespace {
struct AANoReturnImpl : public AANoReturn {
AANoReturnImpl(const IRPosition &IRP, Attributor &A) : AANoReturn(IRP, A) {}
@@ -4753,9 +4866,179 @@ struct AANoReturnCallSite final : AANoReturnImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
};
+} // namespace
+
+/// ----------------------- Instance Info ---------------------------------
+
+namespace {
+/// A class to hold the state of for no-capture attributes.
+struct AAInstanceInfoImpl : public AAInstanceInfo {
+ AAInstanceInfoImpl(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfo(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ if (auto *C = dyn_cast<Constant>(&V)) {
+ if (C->isThreadDependent())
+ indicatePessimisticFixpoint();
+ else
+ indicateOptimisticFixpoint();
+ return;
+ }
+ if (auto *CB = dyn_cast<CallBase>(&V))
+ if (CB->arg_size() == 0 && !CB->mayHaveSideEffects() &&
+ !CB->mayReadFromMemory()) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ Value &V = getAssociatedValue();
+ const Function *Scope = nullptr;
+ if (auto *I = dyn_cast<Instruction>(&V))
+ Scope = I->getFunction();
+ if (auto *A = dyn_cast<Argument>(&V)) {
+ Scope = A->getParent();
+ if (!Scope->hasLocalLinkage())
+ return Changed;
+ }
+ if (!Scope)
+ return indicateOptimisticFixpoint();
+
+ auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
+ if (NoRecurseAA.isAssumedNoRecurse())
+ return Changed;
+
+ auto UsePred = [&](const Use &U, bool &Follow) {
+ const Instruction *UserI = dyn_cast<Instruction>(U.getUser());
+ if (!UserI || isa<GetElementPtrInst>(UserI) || isa<CastInst>(UserI) ||
+ isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
+ Follow = true;
+ return true;
+ }
+ if (isa<LoadInst>(UserI) || isa<CmpInst>(UserI) ||
+ (isa<StoreInst>(UserI) &&
+ cast<StoreInst>(UserI)->getValueOperand() != U.get()))
+ return true;
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+ // This check is not guaranteeing uniqueness but for now that we cannot
+ // end up with two versions of \p U thinking it was one.
+ if (!CB->getCalledFunction() ||
+ !CB->getCalledFunction()->hasLocalLinkage())
+ return true;
+ if (!CB->isArgOperand(&U))
+ return false;
+ const auto &ArgInstanceInfoAA = A.getAAFor<AAInstanceInfo>(
+ *this, IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)),
+ DepClassTy::OPTIONAL);
+ if (!ArgInstanceInfoAA.isAssumedUniqueForAnalysis())
+ return false;
+ // If this call base might reach the scope again we might forward the
+ // argument back here. This is very conservative.
+ if (AA::isPotentiallyReachable(A, *CB, *Scope, *this, nullptr))
+ return false;
+ return true;
+ }
+ return false;
+ };
+
+ auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) {
+ if (auto *SI = dyn_cast<StoreInst>(OldU.getUser())) {
+ auto *Ptr = SI->getPointerOperand()->stripPointerCasts();
+ if (isa<AllocaInst>(Ptr) && AA::isDynamicallyUnique(A, *this, *Ptr))
+ return true;
+ auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(
+ *SI->getFunction());
+ if (isAllocationFn(Ptr, TLI) && AA::isDynamicallyUnique(A, *this, *Ptr))
+ return true;
+ }
+ return false;
+ };
+
+ if (!A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ true,
+ DepClassTy::OPTIONAL,
+ /* IgnoreDroppableUses */ true, EquivalentUseCB))
+ return indicatePessimisticFixpoint();
+
+ return Changed;
+ }
+
+ /// See AbstractState::getAsStr().
+ const std::string getAsStr() const override {
+ return isAssumedUniqueForAnalysis() ? "<unique [fAa]>" : "<unknown>";
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// InstanceInfo attribute for floating values.
+struct AAInstanceInfoFloating : AAInstanceInfoImpl {
+ AAInstanceInfoFloating(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoImpl(IRP, A) {}
+};
+
+/// NoCapture attribute for function arguments.
+struct AAInstanceInfoArgument final : AAInstanceInfoFloating {
+ AAInstanceInfoArgument(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoFloating(IRP, A) {}
+};
+
+/// InstanceInfo attribute for call site arguments.
+struct AAInstanceInfoCallSiteArgument final : AAInstanceInfoImpl {
+ AAInstanceInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA =
+ A.getAAFor<AAInstanceInfo>(*this, ArgPos, DepClassTy::REQUIRED);
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ }
+};
+
+/// InstanceInfo attribute for function return value.
+struct AAInstanceInfoReturned final : AAInstanceInfoImpl {
+ AAInstanceInfoReturned(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoImpl(IRP, A) {
+ llvm_unreachable("InstanceInfo is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ llvm_unreachable("InstanceInfo is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("InstanceInfo is not applicable to function returns!");
+ }
+};
+
+/// InstanceInfo attribute deduction for a call site return value.
+struct AAInstanceInfoCallSiteReturned final : AAInstanceInfoFloating {
+ AAInstanceInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoFloating(IRP, A) {}
+};
+} // namespace
/// ----------------------- Variable Capturing ---------------------------------
+namespace {
/// A class to hold the state of for no-capture attributes.
struct AANoCaptureImpl : public AANoCapture {
AANoCaptureImpl(const IRPosition &IRP, Attributor &A) : AANoCapture(IRP, A) {}
@@ -4863,143 +5146,69 @@ struct AANoCaptureImpl : public AANoCapture {
return "assumed not-captured-maybe-returned";
return "assumed-captured";
}
-};
-
-/// Attributor-aware capture tracker.
-struct AACaptureUseTracker final : public CaptureTracker {
-
- /// Create a capture tracker that can lookup in-flight abstract attributes
- /// through the Attributor \p A.
- ///
- /// If a use leads to a potential capture, \p CapturedInMemory is set and the
- /// search is stopped. If a use leads to a return instruction,
- /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed.
- /// If a use leads to a ptr2int which may capture the value,
- /// \p CapturedInInteger is set. If a use is found that is currently assumed
- /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies
- /// set. All values in \p PotentialCopies are later tracked as well. For every
- /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0,
- /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
- /// conservatively set to true.
- AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
- const AAIsDead &IsDeadAA, AANoCapture::StateType &State,
- SmallSetVector<Value *, 4> &PotentialCopies,
- unsigned &RemainingUsesToExplore)
- : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
- PotentialCopies(PotentialCopies),
- RemainingUsesToExplore(RemainingUsesToExplore) {}
-
- /// Determine if \p V maybe captured. *Also updates the state!*
- bool valueMayBeCaptured(const Value *V) {
- if (V->getType()->isPointerTy()) {
- PointerMayBeCaptured(V, this);
- } else {
- State.indicatePessimisticFixpoint();
- }
- return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
- }
-
- /// See CaptureTracker::tooManyUses().
- void tooManyUses() override {
- State.removeAssumedBits(AANoCapture::NO_CAPTURE);
- }
- bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
- if (CaptureTracker::isDereferenceableOrNull(O, DL))
- return true;
- const auto &DerefAA = A.getAAFor<AADereferenceable>(
- NoCaptureAA, IRPosition::value(*O), DepClassTy::OPTIONAL);
- return DerefAA.getAssumedDereferenceableBytes();
- }
-
- /// See CaptureTracker::captured(...).
- bool captured(const Use *U) override {
- Instruction *UInst = cast<Instruction>(U->getUser());
- LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst
- << "\n");
-
- // Because we may reuse the tracker multiple times we keep track of the
- // number of explored uses ourselves as well.
- if (RemainingUsesToExplore-- == 0) {
- LLVM_DEBUG(dbgs() << " - too many uses to explore!\n");
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
- /* Return */ true);
- }
+ /// Check the use \p U and update \p State accordingly. Return true if we
+ /// should continue to update the state.
+ bool checkUse(Attributor &A, AANoCapture::StateType &State, const Use &U,
+ bool &Follow) {
+ Instruction *UInst = cast<Instruction>(U.getUser());
+ LLVM_DEBUG(dbgs() << "[AANoCapture] Check use: " << *U.get() << " in "
+ << *UInst << "\n");
// Deal with ptr2int by following uses.
if (isa<PtrToIntInst>(UInst)) {
LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n");
- return valueMayBeCaptured(UInst);
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
+ /* Return */ true);
}
- // For stores we check if we can follow the value through memory or not.
- if (auto *SI = dyn_cast<StoreInst>(UInst)) {
- if (SI->isVolatile())
- return isCapturedIn(/* Memory */ true, /* Integer */ false,
- /* Return */ false);
- bool UsedAssumedInformation = false;
- if (!AA::getPotentialCopiesOfStoredValue(
- A, *SI, PotentialCopies, NoCaptureAA, UsedAssumedInformation))
- return isCapturedIn(/* Memory */ true, /* Integer */ false,
- /* Return */ false);
- // Not captured directly, potential copies will be checked.
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ // For stores we already checked if we can follow them, if they make it
+ // here we give up.
+ if (isa<StoreInst>(UInst))
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ false,
/* Return */ false);
- }
// Explicitly catch return instructions.
if (isa<ReturnInst>(UInst)) {
- if (UInst->getFunction() == NoCaptureAA.getAnchorScope())
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ if (UInst->getFunction() == getAnchorScope())
+ return isCapturedIn(State, /* Memory */ false, /* Integer */ false,
/* Return */ true);
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
/* Return */ true);
}
// For now we only use special logic for call sites. However, the tracker
// itself knows about a lot of other non-capturing cases already.
auto *CB = dyn_cast<CallBase>(UInst);
- if (!CB || !CB->isArgOperand(U))
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ if (!CB || !CB->isArgOperand(&U))
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
/* Return */ true);
- unsigned ArgNo = CB->getArgOperandNo(U);
+ unsigned ArgNo = CB->getArgOperandNo(&U);
const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo);
// If we have a abstract no-capture attribute for the argument we can use
// it to justify a non-capture attribute here. This allows recursion!
auto &ArgNoCaptureAA =
- A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos, DepClassTy::REQUIRED);
+ A.getAAFor<AANoCapture>(*this, CSArgPos, DepClassTy::REQUIRED);
if (ArgNoCaptureAA.isAssumedNoCapture())
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ return isCapturedIn(State, /* Memory */ false, /* Integer */ false,
/* Return */ false);
if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
- addPotentialCopy(*CB);
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ Follow = true;
+ return isCapturedIn(State, /* Memory */ false, /* Integer */ false,
/* Return */ false);
}
// Lastly, we could not find a reason no-capture can be assumed so we don't.
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
/* Return */ true);
}
- /// Register \p CS as potential copy of the value we are checking.
- void addPotentialCopy(CallBase &CB) { PotentialCopies.insert(&CB); }
-
- /// See CaptureTracker::shouldExplore(...).
- bool shouldExplore(const Use *U) override {
- // Check liveness and ignore droppable users.
- bool UsedAssumedInformation = false;
- return !U->getUser()->isDroppable() &&
- !A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA,
- UsedAssumedInformation);
- }
-
- /// Update the state according to \p CapturedInMem, \p CapturedInInt, and
- /// \p CapturedInRet, then return the appropriate value for use in the
- /// CaptureTracker::captured() interface.
- bool isCapturedIn(bool CapturedInMem, bool CapturedInInt,
- bool CapturedInRet) {
+ /// Update \p State according to \p CapturedInMem, \p CapturedInInt, and
+ /// \p CapturedInRet, then return true if we should continue updating the
+ /// state.
+ static bool isCapturedIn(AANoCapture::StateType &State, bool CapturedInMem,
+ bool CapturedInInt, bool CapturedInRet) {
LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int "
<< CapturedInInt << "|Ret " << CapturedInRet << "]\n");
if (CapturedInMem)
@@ -5008,27 +5217,8 @@ struct AACaptureUseTracker final : public CaptureTracker {
State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT);
if (CapturedInRet)
State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET);
- return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+ return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
}
-
-private:
- /// The attributor providing in-flight abstract attributes.
- Attributor &A;
-
- /// The abstract attribute currently updated.
- AANoCapture &NoCaptureAA;
-
- /// The abstract liveness state.
- const AAIsDead &IsDeadAA;
-
- /// The state currently updated.
- AANoCapture::StateType &State;
-
- /// Set of potential copies of the tracked value.
- SmallSetVector<Value *, 4> &PotentialCopies;
-
- /// Global counter to limit the number of explored uses.
- unsigned &RemainingUsesToExplore;
};
ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
@@ -5042,7 +5232,6 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
assert(F && "Expected a function!");
const IRPosition &FnPos = IRPosition::function(*F);
- const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos, DepClassTy::NONE);
AANoCapture::StateType T;
@@ -5059,6 +5248,8 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
// AAReturnedValues, e.g., track all values that escape through returns
// directly somehow.
auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
+ if (!RVAA.getState().isValidState())
+ return false;
bool SeenConstant = false;
for (auto &It : RVAA.returned_values()) {
if (isa<Constant>(It.first)) {
@@ -5094,21 +5285,27 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
}
}
- // Use the CaptureTracker interface and logic with the specialized tracker,
- // defined in AACaptureUseTracker, that can look at in-flight abstract
- // attributes and directly updates the assumed state.
- SmallSetVector<Value *, 4> PotentialCopies;
- unsigned RemainingUsesToExplore =
- getDefaultMaxUsesToExploreForCaptureTracking();
- AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies,
- RemainingUsesToExplore);
+ auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) {
+ const auto &DerefAA = A.getAAFor<AADereferenceable>(
+ *this, IRPosition::value(*O), DepClassTy::OPTIONAL);
+ return DerefAA.getAssumedDereferenceableBytes();
+ };
- // Check all potential copies of the associated value until we can assume
- // none will be captured or we have to assume at least one might be.
- unsigned Idx = 0;
- PotentialCopies.insert(V);
- while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
- Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
+ auto UseCheck = [&](const Use &U, bool &Follow) -> bool {
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
+ case UseCaptureKind::NO_CAPTURE:
+ return true;
+ case UseCaptureKind::MAY_CAPTURE:
+ return checkUse(A, T, U, Follow);
+ case UseCaptureKind::PASSTHROUGH:
+ Follow = true;
+ return true;
+ }
+ llvm_unreachable("Unexpected use capture kind!");
+ };
+
+ if (!A.checkForAllUses(UseCheck, *this, *V))
+ return indicatePessimisticFixpoint();
AANoCapture::StateType &S = getState();
auto Assumed = S.getAssumed();
@@ -5208,6 +5405,7 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
+} // namespace
/// ------------------ Value Simplify Attribute ----------------------------
@@ -5219,7 +5417,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return false;
LLVM_DEBUG({
- if (SimplifiedAssociatedValue.hasValue())
+ if (SimplifiedAssociatedValue)
dbgs() << "[ValueSimplify] is assumed to be "
<< **SimplifiedAssociatedValue << "\n";
else
@@ -5228,6 +5426,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return true;
}
+namespace {
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
@@ -5243,9 +5442,9 @@ struct AAValueSimplifyImpl : AAValueSimplify {
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
LLVM_DEBUG({
- errs() << "SAV: " << SimplifiedAssociatedValue << " ";
+ dbgs() << "SAV: " << (bool)SimplifiedAssociatedValue << " ";
if (SimplifiedAssociatedValue && *SimplifiedAssociatedValue)
- errs() << "SAV: " << **SimplifiedAssociatedValue << " ";
+ dbgs() << "SAV: " << **SimplifiedAssociatedValue << " ";
});
return isValidState() ? (isAtFixpoint() ? "simplified" : "maybe-simple")
: "not-simple";
@@ -5259,24 +5458,101 @@ struct AAValueSimplifyImpl : AAValueSimplify {
return SimplifiedAssociatedValue;
}
+ /// Ensure the return value is \p V with type \p Ty, if not possible return
+ /// nullptr. If \p Check is true we will only verify such an operation would
+ /// suceed and return a non-nullptr value if that is the case. No IR is
+ /// generated or modified.
+ static Value *ensureType(Attributor &A, Value &V, Type &Ty, Instruction *CtxI,
+ bool Check) {
+ if (auto *TypedV = AA::getWithType(V, Ty))
+ return TypedV;
+ if (CtxI && V.getType()->canLosslesslyBitCastTo(&Ty))
+ return Check ? &V
+ : BitCastInst::CreatePointerBitCastOrAddrSpaceCast(&V, &Ty,
+ "", CtxI);
+ return nullptr;
+ }
+
+ /// Reproduce \p I with type \p Ty or return nullptr if that is not posisble.
+ /// If \p Check is true we will only verify such an operation would suceed and
+ /// return a non-nullptr value if that is the case. No IR is generated or
+ /// modified.
+ static Value *reproduceInst(Attributor &A,
+ const AbstractAttribute &QueryingAA,
+ Instruction &I, Type &Ty, Instruction *CtxI,
+ bool Check, ValueToValueMapTy &VMap) {
+ assert(CtxI && "Cannot reproduce an instruction without context!");
+ if (Check && (I.mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(&I, CtxI, /* DT */ nullptr,
+ /* TLI */ nullptr)))
+ return nullptr;
+ for (Value *Op : I.operands()) {
+ Value *NewOp = reproduceValue(A, QueryingAA, *Op, Ty, CtxI, Check, VMap);
+ if (!NewOp) {
+ assert(Check && "Manifest of new value unexpectedly failed!");
+ return nullptr;
+ }
+ if (!Check)
+ VMap[Op] = NewOp;
+ }
+ if (Check)
+ return &I;
+
+ Instruction *CloneI = I.clone();
+ // TODO: Try to salvage debug information here.
+ CloneI->setDebugLoc(DebugLoc());
+ VMap[&I] = CloneI;
+ CloneI->insertBefore(CtxI);
+ RemapInstruction(CloneI, VMap);
+ return CloneI;
+ }
+
+ /// Reproduce \p V with type \p Ty or return nullptr if that is not posisble.
+ /// If \p Check is true we will only verify such an operation would suceed and
+ /// return a non-nullptr value if that is the case. No IR is generated or
+ /// modified.
+ static Value *reproduceValue(Attributor &A,
+ const AbstractAttribute &QueryingAA, Value &V,
+ Type &Ty, Instruction *CtxI, bool Check,
+ ValueToValueMapTy &VMap) {
+ if (const auto &NewV = VMap.lookup(&V))
+ return NewV;
+ bool UsedAssumedInformation = false;
+ Optional<Value *> SimpleV =
+ A.getAssumedSimplified(V, QueryingAA, UsedAssumedInformation);
+ if (!SimpleV)
+ return PoisonValue::get(&Ty);
+ Value *EffectiveV = &V;
+ if (SimpleV.getValue())
+ EffectiveV = SimpleV.getValue();
+ if (auto *C = dyn_cast<Constant>(EffectiveV))
+ if (!C->canTrap())
+ return C;
+ if (CtxI && AA::isValidAtPosition(AA::ValueAndContext(*EffectiveV, *CtxI),
+ A.getInfoCache()))
+ return ensureType(A, *EffectiveV, Ty, CtxI, Check);
+ if (auto *I = dyn_cast<Instruction>(EffectiveV))
+ if (Value *NewV = reproduceInst(A, QueryingAA, *I, Ty, CtxI, Check, VMap))
+ return ensureType(A, *NewV, Ty, CtxI, Check);
+ return nullptr;
+ }
+
/// Return a value we can use as replacement for the associated one, or
/// nullptr if we don't have one that makes sense.
- Value *getReplacementValue(Attributor &A) const {
- Value *NewV;
- NewV = SimplifiedAssociatedValue.hasValue()
- ? SimplifiedAssociatedValue.getValue()
- : UndefValue::get(getAssociatedType());
- if (!NewV)
- return nullptr;
- NewV = AA::getWithType(*NewV, *getAssociatedType());
- if (!NewV || NewV == &getAssociatedValue())
- return nullptr;
- const Instruction *CtxI = getCtxI();
- if (CtxI && !AA::isValidAtPosition(*NewV, *CtxI, A.getInfoCache()))
- return nullptr;
- if (!CtxI && !AA::isValidInScope(*NewV, getAnchorScope()))
- return nullptr;
- return NewV;
+ Value *manifestReplacementValue(Attributor &A, Instruction *CtxI) const {
+ Value *NewV = SimplifiedAssociatedValue
+ ? SimplifiedAssociatedValue.getValue()
+ : UndefValue::get(getAssociatedType());
+ if (NewV && NewV != &getAssociatedValue()) {
+ ValueToValueMapTy VMap;
+ // First verify we can reprduce the value with the required type at the
+ // context location before we actually start modifying the IR.
+ if (reproduceValue(A, *this, *NewV, *getAssociatedType(), CtxI,
+ /* CheckOnly */ true, VMap))
+ return reproduceValue(A, *this, *NewV, *getAssociatedType(), CtxI,
+ /* CheckOnly */ false, VMap);
+ }
+ return nullptr;
}
/// Helper function for querying AAValueSimplify and updating candicate.
@@ -5300,14 +5576,14 @@ struct AAValueSimplifyImpl : AAValueSimplify {
const auto &AA =
A.getAAFor<AAType>(*this, getIRPosition(), DepClassTy::NONE);
- Optional<ConstantInt *> COpt = AA.getAssumedConstantInt(A);
+ Optional<Constant *> COpt = AA.getAssumedConstant(A);
- if (!COpt.hasValue()) {
+ if (!COpt) {
SimplifiedAssociatedValue = llvm::None;
A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
return true;
}
- if (auto *C = COpt.getValue()) {
+ if (auto *C = *COpt) {
SimplifiedAssociatedValue = C;
A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
return true;
@@ -5318,7 +5594,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
bool askSimplifiedValueForOtherAAs(Attributor &A) {
if (askSimplifiedValueFor<AAValueConstantRange>(A))
return true;
- if (askSimplifiedValueFor<AAPotentialValues>(A))
+ if (askSimplifiedValueFor<AAPotentialConstantValues>(A))
return true;
return false;
}
@@ -5326,14 +5602,18 @@ struct AAValueSimplifyImpl : AAValueSimplify {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- if (getAssociatedValue().user_empty())
- return Changed;
-
- if (auto *NewV = getReplacementValue(A)) {
- LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue() << " -> "
- << *NewV << " :: " << *this << "\n");
- if (A.changeValueAfterManifest(getAssociatedValue(), *NewV))
- Changed = ChangeStatus::CHANGED;
+ for (auto &U : getAssociatedValue().uses()) {
+ // Check if we need to adjust the insertion point to make sure the IR is
+ // valid.
+ Instruction *IP = dyn_cast<Instruction>(U.getUser());
+ if (auto *PHI = dyn_cast_or_null<PHINode>(IP))
+ IP = PHI->getIncomingBlock(U)->getTerminator();
+ if (auto *NewV = manifestReplacementValue(A, IP)) {
+ LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue()
+ << " -> " << *NewV << " :: " << *this << "\n");
+ if (A.changeUseAfterManifest(U, *NewV))
+ Changed = ChangeStatus::CHANGED;
+ }
}
return Changed | AAValueSimplify::manifest(A);
@@ -5344,73 +5624,6 @@ struct AAValueSimplifyImpl : AAValueSimplify {
SimplifiedAssociatedValue = &getAssociatedValue();
return AAValueSimplify::indicatePessimisticFixpoint();
}
-
- static bool handleLoad(Attributor &A, const AbstractAttribute &AA,
- LoadInst &L, function_ref<bool(Value &)> Union) {
- auto UnionWrapper = [&](Value &V, Value &Obj) {
- if (isa<AllocaInst>(Obj))
- return Union(V);
- if (!AA::isDynamicallyUnique(A, AA, V))
- return false;
- if (!AA::isValidAtPosition(V, L, A.getInfoCache()))
- return false;
- return Union(V);
- };
-
- Value &Ptr = *L.getPointerOperand();
- SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L))
- return false;
-
- const auto *TLI =
- A.getInfoCache().getTargetLibraryInfoForFunction(*L.getFunction());
- for (Value *Obj : Objects) {
- LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
- if (isa<UndefValue>(Obj))
- continue;
- if (isa<ConstantPointerNull>(Obj)) {
- // A null pointer access can be undefined but any offset from null may
- // be OK. We do not try to optimize the latter.
- bool UsedAssumedInformation = false;
- if (!NullPointerIsDefined(L.getFunction(),
- Ptr.getType()->getPointerAddressSpace()) &&
- A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation) == Obj)
- continue;
- return false;
- }
- Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType(), TLI);
- if (!InitialVal || !Union(*InitialVal))
- return false;
-
- LLVM_DEBUG(dbgs() << "Underlying object amenable to load-store "
- "propagation, checking accesses next.\n");
-
- auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
- LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
- if (Acc.isWrittenValueYetUndetermined())
- return true;
- Value *Content = Acc.getWrittenValue();
- if (!Content)
- return false;
- Value *CastedContent =
- AA::getWithType(*Content, *AA.getAssociatedType());
- if (!CastedContent)
- return false;
- if (IsExact)
- return UnionWrapper(*CastedContent, *Obj);
- if (auto *C = dyn_cast<Constant>(CastedContent))
- if (C->isNullValue() || C->isAllOnesValue() || isa<UndefValue>(C))
- return UnionWrapper(*CastedContent, *Obj);
- return false;
- };
-
- auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
- DepClassTy::REQUIRED);
- if (!PI.forallInterferingWrites(A, AA, L, CheckAccess))
- return false;
- }
- return true;
- }
};
struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
@@ -5425,15 +5638,6 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
Attribute::StructRet, Attribute::Nest, Attribute::ByVal},
/* IgnoreSubsumingPositions */ true))
indicatePessimisticFixpoint();
-
- // FIXME: This is a hack to prevent us from propagating function poiner in
- // the new pass manager CGSCC pass as it creates call edges the
- // CallGraphUpdater cannot handle yet.
- Value &V = getAssociatedValue();
- if (V.getType()->isPointerTy() &&
- V.getType()->getPointerElementType()->isFunctionTy() &&
- !A.isModulePass())
- indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
@@ -5466,7 +5670,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
bool UsedAssumedInformation = false;
Optional<Constant *> SimpleArgOp =
A.getAssumedConstant(ACSArgPos, *this, UsedAssumedInformation);
- if (!SimpleArgOp.hasValue())
+ if (!SimpleArgOp)
return true;
if (!SimpleArgOp.getValue())
return false;
@@ -5477,14 +5681,14 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
// Generate a answer specific to a call site context.
bool Success;
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (hasCallBaseContext() &&
getCallBaseContext()->getCalledFunction() == Arg->getParent())
Success = PredForCallSite(
AbstractCallSite(&getCallBaseContext()->getCalledOperandUse()));
else
Success = A.checkForAllCallSites(PredForCallSite, *this, true,
- AllCallSitesKnown);
+ UsedAssumedInformation);
if (!Success)
if (!askSimplifiedValueForOtherAAs(A))
@@ -5516,12 +5720,16 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
ChangeStatus updateImpl(Attributor &A) override {
auto Before = SimplifiedAssociatedValue;
- auto PredForReturned = [&](Value &V) {
- return checkAndUpdate(A, *this,
- IRPosition::value(V, getCallBaseContext()));
+ auto ReturnInstCB = [&](Instruction &I) {
+ auto &RI = cast<ReturnInst>(I);
+ return checkAndUpdate(
+ A, *this,
+ IRPosition::value(*RI.getReturnValue(), getCallBaseContext()));
};
- if (!A.checkForAllReturnedValues(PredForReturned, *this))
+ bool UsedAssumedInformation = false;
+ if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret},
+ UsedAssumedInformation))
if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
@@ -5531,29 +5739,9 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
}
ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
-
- if (auto *NewV = getReplacementValue(A)) {
- auto PredForReturned =
- [&](Value &, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
- for (ReturnInst *RI : RetInsts) {
- Value *ReturnedVal = RI->getReturnValue();
- if (ReturnedVal == NewV || isa<UndefValue>(ReturnedVal))
- return true;
- assert(RI->getFunction() == getAnchorScope() &&
- "ReturnInst in wrong function!");
- LLVM_DEBUG(dbgs()
- << "[ValueSimplify] " << *ReturnedVal << " -> "
- << *NewV << " in " << *RI << " :: " << *this << "\n");
- if (A.changeUseAfterManifest(RI->getOperandUse(0), *NewV))
- Changed = ChangeStatus::CHANGED;
- }
- return true;
- };
- A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this);
- }
-
- return Changed | AAValueSimplify::manifest(A);
+ // We queried AAValueSimplify for the returned values so they will be
+ // replaced if a simplified form was found. Nothing to do here.
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -5597,7 +5785,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return true;
if (!SimplifiedLHS.getValue())
return false;
@@ -5606,7 +5794,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return true;
if (!SimplifiedRHS.getValue())
return false;
@@ -5662,15 +5850,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
return true;
}
- bool updateWithLoad(Attributor &A, LoadInst &L) {
- auto Union = [&](Value &V) {
- SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
- SimplifiedAssociatedValue, &V, L.getType());
- return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
- };
- return handleLoad(A, *this, L, Union);
- }
-
/// Use the generic, non-optimistic InstSimplfy functionality if we managed to
/// simplify any operand of the instruction \p I. Return true if successful,
/// in that case SimplifiedAssociatedValue will be updated.
@@ -5686,7 +5865,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
*this, UsedAssumedInformation);
// If we are not sure about any operand we are not sure about the entire
// instruction, we'll wait.
- if (!SimplifiedOp.hasValue())
+ if (!SimplifiedOp)
return true;
if (SimplifiedOp.getValue())
@@ -5714,7 +5893,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
const DataLayout &DL = I.getModule()->getDataLayout();
SimplifyQuery Q(DL, TLI, DT, AC, &I);
if (Value *SimplifiedI =
- SimplifyInstructionWithOperands(&I, NewOps, Q, ORE)) {
+ simplifyInstructionWithOperands(&I, NewOps, Q, ORE)) {
SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
SimplifiedAssociatedValue, SimplifiedI, I.getType());
return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
@@ -5726,6 +5905,36 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
ChangeStatus updateImpl(Attributor &A) override {
auto Before = SimplifiedAssociatedValue;
+ // Do not simplify loads that are only used in llvm.assume if we cannot also
+ // remove all stores that may feed into the load. The reason is that the
+ // assume is probably worth something as long as the stores are around.
+ if (auto *LI = dyn_cast<LoadInst>(&getAssociatedValue())) {
+ InformationCache &InfoCache = A.getInfoCache();
+ if (InfoCache.isOnlyUsedByAssume(*LI)) {
+ SmallSetVector<Value *, 4> PotentialCopies;
+ SmallSetVector<Instruction *, 4> PotentialValueOrigins;
+ bool UsedAssumedInformation = false;
+ if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies,
+ PotentialValueOrigins, *this,
+ UsedAssumedInformation,
+ /* OnlyExact */ true)) {
+ if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) {
+ if (!I)
+ return true;
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return A.isAssumedDead(SI->getOperandUse(0), this,
+ /* LivenessAA */ nullptr,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false);
+ return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false);
+ }))
+ return indicatePessimisticFixpoint();
+ }
+ }
+ }
+
auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
bool Stripped) -> bool {
auto &AA = A.getAAFor<AAValueSimplify>(
@@ -5734,9 +5943,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
if (!Stripped && this == &AA) {
if (auto *I = dyn_cast<Instruction>(&V)) {
- if (auto *LI = dyn_cast<LoadInst>(&V))
- if (updateWithLoad(A, *LI))
- return true;
if (auto *Cmp = dyn_cast<CmpInst>(&V))
if (handleCmp(A, *Cmp))
return true;
@@ -5754,8 +5960,10 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
};
bool Dummy = false;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy,
VisitValueCB, getCtxI(),
+ UsedAssumedInformation,
/* UseValueSimplify */ false))
if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
@@ -5806,8 +6014,23 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
void initialize(Attributor &A) override {
AAValueSimplifyImpl::initialize(A);
- if (!getAssociatedFunction())
+ Function *Fn = getAssociatedFunction();
+ if (!Fn) {
indicatePessimisticFixpoint();
+ return;
+ }
+ for (Argument &Arg : Fn->args()) {
+ if (Arg.hasReturnedAttr()) {
+ auto IRP = IRPosition::callsite_argument(*cast<CallBase>(getCtxI()),
+ Arg.getArgNo());
+ if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_ARGUMENT &&
+ checkAndUpdate(A, *this, IRP))
+ indicateOptimisticFixpoint();
+ else
+ indicatePessimisticFixpoint();
+ return;
+ }
+ }
}
/// See AbstractAttribute::updateImpl(...).
@@ -5845,8 +6068,13 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ // TODO: We should avoid simplification duplication to begin with.
+ auto *FloatAA = A.lookupAAFor<AAValueSimplify>(
+ IRPosition::value(getAssociatedValue()), this, DepClassTy::NONE);
+ if (FloatAA && FloatAA->getState().isValidState())
+ return Changed;
- if (auto *NewV = getReplacementValue(A)) {
+ if (auto *NewV = manifestReplacementValue(A, getCtxI())) {
Use &U = cast<CallBase>(&getAnchorValue())
->getArgOperandUse(getCallSiteArgNo());
if (A.changeUseAfterManifest(U, *NewV))
@@ -5860,8 +6088,10 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
STATS_DECLTRACK_CSARG_ATTR(value_simplify)
}
};
+} // namespace
/// ----------------------- Heap-To-Stack Conversion ---------------------------
+namespace {
struct AAHeapToStackFunction final : public AAHeapToStack {
struct AllocationInfo {
@@ -5883,7 +6113,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
bool HasPotentiallyFreeingUnknownUses = false;
/// The set of free calls that use this allocation.
- SmallPtrSet<CallBase *, 1> PotentialFreeCalls{};
+ SmallSetVector<CallBase *, 1> PotentialFreeCalls{};
};
struct DeallocationInfo {
@@ -5895,7 +6125,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
bool MightFreeUnknownObjects = false;
/// The set of allocation calls that are potentially freed.
- SmallPtrSet<CallBase *, 1> PotentialAllocationCalls{};
+ SmallSetVector<CallBase *, 1> PotentialAllocationCalls{};
};
AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
@@ -5905,9 +6135,9 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
// Ensure we call the destructor so we release any memory allocated in the
// sets.
for (auto &It : AllocationInfos)
- It.getSecond()->~AllocationInfo();
+ It.second->~AllocationInfo();
for (auto &It : DeallocationInfos)
- It.getSecond()->~DeallocationInfo();
+ It.second->~DeallocationInfo();
}
void initialize(Attributor &A) override {
@@ -5932,7 +6162,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) {
AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB};
AllocationInfos[CB] = AI;
- TLI->getLibFunc(*CB, AI->LibraryFunctionId);
+ if (TLI)
+ TLI->getLibFunc(*CB, AI->LibraryFunctionId);
}
}
return true;
@@ -5945,6 +6176,16 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
/* CheckPotentiallyDead */ true);
(void)Success;
assert(Success && "Did not expect the call base visit callback to fail!");
+
+ Attributor::SimplifictionCallbackTy SCB =
+ [](const IRPosition &, const AbstractAttribute *,
+ bool &) -> Optional<Value *> { return nullptr; };
+ for (const auto &It : AllocationInfos)
+ A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first),
+ SCB);
+ for (const auto &It : DeallocationInfos)
+ A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first),
+ SCB);
}
const std::string getAsStr() const override {
@@ -5971,7 +6212,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
bool isAssumedHeapToStack(const CallBase &CB) const override {
if (isValidState())
- if (AllocationInfo *AI = AllocationInfos.lookup(&CB))
+ if (AllocationInfo *AI =
+ AllocationInfos.lookup(const_cast<CallBase *>(&CB)))
return AI->Status != AllocationInfo::INVALID;
return false;
}
@@ -6000,6 +6242,17 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+ LoopInfo *LI =
+ A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F);
+ Optional<bool> MayContainIrreducibleControl;
+ auto IsInLoop = [&](BasicBlock &BB) {
+ if (!MayContainIrreducibleControl.has_value())
+ MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI);
+ if (MayContainIrreducibleControl.value())
+ return true;
+ return LI->getLoopFor(&BB) != nullptr;
+ };
+
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
@@ -6026,13 +6279,13 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
else
A.emitRemark<OptimizationRemark>(AI.CB, "HeapToStack", Remark);
+ const DataLayout &DL = A.getInfoCache().getDL();
Value *Size;
Optional<APInt> SizeAPI = getSize(A, *this, AI);
- if (SizeAPI.hasValue()) {
+ if (SizeAPI) {
Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI);
} else {
LLVMContext &Ctx = AI.CB->getContext();
- auto &DL = A.getInfoCache().getDL();
ObjectSizeOpts Opts;
ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, Opts);
SizeOffsetEvalType SizeOffsetPair = Eval.compute(AI.CB);
@@ -6041,32 +6294,36 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Size = SizeOffsetPair.first;
}
+ Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent()))
+ ? AI.CB
+ : &F->getEntryBlock().front();
+
Align Alignment(1);
if (MaybeAlign RetAlign = AI.CB->getRetAlign())
- Alignment = max(Alignment, RetAlign);
+ Alignment = std::max(Alignment, *RetAlign);
if (Value *Align = getAllocAlignment(AI.CB, TLI)) {
Optional<APInt> AlignmentAPI = getAPInt(A, *this, *Align);
- assert(AlignmentAPI.hasValue() &&
+ assert(AlignmentAPI && AlignmentAPI.getValue().getZExtValue() > 0 &&
"Expected an alignment during manifest!");
- Alignment =
- max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue()));
+ Alignment = std::max(
+ Alignment, assumeAligned(AlignmentAPI.getValue().getZExtValue()));
}
- unsigned AS = cast<PointerType>(AI.CB->getType())->getAddressSpace();
- Instruction *Alloca =
- new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
- "", AI.CB->getNextNode());
+ // TODO: Hoist the alloca towards the function entry.
+ unsigned AS = DL.getAllocaAddrSpace();
+ Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
+ Size, Alignment, "", IP);
if (Alloca->getType() != AI.CB->getType())
- Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc",
- Alloca->getNextNode());
+ Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+ Alloca, AI.CB->getType(), "malloc_cast", AI.CB);
auto *I8Ty = Type::getInt8Ty(F->getContext());
auto *InitVal = getInitialValueOfAllocation(AI.CB, TLI, I8Ty);
assert(InitVal &&
"Must be able to materialize initial memory state of allocation");
- A.changeValueAfterManifest(*AI.CB, *Alloca);
+ A.changeAfterManifest(IRPosition::inst(*AI.CB), *Alloca);
if (auto *II = dyn_cast<InvokeInst>(AI.CB)) {
auto *NBB = II->getNormalDest();
@@ -6095,7 +6352,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
bool UsedAssumedInformation = false;
Optional<Constant *> SimpleV =
A.getAssumedConstant(V, AA, UsedAssumedInformation);
- if (!SimpleV.hasValue())
+ if (!SimpleV)
return APInt(64, 0);
if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
return CI->getValue();
@@ -6120,11 +6377,11 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
/// Collection of all malloc-like calls in a function with associated
/// information.
- DenseMap<CallBase *, AllocationInfo *> AllocationInfos;
+ MapVector<CallBase *, AllocationInfo *> AllocationInfos;
/// Collection of all free-like calls in a function with associated
/// information.
- DenseMap<CallBase *, DeallocationInfo *> DeallocationInfos;
+ MapVector<CallBase *, DeallocationInfo *> DeallocationInfos;
ChangeStatus updateImpl(Attributor &A) override;
};
@@ -6167,7 +6424,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
// branches etc.
SmallVector<Value *, 8> Objects;
if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects,
- *this, DI.CB)) {
+ *this, DI.CB,
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs()
<< "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n");
@@ -6239,6 +6497,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
dbgs() << "[H2S] unique free call might free unknown allocations\n");
return false;
}
+ if (DI->PotentialAllocationCalls.empty())
+ return true;
if (DI->PotentialAllocationCalls.size() > 1) {
LLVM_DEBUG(dbgs() << "[H2S] unique free call might free "
<< DI->PotentialAllocationCalls.size()
@@ -6316,7 +6576,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
if (ValidUsesOnly &&
AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared)
- A.emitRemark<OptimizationRemarkMissed>(AI.CB, "OMP113", Remark);
+ A.emitRemark<OptimizationRemarkMissed>(CB, "OMP113", Remark);
LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
ValidUsesOnly = false;
@@ -6348,7 +6608,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
continue;
if (Value *Align = getAllocAlignment(AI.CB, TLI)) {
- if (!getAPInt(A, *this, *Align)) {
+ Optional<APInt> APAlign = getAPInt(A, *this, *Align);
+ if (!APAlign) {
// Can't generate an alloca which respects the required alignment
// on the allocation.
LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB
@@ -6356,14 +6617,23 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
+ } else {
+ if (APAlign->ugt(llvm::Value::MaximumAlignment) ||
+ !APAlign->isPowerOf2()) {
+ LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign
+ << "\n");
+ AI.Status = AllocationInfo::INVALID;
+ Changed = ChangeStatus::CHANGED;
+ continue;
+ }
}
}
if (MaxHeapToStackSize != -1) {
Optional<APInt> Size = getSize(A, *this, AI);
- if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
+ if (!Size || Size.getValue().ugt(MaxHeapToStackSize)) {
LLVM_DEBUG({
- if (!Size.hasValue())
+ if (!Size)
dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n";
else
dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
@@ -6395,8 +6665,10 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
return Changed;
}
+} // namespace
/// ----------------------- Privatizable Pointers ------------------------------
+namespace {
struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A)
: AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {}
@@ -6414,9 +6686,9 @@ struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
/// Return a privatizable type that encloses both T0 and T1.
/// TODO: This is merely a stub for now as we should manage a mapping as well.
Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) {
- if (!T0.hasValue())
+ if (!T0)
return T1;
- if (!T1.hasValue())
+ if (!T1)
return T0;
if (T0 == T1)
return T0;
@@ -6445,11 +6717,13 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Optional<Type *> identifyPrivatizableType(Attributor &A) override {
// If this is a byval argument and we know all the call sites (so we can
// rewrite them), there is no need to check them explicitly.
- bool AllCallSitesKnown;
- if (getIRPosition().hasAttr(Attribute::ByVal) &&
+ bool UsedAssumedInformation = false;
+ SmallVector<Attribute, 1> Attrs;
+ getAttrs({Attribute::ByVal}, Attrs, /* IgnoreSubsumingPositions */ true);
+ if (!Attrs.empty() &&
A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
- true, AllCallSitesKnown))
- return getAssociatedValue().getType()->getPointerElementType();
+ true, UsedAssumedInformation))
+ return Attrs[0].getValueAsType();
Optional<Type *> Ty;
unsigned ArgNo = getIRPosition().getCallSiteArgNo();
@@ -6474,9 +6748,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
- if (CSTy.hasValue() && CSTy.getValue())
+ if (CSTy && CSTy.getValue())
CSTy.getValue()->print(dbgs());
- else if (CSTy.hasValue())
+ else if (CSTy)
dbgs() << "<nullptr>";
else
dbgs() << "<none>";
@@ -6486,19 +6760,20 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
LLVM_DEBUG({
dbgs() << " : New Type: ";
- if (Ty.hasValue() && Ty.getValue())
+ if (Ty && Ty.getValue())
Ty.getValue()->print(dbgs());
- else if (Ty.hasValue())
+ else if (Ty)
dbgs() << "<nullptr>";
else
dbgs() << "<none>";
dbgs() << "\n";
});
- return !Ty.hasValue() || Ty.getValue();
+ return !Ty || Ty.getValue();
};
- if (!A.checkForAllCallSites(CallSiteCheck, *this, true, AllCallSitesKnown))
+ if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
+ UsedAssumedInformation))
return nullptr;
return Ty;
}
@@ -6506,7 +6781,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
PrivatizableType = identifyPrivatizableType(A);
- if (!PrivatizableType.hasValue())
+ if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
if (!PrivatizableType.getValue())
return indicatePessimisticFixpoint();
@@ -6518,8 +6793,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// Avoid arguments with padding for now.
if (!getIRPosition().hasAttr(Attribute::ByVal) &&
- !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
- A.getInfoCache().getDL())) {
+ !isDenselyPacked(*PrivatizableType, A.getInfoCache().getDL())) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
return indicatePessimisticFixpoint();
}
@@ -6527,7 +6801,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// Collect the types that will replace the privatizable type in the function
// signature.
SmallVector<Type *, 16> ReplacementTypes;
- identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+ identifyReplacementTypes(*PrivatizableType, ReplacementTypes);
// Verify callee and caller agree on how the promoted argument would be
// passed.
@@ -6545,9 +6819,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
return TTI->areTypesABICompatible(
CB->getCaller(), CB->getCalledFunction(), ReplacementTypes);
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
- AllCallSitesKnown)) {
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for "
<< Fn.getName() << "\n");
@@ -6595,7 +6869,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
*this, IRPosition::argument(CBArg), DepClassTy::REQUIRED);
if (CBArgPrivAA.isValidState()) {
auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
- if (!CBArgPrivTy.hasValue())
+ if (!CBArgPrivTy)
continue;
if (CBArgPrivTy.getValue() == PrivatizableType)
continue;
@@ -6642,7 +6916,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
DepClassTy::REQUIRED);
if (DCArgPrivAA.isValidState()) {
auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
- if (!DCArgPrivTy.hasValue())
+ if (!DCArgPrivTy)
return true;
if (DCArgPrivTy.getValue() == PrivatizableType)
return true;
@@ -6674,7 +6948,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
};
if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
@@ -6749,8 +7023,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Type *PrivPtrType = PrivType->getPointerTo();
if (Base->getType() != PrivPtrType)
- Base = BitCastInst::CreateBitOrPointerCast(Base, PrivPtrType, "",
- ACS.getInstruction());
+ Base = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+ Base, PrivPtrType, "", ACS.getInstruction());
// Traverse the type, build GEPs and loads.
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
@@ -6784,7 +7058,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
/// See AbstractAttribute::manifest(...)
ChangeStatus manifest(Attributor &A) override {
- if (!PrivatizableType.hasValue())
+ if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
assert(PrivatizableType.getValue() && "Expected privatizable type!");
@@ -6817,14 +7091,16 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Function &ReplacementFn, Function::arg_iterator ArgIt) {
BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
Instruction *IP = &*EntryBB.getFirstInsertionPt();
- Instruction *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+ const DataLayout &DL = IP->getModule()->getDataLayout();
+ unsigned AS = DL.getAllocaAddrSpace();
+ Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS,
Arg->getName() + ".priv", IP);
createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
ArgIt->getArgNo(), *IP);
if (AI->getType() != Arg->getType())
- AI =
- BitCastInst::CreateBitOrPointerCast(AI, Arg->getType(), "", IP);
+ AI = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+ AI, Arg->getType(), "", IP);
Arg->replaceAllUsesWith(AI);
for (CallInst *CI : TailCalls)
@@ -6841,8 +7117,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// When no alignment is specified for the load instruction,
// natural alignment is assumed.
createReplacementValues(
- assumeAligned(AlignAA.getAssumedAlign()),
- PrivatizableType.getValue(), ACS,
+ AlignAA.getAssumedAlign(), *PrivatizableType, ACS,
ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
NewArgOperands);
};
@@ -6850,7 +7125,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// Collect the types that will replace the privatizable type in the function
// signature.
SmallVector<Type *, 16> ReplacementTypes;
- identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+ identifyReplacementTypes(*PrivatizableType, ReplacementTypes);
// Register a rewrite of the argument.
if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes,
@@ -6897,7 +7172,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
auto &PrivArgAA = A.getAAFor<AAPrivatizablePtr>(
*this, IRPosition::argument(*Arg), DepClassTy::REQUIRED);
if (PrivArgAA.isAssumedPrivatizablePtr())
- return Obj->getType()->getPointerElementType();
+ return PrivArgAA.getPrivatizableType();
}
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
@@ -6926,7 +7201,7 @@ struct AAPrivatizablePtrCallSiteArgument final
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
PrivatizableType = identifyPrivatizableType(A);
- if (!PrivatizableType.hasValue())
+ if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
if (!PrivatizableType.getValue())
return indicatePessimisticFixpoint();
@@ -6992,10 +7267,12 @@ struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating {
STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr);
}
};
+} // namespace
/// -------------------- Memory Behavior Attributes ----------------------------
/// Includes read-none, read-only, and write-only.
/// ----------------------------------------------------------------------------
+namespace {
struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
AAMemoryBehaviorImpl(const IRPosition &IRP, Attributor &A)
: AAMemoryBehavior(IRP, A) {}
@@ -7495,6 +7772,7 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
+} // namespace
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7528,6 +7806,7 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr(
return S;
}
+namespace {
struct AAMemoryLocationImpl : public AAMemoryLocation {
AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
@@ -7772,8 +8051,10 @@ void AAMemoryLocationImpl::categorizePtrValue(
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
SmallVector<Value *, 8> Objects;
+ bool UsedAssumedInformation = false;
if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I,
- /* Intraprocedural */ true)) {
+ UsedAssumedInformation,
+ AA::Intraprocedural)) {
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
@@ -8042,9 +8323,11 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
STATS_DECLTRACK_CS_ATTR(readnone)
}
};
+} // namespace
/// ------------------ Value Constant Range Attribute -------------------------
+namespace {
struct AAValueConstantRangeImpl : AAValueConstantRange {
using StateType = IntegerRangeState;
AAValueConstantRangeImpl(const IRPosition &IRP, Attributor &A)
@@ -8379,7 +8662,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return true;
if (!SimplifiedLHS.getValue())
return false;
@@ -8388,7 +8671,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return true;
if (!SimplifiedRHS.getValue())
return false;
@@ -8432,7 +8715,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedOpV =
A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedOpV.hasValue())
+ if (!SimplifiedOpV)
return true;
if (!SimplifiedOpV.getValue())
return false;
@@ -8462,7 +8745,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return true;
if (!SimplifiedLHS.getValue())
return false;
@@ -8471,7 +8754,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return true;
if (!SimplifiedRHS.getValue())
return false;
@@ -8536,7 +8819,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedOpV =
A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedOpV.hasValue())
+ if (!SimplifiedOpV)
return true;
if (!SimplifiedOpV.getValue())
return false;
@@ -8588,8 +8871,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
IntegerRangeState T(getBitWidth());
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T,
VisitValueCB, getCtxI(),
+ UsedAssumedInformation,
/* UseValueSimplify */ false))
return indicatePessimisticFixpoint();
@@ -8683,21 +8968,23 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
STATS_DECLTRACK_CSARG_ATTR(value_range)
}
};
+} // namespace
/// ------------------ Potential Values Attribute -------------------------
-struct AAPotentialValuesImpl : AAPotentialValues {
+namespace {
+struct AAPotentialConstantValuesImpl : AAPotentialConstantValues {
using StateType = PotentialConstantIntValuesState;
- AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A)
- : AAPotentialValues(IRP, A) {}
+ AAPotentialConstantValuesImpl(const IRPosition &IRP, Attributor &A)
+ : AAPotentialConstantValues(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
if (A.hasSimplificationCallback(getIRPosition()))
indicatePessimisticFixpoint();
else
- AAPotentialValues::initialize(A);
+ AAPotentialConstantValues::initialize(A);
}
/// See AbstractAttribute::getAsStr().
@@ -8714,13 +9001,14 @@ struct AAPotentialValuesImpl : AAPotentialValues {
}
};
-struct AAPotentialValuesArgument final
- : AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
+struct AAPotentialConstantValuesArgument final
+ : AAArgumentFromCallSiteArguments<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl,
PotentialConstantIntValuesState> {
- using Base =
- AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
- PotentialConstantIntValuesState>;
- AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
+ using Base = AAArgumentFromCallSiteArguments<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl,
+ PotentialConstantIntValuesState>;
+ AAPotentialConstantValuesArgument(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::initialize(..).
@@ -8738,11 +9026,12 @@ struct AAPotentialValuesArgument final
}
};
-struct AAPotentialValuesReturned
- : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
- using Base =
- AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
- AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
+struct AAPotentialConstantValuesReturned
+ : AAReturnedFromReturnedValues<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl> {
+ using Base = AAReturnedFromReturnedValues<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl>;
+ AAPotentialConstantValuesReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
@@ -8751,13 +9040,13 @@ struct AAPotentialValuesReturned
}
};
-struct AAPotentialValuesFloating : AAPotentialValuesImpl {
- AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesImpl(IRP, A) {}
+struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
+ AAPotentialConstantValuesFloating(const IRPosition &IRP, Attributor &A)
+ : AAPotentialConstantValuesImpl(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
- AAPotentialValuesImpl::initialize(A);
+ AAPotentialConstantValuesImpl::initialize(A);
if (isAtFixpoint())
return;
@@ -8783,7 +9072,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
indicatePessimisticFixpoint();
- LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: "
+ LLVM_DEBUG(dbgs() << "[AAPotentialConstantValues] We give up: "
<< getAssociatedValue() << "\n");
}
@@ -8891,7 +9180,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
@@ -8900,7 +9189,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
@@ -8909,18 +9198,18 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return indicatePessimisticFixpoint();
- auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
- DepClassTy::REQUIRED);
+ auto &LHSAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
if (!LHSAA.isValidState())
return indicatePessimisticFixpoint();
- auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
- DepClassTy::REQUIRED);
+ auto &RHSAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
if (!RHSAA.isValidState())
return indicatePessimisticFixpoint();
- const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
- const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
+ const SetTy &LHSAAPVS = LHSAA.getAssumedSet();
+ const SetTy &RHSAAPVS = RHSAA.getAssumedSet();
// TODO: make use of undef flag to limit potential values aggressively.
bool MaybeTrue = false, MaybeFalse = false;
@@ -8974,7 +9263,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
@@ -8983,7 +9272,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
@@ -8997,21 +9286,21 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
// Check if we only need one operand.
bool OnlyLeft = false, OnlyRight = false;
- if (C.hasValue() && *C && (*C)->isOneValue())
+ if (C && *C && (*C)->isOneValue())
OnlyLeft = true;
- else if (C.hasValue() && *C && (*C)->isZeroValue())
+ else if (C && *C && (*C)->isZeroValue())
OnlyRight = true;
- const AAPotentialValues *LHSAA = nullptr, *RHSAA = nullptr;
+ const AAPotentialConstantValues *LHSAA = nullptr, *RHSAA = nullptr;
if (!OnlyRight) {
- LHSAA = &A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
- DepClassTy::REQUIRED);
+ LHSAA = &A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
if (!LHSAA->isValidState())
return indicatePessimisticFixpoint();
}
if (!OnlyLeft) {
- RHSAA = &A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
- DepClassTy::REQUIRED);
+ RHSAA = &A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
if (!RHSAA->isValidState())
return indicatePessimisticFixpoint();
}
@@ -9049,17 +9338,17 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedSrc =
A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedSrc.hasValue())
+ if (!SimplifiedSrc)
return ChangeStatus::UNCHANGED;
if (!SimplifiedSrc.getValue())
return indicatePessimisticFixpoint();
Src = *SimplifiedSrc;
- auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src),
- DepClassTy::REQUIRED);
+ auto &SrcAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*Src), DepClassTy::REQUIRED);
if (!SrcAA.isValidState())
return indicatePessimisticFixpoint();
- const DenseSet<APInt> &SrcAAPVS = SrcAA.getAssumedSet();
+ const SetTy &SrcAAPVS = SrcAA.getAssumedSet();
if (SrcAA.undefIsContained())
unionAssumedWithUndef();
else {
@@ -9082,7 +9371,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
@@ -9091,7 +9380,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
@@ -9100,18 +9389,18 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return indicatePessimisticFixpoint();
- auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
- DepClassTy::REQUIRED);
+ auto &LHSAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
if (!LHSAA.isValidState())
return indicatePessimisticFixpoint();
- auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
- DepClassTy::REQUIRED);
+ auto &RHSAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
if (!RHSAA.isValidState())
return indicatePessimisticFixpoint();
- const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
- const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
+ const SetTy &LHSAAPVS = LHSAA.getAssumedSet();
+ const SetTy &RHSAAPVS = RHSAA.getAssumedSet();
const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0);
// TODO: make use of undef flag to limit potential values aggressively.
@@ -9150,13 +9439,13 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedIncomingValue = A.getAssumedSimplified(
IRPosition::value(*IncomingValue, getCallBaseContext()), *this,
UsedAssumedInformation);
- if (!SimplifiedIncomingValue.hasValue())
+ if (!SimplifiedIncomingValue)
continue;
if (!SimplifiedIncomingValue.getValue())
return indicatePessimisticFixpoint();
IncomingValue = *SimplifiedIncomingValue;
- auto &PotentialValuesAA = A.getAAFor<AAPotentialValues>(
+ auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>(
*this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED);
if (!PotentialValuesAA.isValidState())
return indicatePessimisticFixpoint();
@@ -9169,30 +9458,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
: ChangeStatus::CHANGED;
}
- ChangeStatus updateWithLoad(Attributor &A, LoadInst &L) {
- if (!L.getType()->isIntegerTy())
- return indicatePessimisticFixpoint();
-
- auto Union = [&](Value &V) {
- if (isa<UndefValue>(V)) {
- unionAssumedWithUndef();
- return true;
- }
- if (ConstantInt *CI = dyn_cast<ConstantInt>(&V)) {
- unionAssumed(CI->getValue());
- return true;
- }
- return false;
- };
- auto AssumedBefore = getAssumed();
-
- if (!AAValueSimplifyImpl::handleLoad(A, *this, L, Union))
- return indicatePessimisticFixpoint();
-
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
@@ -9213,9 +9478,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
if (auto *PHI = dyn_cast<PHINode>(I))
return updateWithPHINode(A, PHI);
- if (auto *L = dyn_cast<LoadInst>(I))
- return updateWithLoad(A, *L);
-
return indicatePessimisticFixpoint();
}
@@ -9225,14 +9487,15 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
}
};
-struct AAPotentialValuesFunction : AAPotentialValuesImpl {
- AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesImpl(IRP, A) {}
+struct AAPotentialConstantValuesFunction : AAPotentialConstantValuesImpl {
+ AAPotentialConstantValuesFunction(const IRPosition &IRP, Attributor &A)
+ : AAPotentialConstantValuesImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
ChangeStatus updateImpl(Attributor &A) override {
- llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will "
- "not be called");
+ llvm_unreachable(
+ "AAPotentialConstantValues(Function|CallSite)::updateImpl will "
+ "not be called");
}
/// See AbstractAttribute::trackStatistics()
@@ -9241,9 +9504,9 @@ struct AAPotentialValuesFunction : AAPotentialValuesImpl {
}
};
-struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
- AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesFunction(IRP, A) {}
+struct AAPotentialConstantValuesCallSite : AAPotentialConstantValuesFunction {
+ AAPotentialConstantValuesCallSite(const IRPosition &IRP, Attributor &A)
+ : AAPotentialConstantValuesFunction(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
@@ -9251,11 +9514,13 @@ struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
}
};
-struct AAPotentialValuesCallSiteReturned
- : AACallSiteReturnedFromReturned<AAPotentialValues, AAPotentialValuesImpl> {
- AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AAPotentialValues,
- AAPotentialValuesImpl>(IRP, A) {}
+struct AAPotentialConstantValuesCallSiteReturned
+ : AACallSiteReturnedFromReturned<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl> {
+ AAPotentialConstantValuesCallSiteReturned(const IRPosition &IRP,
+ Attributor &A)
+ : AACallSiteReturnedFromReturned<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
@@ -9263,13 +9528,15 @@ struct AAPotentialValuesCallSiteReturned
}
};
-struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
- AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesFloating(IRP, A) {}
+struct AAPotentialConstantValuesCallSiteArgument
+ : AAPotentialConstantValuesFloating {
+ AAPotentialConstantValuesCallSiteArgument(const IRPosition &IRP,
+ Attributor &A)
+ : AAPotentialConstantValuesFloating(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
- AAPotentialValuesImpl::initialize(A);
+ AAPotentialConstantValuesImpl::initialize(A);
if (isAtFixpoint())
return;
@@ -9292,8 +9559,8 @@ struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
auto AssumedBefore = getAssumed();
- auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V),
- DepClassTy::REQUIRED);
+ auto &AA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(V), DepClassTy::REQUIRED);
const auto &S = AA.getAssumed();
unionAssumed(S);
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
@@ -9365,7 +9632,7 @@ struct AANoUndefImpl : AANoUndef {
// considered to be dead. We don't manifest noundef in such positions for
// the same reason above.
if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation)
- .hasValue())
+ .has_value())
return ChangeStatus::UNCHANGED;
return AANoUndef::manifest(A);
}
@@ -9400,8 +9667,10 @@ struct AANoUndefFloating : public AANoUndefImpl {
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
@@ -9518,9 +9787,10 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
// Process any value that we might call.
auto ProcessCalledOperand = [&](Value *V) {
bool DummyValue = false;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this,
DummyValue, VisitValue, nullptr,
- false)) {
+ UsedAssumedInformation, false)) {
// If we haven't gone through all values, assume that there are unknown
// callees.
setHasUnknownCallee(true, Change);
@@ -9530,7 +9800,9 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
CallBase *CB = cast<CallBase>(getCtxI());
if (CB->isInlineAsm()) {
- setHasUnknownCallee(false, Change);
+ if (!hasAssumption(*CB->getCaller(), "ompx_no_call_asm") &&
+ !hasAssumption(*CB, "ompx_no_call_asm"))
+ setHasUnknownCallee(false, Change);
return Change;
}
@@ -9584,7 +9856,8 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
// Visit all callable instructions.
bool UsedAssumedInformation = false;
if (!A.checkForAllCallLikeInstructions(ProcessCallInst, *this,
- UsedAssumedInformation)) {
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true)) {
// If we haven't looked at all call like instructions, assume that there
// are unknown callees.
setHasUnknownCallee(true, Change);
@@ -9656,7 +9929,7 @@ private:
ArrayRef<const AACallEdges *> AAEdgesList,
const Function &Fn) {
Optional<bool> Cached = isCachedReachable(Fn);
- if (Cached.hasValue())
+ if (Cached)
return Cached.getValue();
// The query was not cached, thus it is new. We need to request an update
@@ -9691,6 +9964,10 @@ private:
const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
for (Function *Edge : Edges) {
+ // Functions that do not call back into the module can be ignored.
+ if (Edge->hasFnAttribute(Attribute::NoCallback))
+ continue;
+
// We don't need a dependency if the result is reachable.
const AAFunctionReachability &EdgeReachability =
A.getAAFor<AAFunctionReachability>(
@@ -9820,22 +10097,21 @@ public:
}
// Update the Instruction queries.
- const AAReachability *Reachability;
if (!InstQueries.empty()) {
- Reachability = &A.getAAFor<AAReachability>(
+ const AAReachability *Reachability = &A.getAAFor<AAReachability>(
*this, IRPosition::function(*getAssociatedFunction()),
DepClassTy::REQUIRED);
- }
- // Check for local callbases first.
- for (auto &InstPair : InstQueries) {
- SmallVector<const AACallEdges *> CallEdges;
- bool AllKnown =
- getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
- // Update will return change if we this effects any queries.
- if (!AllKnown)
- InstPair.second.CanReachUnknownCallee = true;
- Change |= InstPair.second.update(A, *this, CallEdges);
+ // Check for local callbases first.
+ for (auto &InstPair : InstQueries) {
+ SmallVector<const AACallEdges *> CallEdges;
+ bool AllKnown =
+ getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
+ // Update will return change if we this effects any queries.
+ if (!AllKnown)
+ InstPair.second.CanReachUnknownCallee = true;
+ Change |= InstPair.second.update(A, *this, CallEdges);
+ }
}
return Change;
@@ -9862,13 +10138,15 @@ private:
/// Used to answer if a call base inside this function can reach a specific
/// function.
- DenseMap<const CallBase *, QueryResolver> CBQueries;
+ MapVector<const CallBase *, QueryResolver> CBQueries;
/// This is for instruction queries than scan "forward".
- DenseMap<const Instruction *, QueryResolver> InstQueries;
+ MapVector<const Instruction *, QueryResolver> InstQueries;
};
+} // namespace
/// ---------------------- Assumption Propagation ------------------------------
+namespace {
struct AAAssumptionInfoImpl : public AAAssumptionInfo {
AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A,
const DenseSet<StringRef> &Known)
@@ -9938,12 +10216,13 @@ struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl {
return !getAssumed().empty() || !getKnown().empty();
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
// Get the intersection of all assumptions held by this node's predecessors.
// If we don't know all the call sites then this is either an entry into the
// call graph or an empty node. This node is known to only contain its own
// assumptions and can be propagated to its successors.
- if (!A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown))
+ if (!A.checkForAllCallSites(CallSitePred, *this, true,
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
@@ -10001,6 +10280,7 @@ private:
return Assumptions;
}
};
+} // namespace
AACallGraphNode *AACallEdgeIterator::operator*() const {
return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>(
@@ -10023,6 +10303,7 @@ const char AANoReturn::ID = 0;
const char AAIsDead::ID = 0;
const char AADereferenceable::ID = 0;
const char AAAlign::ID = 0;
+const char AAInstanceInfo::ID = 0;
const char AANoCapture::ID = 0;
const char AAValueSimplify::ID = 0;
const char AAHeapToStack::ID = 0;
@@ -10030,7 +10311,7 @@ const char AAPrivatizablePtr::ID = 0;
const char AAMemoryBehavior::ID = 0;
const char AAMemoryLocation::ID = 0;
const char AAValueConstantRange::ID = 0;
-const char AAPotentialValues::ID = 0;
+const char AAPotentialConstantValues::ID = 0;
const char AANoUndef::ID = 0;
const char AACallEdges::ID = 0;
const char AAFunctionReachability::ID = 0;
@@ -10145,9 +10426,10 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp
index 7c178f9a9834..9e27ae49a901 100644
--- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp
@@ -135,7 +135,8 @@ void BlockExtractor::loadFile() {
if (LineSplit.empty())
continue;
if (LineSplit.size()!=2)
- report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'");
+ report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'",
+ /*GenCrashDiag=*/false);
SmallVector<StringRef, 4> BBNames;
LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
@@ -194,13 +195,15 @@ bool BlockExtractor::runOnModule(Module &M) {
for (const auto &BInfo : BlocksByName) {
Function *F = M.getFunction(BInfo.first);
if (!F)
- report_fatal_error("Invalid function name specified in the input file");
+ report_fatal_error("Invalid function name specified in the input file",
+ /*GenCrashDiag=*/false);
for (const auto &BBInfo : BInfo.second) {
auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
return BB.getName().equals(BBInfo);
});
if (Res == F->end())
- report_fatal_error("Invalid block name specified in the input file");
+ report_fatal_error("Invalid block name specified in the input file",
+ /*GenCrashDiag=*/false);
GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
}
++NextGroupIdx;
@@ -212,7 +215,7 @@ bool BlockExtractor::runOnModule(Module &M) {
for (BasicBlock *BB : BBs) {
// Check if the module contains BB.
if (BB->getParent()->getParent() != &M)
- report_fatal_error("Invalid basic block");
+ report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false);
LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
<< BB->getParent()->getName() << ":" << BB->getName()
<< "\n");
diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 927dceec8865..64bfcb2a9a9f 100644
--- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -19,11 +19,13 @@
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
#include "llvm/Analysis/SparsePropagation.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
+
using namespace llvm;
#define DEBUG_TYPE "called-value-propagation"
@@ -68,7 +70,7 @@ public:
}
};
- CVPLatticeVal() : LatticeState(Undefined) {}
+ CVPLatticeVal() = default;
CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {}
CVPLatticeVal(std::vector<Function *> &&Functions)
: LatticeState(FunctionSet), Functions(std::move(Functions)) {
@@ -94,7 +96,7 @@ public:
private:
/// Holds the state this lattice value is in.
- CVPLatticeStateTy LatticeState;
+ CVPLatticeStateTy LatticeState = Undefined;
/// Holds functions indicating the possible targets of call sites. This set
/// is empty for lattice values in the undefined, overdefined, and untracked
diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 178d3f41963e..73af30ece47c 100644
--- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -85,7 +85,7 @@ static void copyDebugLocMetadata(const GlobalVariable *From,
}
static Align getAlign(GlobalVariable *GV) {
- return GV->getAlign().getValueOr(
+ return GV->getAlign().value_or(
GV->getParent()->getDataLayout().getPreferredAlign(GV));
}
diff --git a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
index 2fe9a59ad210..dfe33ac9da0d 100644
--- a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -15,21 +15,16 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 2a6e38b0437f..99fa4baf355d 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -16,18 +16,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -44,9 +43,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
-#include <cstdint>
#include <utility>
#include <vector>
@@ -55,36 +54,36 @@ using namespace llvm;
#define DEBUG_TYPE "deadargelim"
STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
-STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
-STATISTIC(NumArgumentsReplacedWithUndef,
- "Number of unread args replaced with undef");
+STATISTIC(NumRetValsEliminated, "Number of unused return values removed");
+STATISTIC(NumArgumentsReplacedWithPoison,
+ "Number of unread args replaced with poison");
namespace {
- /// DAE - The dead argument elimination pass.
- class DAE : public ModulePass {
- protected:
- // DAH uses this to specify a different ID.
- explicit DAE(char &ID) : ModulePass(ID) {}
+/// The dead argument elimination pass.
+class DAE : public ModulePass {
+protected:
+ // DAH uses this to specify a different ID.
+ explicit DAE(char &ID) : ModulePass(ID) {}
- public:
- static char ID; // Pass identification, replacement for typeid
+public:
+ static char ID; // Pass identification, replacement for typeid
- DAE() : ModulePass(ID) {
- initializeDAEPass(*PassRegistry::getPassRegistry());
- }
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
- DeadArgumentEliminationPass DAEP(ShouldHackArguments());
- ModuleAnalysisManager DummyMAM;
- PreservedAnalyses PA = DAEP.run(M, DummyMAM);
- return !PA.areAllPreserved();
- }
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ DeadArgumentEliminationPass DAEP(shouldHackArguments());
+ ModuleAnalysisManager DummyMAM;
+ PreservedAnalyses PA = DAEP.run(M, DummyMAM);
+ return !PA.areAllPreserved();
+ }
- virtual bool ShouldHackArguments() const { return false; }
- };
+ virtual bool shouldHackArguments() const { return false; }
+};
} // end anonymous namespace
@@ -94,51 +93,51 @@ INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
namespace {
- /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
- /// deletes arguments to functions which are external. This is only for use
- /// by bugpoint.
- struct DAH : public DAE {
- static char ID;
+/// The DeadArgumentHacking pass, same as dead argument elimination, but deletes
+/// arguments to functions which are external. This is only for use by bugpoint.
+struct DAH : public DAE {
+ static char ID;
- DAH() : DAE(ID) {}
+ DAH() : DAE(ID) {}
- bool ShouldHackArguments() const override { return true; }
- };
+ bool shouldHackArguments() const override { return true; }
+};
} // end anonymous namespace
char DAH::ID = 0;
INITIALIZE_PASS(DAH, "deadarghaX0r",
- "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
- false, false)
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", false,
+ false)
-/// createDeadArgEliminationPass - This pass removes arguments from functions
-/// which are not used by the body of the function.
+/// This pass removes arguments from functions which are not used by the body of
+/// the function.
ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
-/// DeleteDeadVarargs - If this is an function that takes a ... list, and if
-/// llvm.vastart is never called, the varargs list is dead for the function.
-bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
- assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
- if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+/// If this is an function that takes a ... list, and if llvm.vastart is never
+/// called, the varargs list is dead for the function.
+bool DeadArgumentEliminationPass::deleteDeadVarargs(Function &F) {
+ assert(F.getFunctionType()->isVarArg() && "Function isn't varargs!");
+ if (F.isDeclaration() || !F.hasLocalLinkage())
+ return false;
// Ensure that the function is only directly called.
- if (Fn.hasAddressTaken())
+ if (F.hasAddressTaken())
return false;
// Don't touch naked functions. The assembly might be using an argument, or
// otherwise rely on the frame layout in a way that this analysis will not
// see.
- if (Fn.hasFnAttribute(Attribute::Naked)) {
+ if (F.hasFnAttribute(Attribute::Naked)) {
return false;
}
// Okay, we know we can transform this function if safe. Scan its body
// looking for calls marked musttail or calls to llvm.vastart.
- for (BasicBlock &BB : Fn) {
+ for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
@@ -157,25 +156,24 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but doesn't have isVarArg set.
- FunctionType *FTy = Fn.getFunctionType();
+ FunctionType *FTy = F.getFunctionType();
std::vector<Type *> Params(FTy->param_begin(), FTy->param_end());
- FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
- Params, false);
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false);
unsigned NumArgs = Params.size();
// Create the new function body and insert it into the module...
- Function *NF = Function::Create(NFTy, Fn.getLinkage(), Fn.getAddressSpace());
- NF->copyAttributesFrom(&Fn);
- NF->setComdat(Fn.getComdat());
- Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
- NF->takeName(&Fn);
+ Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
+ NF->copyAttributesFrom(&F);
+ NF->setComdat(F.getComdat());
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+ NF->takeName(&F);
- // Loop over all of the callers of the function, transforming the call sites
+ // Loop over all the callers of the function, transforming the call sites
// to pass in a smaller number of arguments into the new function.
//
std::vector<Value *> Args;
- for (User *U : llvm::make_early_inc_range(Fn.users())) {
+ for (User *U : llvm::make_early_inc_range(F.users())) {
CallBase *CB = dyn_cast<CallBase>(U);
if (!CB)
continue;
@@ -189,7 +187,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
SmallVector<AttributeSet, 8> ArgAttrs;
for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
- PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttrs(),
+ PAL = AttributeList::get(F.getContext(), PAL.getFnAttrs(),
PAL.getRetAttrs(), ArgAttrs);
}
@@ -224,64 +222,67 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
// function empty.
- NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+ NF->getBasicBlockList().splice(NF->begin(), F.getBasicBlockList());
// Loop over the argument list, transferring uses of the old arguments over to
- // the new arguments, also transferring over the names as well. While we're at
- // it, remove the dead arguments from the DeadArguments list.
- for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
- I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+ // the new arguments, also transferring over the names as well. While we're
+ // at it, remove the dead arguments from the DeadArguments list.
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(),
+ I2 = NF->arg_begin();
+ I != E; ++I, ++I2) {
// Move the name and users over to the new version.
I->replaceAllUsesWith(&*I2);
I2->takeName(&*I);
}
- // Clone metadatas from the old function, including debug info descriptor.
+ // Clone metadata from the old function, including debug info descriptor.
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- Fn.getAllMetadata(MDs);
+ F.getAllMetadata(MDs);
for (auto MD : MDs)
NF->addMetadata(MD.first, *MD.second);
// Fix up any BlockAddresses that refer to the function.
- Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
+ F.replaceAllUsesWith(ConstantExpr::getBitCast(NF, F.getType()));
// Delete the bitcast that we just created, so that NF does not
// appear to be address-taken.
NF->removeDeadConstantUsers();
// Finally, nuke the old function.
- Fn.eraseFromParent();
+ F.eraseFromParent();
return true;
}
-/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
-/// arguments that are unused, and changes the caller parameters to be undefined
-/// instead.
-bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
+/// Checks if the given function has any arguments that are unused, and changes
+/// the caller parameters to be poison instead.
+bool DeadArgumentEliminationPass::removeDeadArgumentsFromCallers(Function &F) {
// We cannot change the arguments if this TU does not define the function or
// if the linker may choose a function body from another TU, even if the
// nominal linkage indicates that other copies of the function have the same
// semantics. In the below example, the dead load from %p may not have been
- // eliminated from the linker-chosen copy of f, so replacing %p with undef
+ // eliminated from the linker-chosen copy of f, so replacing %p with poison
// in callers may introduce undefined behavior.
//
// define linkonce_odr void @f(i32* %p) {
// %v = load i32 %p
// ret void
// }
- if (!Fn.hasExactDefinition())
+ if (!F.hasExactDefinition())
return false;
- // Functions with local linkage should already have been handled, except the
- // fragile (variadic) ones which we can improve here.
- if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
+ // Functions with local linkage should already have been handled, except if
+ // they are fully alive (e.g., called indirectly) and except for the fragile
+ // (variadic) ones. In these cases, we may still be able to improve their
+ // statically known call sites.
+ if ((F.hasLocalLinkage() && !LiveFunctions.count(&F)) &&
+ !F.getFunctionType()->isVarArg())
return false;
// Don't touch naked functions. The assembly might be using an argument, or
// otherwise rely on the frame layout in a way that this analysis will not
// see.
- if (Fn.hasFnAttribute(Attribute::Naked))
+ if (F.hasFnAttribute(Attribute::Naked))
return false;
- if (Fn.use_empty())
+ if (F.use_empty())
return false;
SmallVector<unsigned, 8> UnusedArgs;
@@ -289,35 +290,36 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
AttributeMask UBImplyingAttributes =
AttributeFuncs::getUBImplyingAttributes();
- for (Argument &Arg : Fn.args()) {
+ for (Argument &Arg : F.args()) {
if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() &&
!Arg.hasPassPointeeByValueCopyAttr()) {
if (Arg.isUsedByMetadata()) {
- Arg.replaceAllUsesWith(UndefValue::get(Arg.getType()));
+ Arg.replaceAllUsesWith(PoisonValue::get(Arg.getType()));
Changed = true;
}
UnusedArgs.push_back(Arg.getArgNo());
- Fn.removeParamAttrs(Arg.getArgNo(), UBImplyingAttributes);
+ F.removeParamAttrs(Arg.getArgNo(), UBImplyingAttributes);
}
}
if (UnusedArgs.empty())
return false;
- for (Use &U : Fn.uses()) {
+ for (Use &U : F.uses()) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
- if (!CB || !CB->isCallee(&U))
+ if (!CB || !CB->isCallee(&U) ||
+ CB->getFunctionType() != F.getFunctionType())
continue;
- // Now go through all unused args and replace them with "undef".
+ // Now go through all unused args and replace them with poison.
for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
unsigned ArgNo = UnusedArgs[I];
Value *Arg = CB->getArgOperand(ArgNo);
- CB->setArgOperand(ArgNo, UndefValue::get(Arg->getType()));
+ CB->setArgOperand(ArgNo, PoisonValue::get(Arg->getType()));
CB->removeParamAttrs(ArgNo, UBImplyingAttributes);
- ++NumArgumentsReplacedWithUndef;
+ ++NumArgumentsReplacedWithPoison;
Changed = true;
}
}
@@ -328,16 +330,15 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
/// Convenience function that returns the number of return values. It returns 0
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
-static unsigned NumRetVals(const Function *F) {
+static unsigned numRetVals(const Function *F) {
Type *RetTy = F->getReturnType();
if (RetTy->isVoidTy())
return 0;
- else if (StructType *STy = dyn_cast<StructType>(RetTy))
+ if (StructType *STy = dyn_cast<StructType>(RetTy))
return STy->getNumElements();
- else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
return ATy->getNumElements();
- else
- return 1;
+ return 1;
}
/// Returns the sub-type a function will return at a given Idx. Should
@@ -349,20 +350,18 @@ static Type *getRetComponentType(const Function *F, unsigned Idx) {
if (StructType *STy = dyn_cast<StructType>(RetTy))
return STy->getElementType(Idx);
- else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
return ATy->getElementType();
- else
- return RetTy;
+ return RetTy;
}
-/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
-/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
-/// liveness of Use.
+/// Checks Use for liveness in LiveValues. If Use is not live, it adds Use to
+/// the MaybeLiveUses argument. Returns the determined liveness of Use.
DeadArgumentEliminationPass::Liveness
-DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use,
+DeadArgumentEliminationPass::markIfNotLive(RetOrArg Use,
UseVector &MaybeLiveUses) {
// We're live if our use or its Function is already marked as live.
- if (IsLive(Use))
+ if (isLive(Use))
return Live;
// We're maybe live otherwise, but remember that we must become live if
@@ -371,127 +370,127 @@ DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use,
return MaybeLive;
}
-/// SurveyUse - This looks at a single use of an argument or return value
-/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
-/// if it causes the used value to become MaybeLive.
+/// Looks at a single use of an argument or return value and determines if it
+/// should be alive or not. Adds this use to MaybeLiveUses if it causes the
+/// used value to become MaybeLive.
///
/// RetValNum is the return value number to use when this use is used in a
/// return instruction. This is used in the recursion, you should always leave
/// it at 0.
DeadArgumentEliminationPass::Liveness
-DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses,
+DeadArgumentEliminationPass::surveyUse(const Use *U, UseVector &MaybeLiveUses,
unsigned RetValNum) {
- const User *V = U->getUser();
- if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
- // The value is returned from a function. It's only live when the
- // function's return value is live. We use RetValNum here, for the case
- // that U is really a use of an insertvalue instruction that uses the
- // original Use.
- const Function *F = RI->getParent()->getParent();
- if (RetValNum != -1U) {
- RetOrArg Use = CreateRet(F, RetValNum);
- // We might be live, depending on the liveness of Use.
- return MarkIfNotLive(Use, MaybeLiveUses);
- } else {
- DeadArgumentEliminationPass::Liveness Result = MaybeLive;
- for (unsigned Ri = 0; Ri < NumRetVals(F); ++Ri) {
- RetOrArg Use = CreateRet(F, Ri);
- // We might be live, depending on the liveness of Use. If any
- // sub-value is live, then the entire value is considered live. This
- // is a conservative choice, and better tracking is possible.
- DeadArgumentEliminationPass::Liveness SubResult =
- MarkIfNotLive(Use, MaybeLiveUses);
- if (Result != Live)
- Result = SubResult;
- }
- return Result;
- }
+ const User *V = U->getUser();
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+ // The value is returned from a function. It's only live when the
+ // function's return value is live. We use RetValNum here, for the case
+ // that U is really a use of an insertvalue instruction that uses the
+ // original Use.
+ const Function *F = RI->getParent()->getParent();
+ if (RetValNum != -1U) {
+ RetOrArg Use = createRet(F, RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return markIfNotLive(Use, MaybeLiveUses);
+ }
+
+ DeadArgumentEliminationPass::Liveness Result = MaybeLive;
+ for (unsigned Ri = 0; Ri < numRetVals(F); ++Ri) {
+ RetOrArg Use = createRet(F, Ri);
+ // We might be live, depending on the liveness of Use. If any
+ // sub-value is live, then the entire value is considered live. This
+ // is a conservative choice, and better tracking is possible.
+ DeadArgumentEliminationPass::Liveness SubResult =
+ markIfNotLive(Use, MaybeLiveUses);
+ if (Result != Live)
+ Result = SubResult;
}
- if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
- if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
- && IV->hasIndices())
- // The use we are examining is inserted into an aggregate. Our liveness
- // depends on all uses of that aggregate, but if it is used as a return
- // value, only index at which we were inserted counts.
- RetValNum = *IV->idx_begin();
+ return Result;
+ }
- // Note that if we are used as the aggregate operand to the insertvalue,
- // we don't change RetValNum, but do survey all our uses.
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex() &&
+ IV->hasIndices())
+      // The use we are examining is inserted into an aggregate. Our liveness
+      // depends on all uses of that aggregate, but if it is used as a return
+      // value, only the index at which we were inserted counts.
+      RetValNum = *IV->idx_begin();
- Liveness Result = MaybeLive;
- for (const Use &UU : IV->uses()) {
- Result = SurveyUse(&UU, MaybeLiveUses, RetValNum);
- if (Result == Live)
- break;
- }
- return Result;
+ // Note that if we are used as the aggregate operand to the insertvalue,
+ // we don't change RetValNum, but do survey all our uses.
+
+ Liveness Result = MaybeLive;
+ for (const Use &UU : IV->uses()) {
+ Result = surveyUse(&UU, MaybeLiveUses, RetValNum);
+ if (Result == Live)
+ break;
}
+ return Result;
+ }
- if (const auto *CB = dyn_cast<CallBase>(V)) {
- const Function *F = CB->getCalledFunction();
- if (F) {
- // Used in a direct call.
+ if (const auto *CB = dyn_cast<CallBase>(V)) {
+ const Function *F = CB->getCalledFunction();
+ if (F) {
+ // Used in a direct call.
- // The function argument is live if it is used as a bundle operand.
- if (CB->isBundleOperand(U))
- return Live;
+ // The function argument is live if it is used as a bundle operand.
+ if (CB->isBundleOperand(U))
+ return Live;
- // Find the argument number. We know for sure that this use is an
- // argument, since if it was the function argument this would be an
- // indirect call and the we know can't be looking at a value of the
- // label type (for the invoke instruction).
- unsigned ArgNo = CB->getArgOperandNo(U);
+ // Find the argument number. We know for sure that this use is an
+ // argument, since if it was the function argument this would be an
+ // indirect call and that we know can't be looking at a value of the
+ // label type (for the invoke instruction).
+ unsigned ArgNo = CB->getArgOperandNo(U);
- if (ArgNo >= F->getFunctionType()->getNumParams())
- // The value is passed in through a vararg! Must be live.
- return Live;
+ if (ArgNo >= F->getFunctionType()->getNumParams())
+ // The value is passed in through a vararg! Must be live.
+ return Live;
- assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) &&
- "Argument is not where we expected it");
+ assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) &&
+ "Argument is not where we expected it");
- // Value passed to a normal call. It's only live when the corresponding
- // argument to the called function turns out live.
- RetOrArg Use = CreateArg(F, ArgNo);
- return MarkIfNotLive(Use, MaybeLiveUses);
- }
+ // Value passed to a normal call. It's only live when the corresponding
+ // argument to the called function turns out live.
+ RetOrArg Use = createArg(F, ArgNo);
+ return markIfNotLive(Use, MaybeLiveUses);
}
- // Used in any other way? Value must be live.
- return Live;
+ }
+ // Used in any other way? Value must be live.
+ return Live;
}
-/// SurveyUses - This looks at all the uses of the given value
+/// Looks at all the uses of the given value.
/// Returns the Liveness deduced from the uses of this value.
///
/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If
/// the result is Live, MaybeLiveUses might be modified but its content should
/// be ignored (since it might not be complete).
DeadArgumentEliminationPass::Liveness
-DeadArgumentEliminationPass::SurveyUses(const Value *V,
+DeadArgumentEliminationPass::surveyUses(const Value *V,
UseVector &MaybeLiveUses) {
// Assume it's dead (which will only hold if there are no uses at all..).
Liveness Result = MaybeLive;
// Check each use.
for (const Use &U : V->uses()) {
- Result = SurveyUse(&U, MaybeLiveUses);
+ Result = surveyUse(&U, MaybeLiveUses);
if (Result == Live)
break;
}
return Result;
}
-// SurveyFunction - This performs the initial survey of the specified function,
-// checking out whether or not it uses any of its incoming arguments or whether
-// any callers use the return value. This fills in the LiveValues set and Uses
-// map.
-//
-// We consider arguments of non-internal functions to be intrinsically alive as
-// well as arguments to functions which have their "address taken".
-void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
+/// Performs the initial survey of the specified function, checking out whether
+/// it uses any of its incoming arguments or whether any callers use the return
+/// value. This fills in the LiveValues set and Uses map.
+///
+/// We consider arguments of non-internal functions to be intrinsically alive as
+/// well as arguments to functions which have their "address taken".
+void DeadArgumentEliminationPass::surveyFunction(const Function &F) {
// Functions with inalloca/preallocated parameters are expecting args in a
// particular register and memory layout.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
- MarkLive(F);
+ markLive(F);
return;
}
@@ -499,11 +498,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// otherwise rely on the frame layout in a way that this analysis will not
// see.
if (F.hasFnAttribute(Attribute::Naked)) {
- MarkLive(F);
+ markLive(F);
return;
}
- unsigned RetCount = NumRetVals(&F);
+ unsigned RetCount = numRetVals(&F);
// Assume all return values are dead
using RetVals = SmallVector<Liveness, 5>;
@@ -518,20 +517,10 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
RetUses MaybeLiveRetUses(RetCount);
bool HasMustTailCalls = false;
-
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
- != F.getFunctionType()->getReturnType()) {
- // We don't support old style multiple return values.
- MarkLive(F);
- return;
- }
- }
-
+ for (const BasicBlock &BB : F) {
// If we have any returns of `musttail` results - the signature can't
// change
- if (BB->getTerminatingMustTailCall() != nullptr)
+ if (BB.getTerminatingMustTailCall() != nullptr)
HasMustTailCalls = true;
}
@@ -541,7 +530,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
}
if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) {
- MarkLive(F);
+ markLive(F);
return;
}
@@ -559,8 +548,9 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// If the function is PASSED IN as an argument, its address has been
// taken.
const auto *CB = dyn_cast<CallBase>(U.getUser());
- if (!CB || !CB->isCallee(&U)) {
- MarkLive(F);
+ if (!CB || !CB->isCallee(&U) ||
+ CB->getFunctionType() != F.getFunctionType()) {
+ markLive(F);
return;
}
@@ -577,13 +567,13 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
continue;
// Check all uses of the return value.
- for (const Use &U : CB->uses()) {
- if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+ for (const Use &UU : CB->uses()) {
+ if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(UU.getUser())) {
// This use uses a part of our return value, survey the uses of
// that part and store the results for this index only.
unsigned Idx = *Ext->idx_begin();
if (RetValLiveness[Idx] != Live) {
- RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ RetValLiveness[Idx] = surveyUses(Ext, MaybeLiveRetUses[Idx]);
if (RetValLiveness[Idx] == Live)
NumLiveRetVals++;
}
@@ -591,16 +581,16 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Used by something else than extractvalue. Survey, but assume that the
// result applies to all sub-values.
UseVector MaybeLiveAggregateUses;
- if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
+ if (surveyUse(&UU, MaybeLiveAggregateUses) == Live) {
NumLiveRetVals = RetCount;
RetValLiveness.assign(RetCount, Live);
break;
- } else {
- for (unsigned Ri = 0; Ri != RetCount; ++Ri) {
- if (RetValLiveness[Ri] != Live)
- MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(),
- MaybeLiveAggregateUses.end());
- }
+ }
+
+ for (unsigned Ri = 0; Ri != RetCount; ++Ri) {
+ if (RetValLiveness[Ri] != Live)
+ MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(),
+ MaybeLiveAggregateUses.end());
}
}
}
@@ -613,7 +603,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Now we've inspected all callers, record the liveness of our return values.
for (unsigned Ri = 0; Ri != RetCount; ++Ri)
- MarkValue(CreateRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]);
+ markValue(createRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]);
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting args for fn: "
<< F.getName() << "\n");
@@ -641,81 +631,77 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
} else {
// See what the effect of this use is (recording any uses that cause
// MaybeLive in MaybeLiveArgUses).
- Result = SurveyUses(&*AI, MaybeLiveArgUses);
+ Result = surveyUses(&*AI, MaybeLiveArgUses);
}
// Mark the result.
- MarkValue(CreateArg(&F, ArgI), Result, MaybeLiveArgUses);
+ markValue(createArg(&F, ArgI), Result, MaybeLiveArgUses);
// Clear the vector again for the next iteration.
MaybeLiveArgUses.clear();
}
}
-/// MarkValue - This function marks the liveness of RA depending on L. If L is
-/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
-/// such that RA will be marked live if any use in MaybeLiveUses gets marked
-/// live later on.
-void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L,
+/// Marks the liveness of RA depending on L. If L is MaybeLive, it also takes
+/// all uses in MaybeLiveUses and records them in Uses, such that RA will be
+/// marked live if any use in MaybeLiveUses gets marked live later on.
+void DeadArgumentEliminationPass::markValue(const RetOrArg &RA, Liveness L,
const UseVector &MaybeLiveUses) {
switch (L) {
- case Live:
- MarkLive(RA);
- break;
- case MaybeLive:
- assert(!IsLive(RA) && "Use is already live!");
- for (const auto &MaybeLiveUse : MaybeLiveUses) {
- if (IsLive(MaybeLiveUse)) {
- // A use is live, so this value is live.
- MarkLive(RA);
- break;
- } else {
- // Note any uses of this value, so this value can be
- // marked live whenever one of the uses becomes live.
- Uses.insert(std::make_pair(MaybeLiveUse, RA));
- }
+ case Live:
+ markLive(RA);
+ break;
+ case MaybeLive:
+ assert(!isLive(RA) && "Use is already live!");
+ for (const auto &MaybeLiveUse : MaybeLiveUses) {
+ if (isLive(MaybeLiveUse)) {
+ // A use is live, so this value is live.
+ markLive(RA);
+ break;
}
- break;
+ // Note any uses of this value, so this value can be
+ // marked live whenever one of the uses becomes live.
+ Uses.emplace(MaybeLiveUse, RA);
+ }
+ break;
}
}
-/// MarkLive - Mark the given Function as alive, meaning that it cannot be
-/// changed in any way. Additionally,
-/// mark any values that are used as this function's parameters or by its return
-/// values (according to Uses) live as well.
-void DeadArgumentEliminationPass::MarkLive(const Function &F) {
+/// Mark the given Function as alive, meaning that it cannot be changed in any
+/// way. Additionally, mark any values that are used as this function's
+/// parameters or by its return values (according to Uses) live as well.
+void DeadArgumentEliminationPass::markLive(const Function &F) {
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Intrinsically live fn: "
<< F.getName() << "\n");
// Mark the function as live.
LiveFunctions.insert(&F);
// Mark all arguments as live.
for (unsigned ArgI = 0, E = F.arg_size(); ArgI != E; ++ArgI)
- PropagateLiveness(CreateArg(&F, ArgI));
+ propagateLiveness(createArg(&F, ArgI));
// Mark all return values as live.
- for (unsigned Ri = 0, E = NumRetVals(&F); Ri != E; ++Ri)
- PropagateLiveness(CreateRet(&F, Ri));
+ for (unsigned Ri = 0, E = numRetVals(&F); Ri != E; ++Ri)
+ propagateLiveness(createRet(&F, Ri));
}
-/// MarkLive - Mark the given return value or argument as live. Additionally,
-/// mark any values that are used by this value (according to Uses) live as
-/// well.
-void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) {
- if (IsLive(RA))
+/// Mark the given return value or argument as live. Additionally, mark any
+/// values that are used by this value (according to Uses) live as well.
+void DeadArgumentEliminationPass::markLive(const RetOrArg &RA) {
+ if (isLive(RA))
return; // Already marked Live.
LiveValues.insert(RA);
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking "
<< RA.getDescription() << " live\n");
- PropagateLiveness(RA);
+ propagateLiveness(RA);
}
-bool DeadArgumentEliminationPass::IsLive(const RetOrArg &RA) {
+bool DeadArgumentEliminationPass::isLive(const RetOrArg &RA) {
return LiveFunctions.count(RA.F) || LiveValues.count(RA);
}
-/// PropagateLiveness - Given that RA is a live value, propagate it's liveness
-/// to any other values it uses (according to Uses).
-void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) {
+/// Given that RA is a live value, propagate its liveness to any other values
+/// it uses (according to Uses).
+void DeadArgumentEliminationPass::propagateLiveness(const RetOrArg &RA) {
// We don't use upper_bound (or equal_range) here, because our recursive call
// to ourselves is likely to cause the upper_bound (which is the first value
// not belonging to RA) to become erased and the iterator invalidated.
@@ -723,18 +709,17 @@ void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) {
UseMap::iterator E = Uses.end();
UseMap::iterator I;
for (I = Begin; I != E && I->first == RA; ++I)
- MarkLive(I->second);
+ markLive(I->second);
// Erase RA from the Uses map (from the lower bound to wherever we ended up
// after the loop).
Uses.erase(Begin, I);
}
-// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
-// that are not in LiveValues. Transform the function and all of the callees of
-// the function to not have these arguments and return values.
-//
-bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
+/// Remove any arguments and return values from F that are not in LiveValues.
+/// Transform the function and all the callees of the function to not have these
+/// arguments and return values.
+bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) {
// Don't modify fully live functions
if (LiveFunctions.count(F))
return false;
@@ -742,7 +727,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has fewer arguments and a different return type.
FunctionType *FTy = F->getFunctionType();
- std::vector<Type*> Params;
+ std::vector<Type *> Params;
// Keep track of if we have a live 'returned' argument
bool HasLiveReturnedArg = false;
@@ -759,7 +744,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
unsigned ArgI = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgI) {
- RetOrArg Arg = CreateArg(F, ArgI);
+ RetOrArg Arg = createArg(F, ArgI);
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
ArgAlive[ArgI] = true;
@@ -776,11 +761,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Find out the new return value.
Type *RetTy = FTy->getReturnType();
Type *NRetTy = nullptr;
- unsigned RetCount = NumRetVals(F);
+ unsigned RetCount = numRetVals(F);
// -1 means unused, other numbers are the new index
SmallVector<int, 5> NewRetIdxs(RetCount, -1);
- std::vector<Type*> RetTypes;
+ std::vector<Type *> RetTypes;
// If there is a function with a live 'returned' argument but a dead return
// value, then there are two possible actions:
@@ -792,9 +777,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// It's not clear in the general case which option is more profitable because,
// even in the absence of explicit uses of the return value, code generation
// is free to use the 'returned' attribute to do things like eliding
- // save/restores of registers across calls. Whether or not this happens is
- // target and ABI-specific as well as depending on the amount of register
- // pressure, so there's no good way for an IR-level pass to figure this out.
+ // save/restores of registers across calls. Whether this happens is target and
+ // ABI-specific as well as depending on the amount of register pressure, so
+ // there's no good way for an IR-level pass to figure this out.
//
// Fortunately, the only places where 'returned' is currently generated by
// the FE are places where 'returned' is basically free and almost always a
@@ -806,7 +791,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
} else {
// Look at each of the original return values individually.
for (unsigned Ri = 0; Ri != RetCount; ++Ri) {
- RetOrArg Ret = CreateRet(F, Ri);
+ RetOrArg Ret = createRet(F, Ri);
if (LiveValues.erase(Ret)) {
RetTypes.push_back(getRetComponentType(F, Ri));
NewRetIdxs[Ri] = RetTypes.size() - 1;
@@ -879,9 +864,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
- // Loop over all of the callers of the function, transforming the call sites
- // to pass in a smaller number of arguments into the new function.
- std::vector<Value*> Args;
+ // Loop over all the callers of the function, transforming the call sites to
+ // pass in a smaller number of arguments into the new function.
+ std::vector<Value *> Args;
while (!F->use_empty()) {
CallBase &CB = cast<CallBase>(*F->user_back());
@@ -896,7 +881,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
- auto I = CB.arg_begin();
+ auto *I = CB.arg_begin();
unsigned Pi = 0;
// Loop over those operands, corresponding to the normal arguments to the
// original function, and add those that are still alive.
@@ -909,11 +894,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
// call sites keep the return value alive just like 'returned'
- // attributes on function declaration but it's less clearly a win and
+ // attributes on function declaration, but it's less clearly a win and
// this is not an expected case anyway
ArgAttrVec.push_back(AttributeSet::get(
- F->getContext(),
- AttrBuilder(F->getContext(), Attrs).removeAttribute(Attribute::Returned)));
+ F->getContext(), AttrBuilder(F->getContext(), Attrs)
+ .removeAttribute(Attribute::Returned)));
} else {
// Otherwise, use the original attributes.
ArgAttrVec.push_back(Attrs);
@@ -921,7 +906,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
}
// Push any varargs arguments on the list. Don't forget their attributes.
- for (auto E = CB.arg_end(); I != E; ++I, ++Pi) {
+ for (auto *E = CB.arg_end(); I != E; ++I, ++Pi) {
Args.push_back(*I);
ArgAttrVec.push_back(CallPAL.getParamAttrs(Pi));
}
@@ -934,8 +919,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
AttributeSet FnAttrs = CallPAL.getFnAttrs().removeAttribute(
F->getContext(), Attribute::AllocSize);
- AttributeList NewCallPAL = AttributeList::get(
- F->getContext(), FnAttrs, RetAttrs, ArgAttrVec);
+ AttributeList NewCallPAL =
+ AttributeList::get(F->getContext(), FnAttrs, RetAttrs, ArgAttrVec);
SmallVector<OperandBundleDef, 1> OpBundles;
CB.getOperandBundlesAsDefs(OpBundles);
@@ -961,10 +946,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
CB.replaceAllUsesWith(NewCB);
NewCB->takeName(&CB);
} else if (NewCB->getType()->isVoidTy()) {
- // If the return value is dead, replace any uses of it with undef
+ // If the return value is dead, replace any uses of it with poison
// (any non-debug value uses will get removed later on).
if (!CB.getType()->isX86_MMXTy())
- CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
+ CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
} else {
assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
@@ -980,8 +965,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// with all the uses, we will just rebuild it using extract/insertvalue
// chaining and let instcombine clean that up.
//
- // Start out building up our return value from undef
- Value *RetVal = UndefValue::get(RetTy);
+ // Start out building up our return value from poison
+ Value *RetVal = PoisonValue::get(RetTy);
for (unsigned Ri = 0; Ri != RetCount; ++Ri)
if (NewRetIdxs[Ri] != -1) {
Value *V;
@@ -1026,10 +1011,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
I2->takeName(&*I);
++I2;
} else {
- // If this argument is dead, replace any uses of it with undef
+ // If this argument is dead, replace any uses of it with poison
// (any non-debug value uses will get removed later on).
if (!I->getType()->isX86_MMXTy())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
}
// If we change the return value of the function we must rewrite any return
@@ -1048,8 +1033,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// This does generate messy code, but we'll let it to instcombine to
// clean that up.
Value *OldRet = RI->getOperand(0);
- // Start out building up our return value from undef
- RetVal = UndefValue::get(NRetTy);
+ // Start out building up our return value from poison
+ RetVal = PoisonValue::get(NRetTy);
for (unsigned RetI = 0; RetI != RetCount; ++RetI)
if (NewRetIdxs[RetI] != -1) {
Value *EV = IRB.CreateExtractValue(OldRet, RetI, "oldret");
@@ -1074,12 +1059,22 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
BB.getInstList().erase(RI);
}
- // Clone metadatas from the old function, including debug info descriptor.
+ // Clone metadata from the old function, including debug info descriptor.
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
F->getAllMetadata(MDs);
for (auto MD : MDs)
NF->addMetadata(MD.first, *MD.second);
+ // If either the return value(s) or argument(s) are removed, then probably the
+ // function does not follow standard calling conventions anymore. Hence, add
+ // DW_CC_nocall to DISubroutineType to inform debugger that it may not be safe
+ // to call this function or try to interpret the return value.
+ if (NFTy != FTy && NF->getSubprogram()) {
+ DISubprogram *SP = NF->getSubprogram();
+ auto Temp = SP->getType()->cloneWithCC(llvm::dwarf::DW_CC_nocall);
+ SP->replaceType(MDNode::replaceWithPermanent(std::move(Temp)));
+ }
+
// Now that the old function is dead, delete it.
F->eraseFromParent();
@@ -1097,26 +1092,25 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n");
for (Function &F : llvm::make_early_inc_range(M))
if (F.getFunctionType()->isVarArg())
- Changed |= DeleteDeadVarargs(F);
+ Changed |= deleteDeadVarargs(F);
- // Second phase:loop through the module, determining which arguments are live.
- // We assume all arguments are dead unless proven otherwise (allowing us to
- // determine that dead arguments passed into recursive functions are dead).
- //
+ // Second phase: Loop through the module, determining which arguments are
+ // live. We assume all arguments are dead unless proven otherwise (allowing us
+ // to determine that dead arguments passed into recursive functions are dead).
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Determining liveness\n");
for (auto &F : M)
- SurveyFunction(F);
+ surveyFunction(F);
// Now, remove all dead arguments and return values from each function in
// turn. We use make_early_inc_range here because functions will probably get
// removed (i.e. replaced by new ones).
for (Function &F : llvm::make_early_inc_range(M))
- Changed |= RemoveDeadStuffFromFunction(&F);
+ Changed |= removeDeadStuffFromFunction(&F);
// Finally, look for any unused parameters in functions with non-local
- // linkage and replace the passed in parameters with undef.
+ // linkage and replace the passed in parameters with poison.
for (auto &F : M)
- Changed |= RemoveDeadArgumentsFromCallers(F);
+ Changed |= removeDeadArgumentsFromCallers(F);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 387f114f6ffa..84280781ee70 100644
--- a/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 16d00a0c89e1..b10c2ea13469 100644
--- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -8,9 +8,9 @@
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 213a998d5bba..49077f92884f 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -30,7 +30,6 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -45,6 +44,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -69,6 +69,7 @@ using namespace llvm;
#define DEBUG_TYPE "function-attrs"
+STATISTIC(NumArgMemOnly, "Number of functions marked argmemonly");
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
@@ -121,28 +122,28 @@ using SCCNodeSet = SmallSetVector<Function *, 8>;
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
-static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
- AAResults &AAR,
- const SCCNodeSet &SCCNodes) {
+static FunctionModRefBehavior
+checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
+ const SCCNodeSet &SCCNodes) {
FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
if (MRB == FMRB_DoesNotAccessMemory)
// Already perfect!
- return MAK_ReadNone;
+ return MRB;
- if (!ThisBody) {
- if (AliasAnalysis::onlyReadsMemory(MRB))
- return MAK_ReadOnly;
-
- if (AliasAnalysis::onlyWritesMemory(MRB))
- return MAK_WriteOnly;
-
- // Conservatively assume it reads and writes to memory.
- return MAK_MayWrite;
- }
+ if (!ThisBody)
+ return MRB;
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
bool WritesMemory = false;
+ // Track if the function accesses memory not based on pointer arguments or
+ // allocas.
+ bool AccessesNonArgsOrAlloca = false;
+ // Returns true if Ptr is not based on a function argument.
+ auto IsArgumentOrAlloca = [](const Value *Ptr) {
+ const Value *UO = getUnderlyingObject(Ptr);
+ return isa<Argument>(UO) || isa<AllocaInst>(UO);
+ };
for (Instruction &I : instructions(F)) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
@@ -175,6 +176,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// If it reads, note it.
if (isRefSet(MRI))
ReadsMemory = true;
+ AccessesNonArgsOrAlloca = true;
continue;
}
@@ -187,12 +189,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
MemoryLocation Loc =
MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata());
-
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
+ AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
+
if (isModSet(MRI))
// Writes non-local memory.
WritesMemory = true;
@@ -202,24 +205,29 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
}
continue;
} else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ MemoryLocation Loc = MemoryLocation::get(LI);
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
- if (!LI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(LI);
- if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
+ if (!LI->isVolatile() &&
+ AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ MemoryLocation Loc = MemoryLocation::get(SI);
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
- if (!SI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(SI);
- if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
+ if (!SI->isVolatile() &&
+ AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
} else if (VAArgInst *VI = dyn_cast<VAArgInst>(&I)) {
// Ignore vaargs on local memory.
MemoryLocation Loc = MemoryLocation::get(VI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
+ AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
+ } else {
+ // If AccessesNonArgsOrAlloca has not been updated above, set it
+ // conservatively.
+ AccessesNonArgsOrAlloca |= I.mayReadOrWriteMemory();
}
// Any remaining instructions need to be taken seriously! Check if they
@@ -232,61 +240,74 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
ReadsMemory |= I.mayReadFromMemory();
}
- if (WritesMemory) {
- if (!ReadsMemory)
- return MAK_WriteOnly;
- else
- return MAK_MayWrite;
- }
+ if (!WritesMemory && !ReadsMemory)
+ return FMRB_DoesNotAccessMemory;
- return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
+ FunctionModRefBehavior Result = FunctionModRefBehavior(FMRL_Anywhere);
+ if (!AccessesNonArgsOrAlloca)
+ Result = FunctionModRefBehavior(FMRL_ArgumentPointees);
+ if (WritesMemory)
+ Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Mod));
+ if (ReadsMemory)
+ Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Ref));
+ return Result;
}
-MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
- AAResults &AAR) {
+FunctionModRefBehavior llvm::computeFunctionBodyMemoryAccess(Function &F,
+ AAResults &AAR) {
return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
}
-/// Deduce readonly/readnone attributes for the SCC.
+/// Deduce readonly/readnone/writeonly attributes for the SCC.
template <typename AARGetterT>
-static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
- SmallSet<Function *, 8> &Changed) {
+static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+ SmallSet<Function *, 8> &Changed) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
bool WritesMemory = false;
+ // Check if all functions only access memory through their arguments.
+ bool ArgMemOnly = true;
for (Function *F : SCCNodes) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
-
// Non-exact function definitions may not be selected at link time, and an
// alternative version that writes to memory may be selected. See the
// comment on GlobalValue::isDefinitionExact for more details.
- switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
- AAR, SCCNodes)) {
- case MAK_MayWrite:
+ FunctionModRefBehavior FMRB =
+ checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
+ if (FMRB == FMRB_DoesNotAccessMemory)
+ continue;
+ ModRefInfo MR = createModRefInfo(FMRB);
+ ReadsMemory |= isRefSet(MR);
+ WritesMemory |= isModSet(MR);
+ ArgMemOnly &= AliasAnalysis::onlyAccessesArgPointees(FMRB);
+ // Reached neither readnone, readonly, writeonly nor argmemonly can be
+ // inferred. Exit.
+ if (ReadsMemory && WritesMemory && !ArgMemOnly)
return;
- case MAK_ReadOnly:
- ReadsMemory = true;
- break;
- case MAK_WriteOnly:
- WritesMemory = true;
- break;
- case MAK_ReadNone:
- // Nothing to do!
- break;
- }
}
- // If the SCC contains both functions that read and functions that write, then
- // we cannot add readonly attributes.
- if (ReadsMemory && WritesMemory)
- return;
-
- // Success! Functions in this SCC do not access memory, or only read memory.
- // Give them the appropriate attribute.
+ assert((!ReadsMemory || !WritesMemory || ArgMemOnly) &&
+ "no memory attributes can be added for this SCC, should have exited "
+ "earlier");
+ // Success! Functions in this SCC do not access memory, only read memory,
+ // only write memory, or only access memory through its arguments. Give them
+ // the appropriate attribute.
for (Function *F : SCCNodes) {
+ // If possible add argmemonly attribute to F, if it accesses memory.
+ if (ArgMemOnly && !F->onlyAccessesArgMemory() &&
+ (ReadsMemory || WritesMemory)) {
+ NumArgMemOnly++;
+ F->addFnAttr(Attribute::ArgMemOnly);
+ Changed.insert(F);
+ }
+
+ // The SCC contains functions both writing and reading from memory. We
+ // cannot add readonly or writeonline attributes.
+ if (ReadsMemory && WritesMemory)
+ continue;
if (F->doesNotAccessMemory())
// Already perfect!
continue;
@@ -1614,6 +1635,26 @@ static bool basicBlockCanReturn(BasicBlock &BB) {
return none_of(BB, instructionDoesNotReturn);
}
+// FIXME: this doesn't handle recursion.
+static bool canReturn(Function &F) {
+ SmallVector<BasicBlock *, 16> Worklist;
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ Visited.insert(&F.front());
+ Worklist.push_back(&F.front());
+
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (basicBlockCanReturn(*BB))
+ return true;
+ for (BasicBlock *Succ : successors(BB))
+ if (Visited.insert(Succ).second)
+ Worklist.push_back(Succ);
+ } while (!Worklist.empty());
+
+ return false;
+}
+
// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
@@ -1622,9 +1663,7 @@ static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
F->doesNotReturn())
continue;
- // The function can return if any basic blocks can return.
- // FIXME: this doesn't handle recursion or unreachable blocks.
- if (none_of(*F, basicBlockCanReturn)) {
+ if (!canReturn(*F)) {
F->setDoesNotReturn();
Changed.insert(F);
}
@@ -1792,7 +1831,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
SmallSet<Function *, 8> Changed;
addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
- addReadAttrs(Nodes.SCCNodes, AARGetter, Changed);
+ addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
addArgumentAttrs(Nodes.SCCNodes, Changed);
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
@@ -1896,6 +1935,7 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
char PostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
"Deduce function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
@@ -1975,12 +2015,13 @@ static bool addNoRecurseAttrsTopDown(Function &F) {
// this function could be recursively (indirectly) called. Note that this
// also detects if F is directly recursive as F is not yet marked as
// a norecurse function.
- for (auto *U : F.users()) {
- auto *I = dyn_cast<Instruction>(U);
+ for (auto &U : F.uses()) {
+ auto *I = dyn_cast<Instruction>(U.getUser());
if (!I)
return false;
CallBase *CB = dyn_cast<CallBase>(I);
- if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
+ if (!CB || !CB->isCallee(&U) ||
+ !CB->getParent()->getParent()->doesNotRecurse())
return false;
}
F.setDoesNotRecurse();
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index d9b43109f629..56e2df14ff38 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
@@ -33,8 +32,6 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
#include "llvm/Linker/IRMover.h"
-#include "llvm/Object/ModuleSymbolTable.h"
-#include "llvm/Object/SymbolicFile.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -1112,12 +1109,13 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule,
llvm_unreachable("Expected GV to be converted");
} else {
// If all copies of the original symbol had global unnamed addr and
- // linkonce_odr linkage, it should be an auto hide symbol. In that case
- // the thin link would have marked it as CanAutoHide. Add hidden visibility
- // to the symbol to preserve the property.
+ // linkonce_odr linkage, or if all of them had local unnamed addr linkage
+ // and are constants, then it should be an auto hide symbol. In that case
+ // the thin link would have marked it as CanAutoHide. Add hidden
+ // visibility to the symbol to preserve the property.
if (NewLinkage == GlobalValue::WeakODRLinkage &&
GS->second->canAutoHide()) {
- assert(GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr());
+ assert(GV.canBeOmittedFromSymbolTable());
GV.setVisibility(GlobalValue::HiddenVisibility);
}
@@ -1330,10 +1328,9 @@ Expected<bool> FunctionImporter::importFunctions(
<< " from " << SrcModule->getSourceFileName() << "\n";
}
- if (Error Err = Mover.move(
- std::move(SrcModule), GlobalsToImport.getArrayRef(),
- [](GlobalValue &, IRMover::ValueAdder) {},
- /*IsPerformingImport=*/true))
+ if (Error Err = Mover.move(std::move(SrcModule),
+ GlobalsToImport.getArrayRef(), nullptr,
+ /*IsPerformingImport=*/true))
report_fatal_error(Twine("Function Import: link error: ") +
toString(std::move(Err)));
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 6c3cc3914337..dafd0dc865a2 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -19,11 +19,8 @@
// Current limitations:
// - It does not yet handle integer ranges. We do support "literal constants",
// but that's off by default under an option.
-// - Only 1 argument per function is specialised,
// - The cost-model could be further looked into (it mainly focuses on inlining
// benefits),
-// - We are not yet caching analysis results, but profiling and checking where
-// extra compile time is spent didn't suggest this to be a problem.
//
// Ideas:
// - With a function specialization attribute for arguments, we could have
@@ -49,15 +46,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueLattice.h"
+#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/SCCPSolver.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <cmath>
@@ -98,8 +96,13 @@ static cl::opt<bool> SpecializeOnAddresses(
"func-specialization-on-address", cl::init(false), cl::Hidden,
cl::desc("Enable function specialization on the address of global values"));
-// TODO: This needs checking to see the impact on compile-times, which is why
-// this is off by default for now.
+// Disabled by default as it can significantly increase compilation times.
+// Running nikic's compile time tracker on x86 with instruction count as the
+// metric shows 3-4% regression for SPASS while being neutral for all other
+// benchmarks of the llvm test suite.
+//
+// https://llvm-compile-time-tracker.com
+// https://github.com/nikic/llvm-compile-time-tracker
static cl::opt<bool> EnableSpecializationForLiteralConstant(
"function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
cl::desc("Enable specialization of functions that take a literal constant "
@@ -108,24 +111,18 @@ static cl::opt<bool> EnableSpecializationForLiteralConstant(
namespace {
// Bookkeeping struct to pass data from the analysis and profitability phase
// to the actual transform helper functions.
-struct ArgInfo {
- Function *Fn; // The function to perform specialisation on.
- Argument *Arg; // The Formal argument being analysed.
- Constant *Const; // A corresponding actual constant argument.
- InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
-
- // Flag if this will be a partial specialization, in which case we will need
- // to keep the original function around in addition to the added
- // specializations.
- bool Partial = false;
-
- ArgInfo(Function *F, Argument *A, Constant *C, InstructionCost G)
- : Fn(F), Arg(A), Const(C), Gain(G){};
+struct SpecializationInfo {
+ SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
+ InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
};
} // Anonymous namespace
using FuncList = SmallVectorImpl<Function *>;
-using ConstList = SmallVectorImpl<Constant *>;
+using CallArgBinding = std::pair<CallBase *, Constant *>;
+using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
+// We are using MapVector because it guarantees deterministic iteration
+// order across executions.
+using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
// Helper to check if \p LV is either a constant or a constant
// range with a single element. This should cover exactly the same cases as the
@@ -204,41 +201,45 @@ static Constant *getConstantStackValue(CallInst *Call, Value *Val,
// ret void
// }
//
-static void constantArgPropagation(FuncList &WorkList,
- Module &M, SCCPSolver &Solver) {
+static void constantArgPropagation(FuncList &WorkList, Module &M,
+ SCCPSolver &Solver) {
// Iterate over the argument tracked functions see if there
// are any new constant values for the call instruction via
// stack variables.
for (auto *F : WorkList) {
- // TODO: Generalize for any read only arguments.
- if (F->arg_size() != 1)
- continue;
-
- auto &Arg = *F->arg_begin();
- if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy())
- continue;
for (auto *User : F->users()) {
+
auto *Call = dyn_cast<CallInst>(User);
if (!Call)
- break;
- auto *ArgOp = Call->getArgOperand(0);
- auto *ArgOpType = ArgOp->getType();
- auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
- if (!ConstVal)
- break;
+ continue;
- Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
- GlobalValue::InternalLinkage, ConstVal,
- "funcspec.arg");
+ bool Changed = false;
+ for (const Use &U : Call->args()) {
+ unsigned Idx = Call->getArgOperandNo(&U);
+ Value *ArgOp = Call->getArgOperand(Idx);
+ Type *ArgOpType = ArgOp->getType();
- if (ArgOpType != ConstVal->getType())
- GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOp->getType());
+ if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy())
+ continue;
+
+ auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
+ if (!ConstVal)
+ continue;
+
+ Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
+ GlobalValue::InternalLinkage, ConstVal,
+ "funcspec.arg");
+ if (ArgOpType != ConstVal->getType())
+ GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);
- Call->setArgOperand(0, GV);
+ Call->setArgOperand(Idx, GV);
+ Changed = true;
+ }
// Add the changed CallInst to Solver Worklist
- Solver.visitCall(*Call);
+ if (Changed)
+ Solver.visitCall(*Call);
}
}
}
@@ -275,7 +276,10 @@ class FunctionSpecializer {
std::function<TargetTransformInfo &(Function &)> GetTTI;
std::function<TargetLibraryInfo &(Function &)> GetTLI;
- SmallPtrSet<Function *, 2> SpecializedFuncs;
+ SmallPtrSet<Function *, 4> SpecializedFuncs;
+ SmallPtrSet<Function *, 4> FullySpecialized;
+ SmallVector<Instruction *> ReplacedWithConstant;
+ DenseMap<Function *, CodeMetrics> FunctionMetrics;
public:
FunctionSpecializer(SCCPSolver &Solver,
@@ -284,42 +288,66 @@ public:
std::function<TargetLibraryInfo &(Function &)> GetTLI)
: Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {}
+ ~FunctionSpecializer() {
+ // Eliminate dead code.
+ removeDeadInstructions();
+ removeDeadFunctions();
+ }
+
/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
/// \returns true if at least one function is specialized.
- bool
- specializeFunctions(FuncList &FuncDecls,
- FuncList &CurrentSpecializations) {
+ bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) {
bool Changed = false;
- for (auto *F : FuncDecls) {
- if (!isCandidateFunction(F, CurrentSpecializations))
+ for (auto *F : Candidates) {
+ if (!isCandidateFunction(F))
continue;
auto Cost = getSpecializationCost(F);
if (!Cost.isValid()) {
LLVM_DEBUG(
- dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
+ dbgs() << "FnSpecialization: Invalid specialization cost.\n");
continue;
}
- auto ConstArgs = calculateGains(F, Cost);
- if (ConstArgs.empty()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
+ << F->getName() << " is " << Cost << "\n");
+
+ SmallVector<CallSpecBinding, 8> Specializations;
+ if (!calculateGains(F, Cost, Specializations)) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n");
continue;
}
- for (auto &CA : ConstArgs) {
- specializeFunction(CA, CurrentSpecializations);
- Changed = true;
- }
+ Changed = true;
+ for (auto &Entry : Specializations)
+ specializeFunction(F, Entry.second, WorkList);
}
- updateSpecializedFuncs(FuncDecls, CurrentSpecializations);
+ updateSpecializedFuncs(Candidates, WorkList);
NumFuncSpecialized += NbFunctionsSpecialized;
return Changed;
}
+ void removeDeadInstructions() {
+ for (auto *I : ReplacedWithConstant) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " << *I
+ << "\n");
+ I->eraseFromParent();
+ }
+ ReplacedWithConstant.clear();
+ }
+
+ void removeDeadFunctions() {
+ for (auto *F : FullySpecialized) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "
+ << F->getName() << "\n");
+ F->eraseFromParent();
+ }
+ FullySpecialized.clear();
+ }
+
bool tryToReplaceWithConstant(Value *V) {
if (!V->getType()->isSingleValueType() || isa<CallBase>(V) ||
V->user_empty())
@@ -330,17 +358,26 @@ public:
return false;
auto *Const =
isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType());
- V->replaceAllUsesWith(Const);
- for (auto *U : Const->users())
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V
+ << "\nFnSpecialization: with " << *Const << "\n");
+
+ // Record uses of V to avoid visiting irrelevant uses of const later.
+ SmallVector<Instruction *> UseInsts;
+ for (auto *U : V->users())
if (auto *I = dyn_cast<Instruction>(U))
if (Solver.isBlockExecutable(I->getParent()))
- Solver.visit(I);
+ UseInsts.push_back(I);
+
+ V->replaceAllUsesWith(Const);
+
+ for (auto *I : UseInsts)
+ Solver.visit(I);
// Remove the instruction from Block and Solver.
if (auto *I = dyn_cast<Instruction>(V)) {
if (I->isSafeToRemove()) {
- I->eraseFromParent();
+ ReplacedWithConstant.push_back(I);
Solver.removeLatticeValueFor(I);
}
}
@@ -352,92 +389,108 @@ private:
// also in the cost model.
unsigned NbFunctionsSpecialized = 0;
+ // Compute the code metrics for function \p F.
+ CodeMetrics &analyzeFunction(Function *F) {
+ auto I = FunctionMetrics.insert({F, CodeMetrics()});
+ CodeMetrics &Metrics = I.first->second;
+ if (I.second) {
+ // The code metrics were not cached.
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
+ for (BasicBlock &BB : *F)
+ Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
+ << F->getName() << " is " << Metrics.NumInsts
+ << " instructions\n");
+ }
+ return Metrics;
+ }
+
/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
- Function *cloneCandidateFunction(Function *F) {
- ValueToValueMapTy EmptyMap;
- Function *Clone = CloneFunction(F, EmptyMap);
+ Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) {
+ Function *Clone = CloneFunction(F, Mappings);
removeSSACopy(*Clone);
return Clone;
}
- /// This function decides whether it's worthwhile to specialize function \p F
- /// based on the known constant values its arguments can take on, i.e. it
- /// calculates a gain and returns a list of actual arguments that are deemed
- /// profitable to specialize. Specialization is performed on the first
- /// interesting argument. Specializations based on additional arguments will
- /// be evaluated on following iterations of the main IPSCCP solve loop.
- SmallVector<ArgInfo> calculateGains(Function *F, InstructionCost Cost) {
- SmallVector<ArgInfo> Worklist;
+ /// This function decides whether it's worthwhile to specialize function
+ /// \p F based on the known constant values its arguments can take on. It
+ /// only discovers potential specialization opportunities without actually
+ /// applying them.
+ ///
+ /// \returns true if any specializations have been found.
+ bool calculateGains(Function *F, InstructionCost Cost,
+ SmallVectorImpl<CallSpecBinding> &WorkList) {
+ SpecializationMap Specializations;
// Determine if we should specialize the function based on the values the
// argument can take on. If specialization is not profitable, we continue
// on to the next argument.
for (Argument &FormalArg : F->args()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing arg: "
- << FormalArg.getName() << "\n");
// Determine if this argument is interesting. If we know the argument can
- // take on any constant values, they are collected in Constants. If the
- // argument can only ever equal a constant value in Constants, the
- // function will be completely specialized, and the IsPartial flag will
- // be set to false by isArgumentInteresting (that function only adds
- // values to the Constants list that are deemed profitable).
- bool IsPartial = true;
- SmallVector<Constant *> ActualConstArg;
- if (!isArgumentInteresting(&FormalArg, ActualConstArg, IsPartial)) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n");
+ // take on any constant values, they are collected in Constants.
+ SmallVector<CallArgBinding, 8> ActualArgs;
+ if (!isArgumentInteresting(&FormalArg, ActualArgs)) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Argument "
+ << FormalArg.getNameOrAsOperand()
+ << " is not interesting\n");
continue;
}
- for (auto *ActualArg : ActualConstArg) {
- InstructionCost Gain =
- ForceFunctionSpecialization
- ? 1
- : getSpecializationBonus(&FormalArg, ActualArg) - Cost;
+ for (const auto &Entry : ActualArgs) {
+ CallBase *Call = Entry.first;
+ Constant *ActualArg = Entry.second;
- if (Gain <= 0)
- continue;
- Worklist.push_back({F, &FormalArg, ActualArg, Gain});
+ auto I = Specializations.insert({Call, SpecializationInfo()});
+ SpecializationInfo &S = I.first->second;
+
+ if (I.second)
+ S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost;
+ if (!ForceFunctionSpecialization)
+ S.Gain += getSpecializationBonus(&FormalArg, ActualArg);
+ S.Args.push_back({&FormalArg, ActualArg});
}
+ }
- if (Worklist.empty())
- continue;
+ // Remove unprofitable specializations.
+ Specializations.remove_if(
+ [](const auto &Entry) { return Entry.second.Gain <= 0; });
- // Sort the candidates in descending order.
- llvm::stable_sort(Worklist, [](const ArgInfo &L, const ArgInfo &R) {
- return L.Gain > R.Gain;
- });
+ // Clear the MapVector and return the underlying vector.
+ WorkList = Specializations.takeVector();
- // Truncate the worklist to 'MaxClonesThreshold' candidates if
- // necessary.
- if (Worklist.size() > MaxClonesThreshold) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: number of candidates exceed "
- << "the maximum number of clones threshold.\n"
- << "Truncating worklist to " << MaxClonesThreshold
- << " candidates.\n");
- Worklist.erase(Worklist.begin() + MaxClonesThreshold,
- Worklist.end());
- }
+ // Sort the candidates in descending order.
+ llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
+ return L.second.Gain > R.second.Gain;
+ });
- if (IsPartial || Worklist.size() < ActualConstArg.size())
- for (auto &ActualArg : Worklist)
- ActualArg.Partial = true;
+ // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary.
+ if (WorkList.size() > MaxClonesThreshold) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
+ << "the maximum number of clones threshold.\n"
+ << "FnSpecialization: Truncating worklist to "
+ << MaxClonesThreshold << " candidates.\n");
+ WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
+ }
- LLVM_DEBUG(dbgs() << "Sorted list of candidates by gain:\n";
- for (auto &C
- : Worklist) {
- dbgs() << "- Function = " << C.Fn->getName() << ", ";
- dbgs() << "FormalArg = " << C.Arg->getName() << ", ";
- dbgs() << "ActualArg = " << C.Const->getName() << ", ";
- dbgs() << "Gain = " << C.Gain << "\n";
- });
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
+ << F->getName() << "\n";
+ for (const auto &Entry
+ : WorkList) {
+ dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain
+ << "\n";
+ for (const ArgInfo &Arg : Entry.second.Args)
+ dbgs() << "FnSpecialization: FormalArg = "
+ << Arg.Formal->getNameOrAsOperand()
+ << ", ActualArg = "
+ << Arg.Actual->getNameOrAsOperand() << "\n";
+ });
- // FIXME: Only one argument per function.
- break;
- }
- return Worklist;
+ return !WorkList.empty();
}
- bool isCandidateFunction(Function *F, FuncList &Specializations) {
+ bool isCandidateFunction(Function *F) {
// Do not specialize the cloned function again.
if (SpecializedFuncs.contains(F))
return false;
@@ -461,44 +514,45 @@ private:
return true;
}
- void specializeFunction(ArgInfo &AI, FuncList &Specializations) {
- Function *Clone = cloneCandidateFunction(AI.Fn);
- Argument *ClonedArg = Clone->getArg(AI.Arg->getArgNo());
+ void specializeFunction(Function *F, SpecializationInfo &S,
+ FuncList &WorkList) {
+ ValueToValueMapTy Mappings;
+ Function *Clone = cloneCandidateFunction(F, Mappings);
// Rewrite calls to the function so that they call the clone instead.
- rewriteCallSites(AI.Fn, Clone, *ClonedArg, AI.Const);
+ rewriteCallSites(Clone, S.Args, Mappings);
// Initialize the lattice state of the arguments of the function clone,
// marking the argument on which we specialized the function constant
// with the given value.
- Solver.markArgInFuncSpecialization(AI.Fn, ClonedArg, AI.Const);
+ Solver.markArgInFuncSpecialization(Clone, S.Args);
// Mark all the specialized functions
- Specializations.push_back(Clone);
+ WorkList.push_back(Clone);
NbFunctionsSpecialized++;
// If the function has been completely specialized, the original function
// is no longer needed. Mark it unreachable.
- if (!AI.Partial)
- Solver.markFunctionUnreachable(AI.Fn);
+ if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) {
+ if (auto *CS = dyn_cast<CallBase>(U))
+ return CS->getFunction() == F;
+ return false;
+ })) {
+ Solver.markFunctionUnreachable(F);
+ FullySpecialized.insert(F);
+ }
}
/// Compute and return the cost of specializing function \p F.
InstructionCost getSpecializationCost(Function *F) {
- // Compute the code metrics for the function.
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
- CodeMetrics Metrics;
- for (BasicBlock &BB : *F)
- Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
-
+ CodeMetrics &Metrics = analyzeFunction(F);
// If the code metrics reveal that we shouldn't duplicate the function, we
// shouldn't specialize it. Set the specialization cost to Invalid.
// Or if the lines of codes implies that this function is easy to get
// inlined so that we shouldn't specialize it.
- if (Metrics.notDuplicatable ||
+ if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
(!ForceFunctionSpecialization &&
- Metrics.NumInsts < SmallFunctionThreshold)) {
+ *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) {
InstructionCost C{};
C.setInvalid();
return C;
@@ -539,31 +593,20 @@ private:
DominatorTree DT(*F);
LoopInfo LI(DT);
auto &TTI = (GetTTI)(*F);
- LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for: " << *A
- << "\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
+ << C->getNameOrAsOperand() << "\n");
InstructionCost TotalCost = 0;
for (auto *U : A->users()) {
TotalCost += getUserBonus(U, TTI, LI);
- LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
+ LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
}
// The below heuristic is only concerned with exposing inlining
// opportunities via indirect call promotion. If the argument is not a
- // function pointer, give up.
- if (!isa<PointerType>(A->getType()) ||
- !isa<FunctionType>(A->getType()->getPointerElementType()))
- return TotalCost;
-
- // Since the argument is a function pointer, its incoming constant values
- // should be functions or constant expressions. The code below attempts to
- // look through cast expressions to find the function that will be called.
- Value *CalledValue = C;
- while (isa<ConstantExpr>(CalledValue) &&
- cast<ConstantExpr>(CalledValue)->isCast())
- CalledValue = cast<User>(CalledValue)->getOperand(0);
- Function *CalledFunction = dyn_cast<Function>(CalledValue);
+ // (potentially casted) function pointer, give up.
+ Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
if (!CalledFunction)
return TotalCost;
@@ -603,6 +646,9 @@ private:
Bonus += Params.DefaultThreshold;
else if (IC.isVariable() && IC.getCostDelta() > 0)
Bonus += IC.getCostDelta();
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus
+ << " for user " << *U << "\n");
}
return TotalCost + Bonus;
@@ -615,15 +661,12 @@ private:
/// specializing the function based on the incoming values of argument \p A
/// would result in any significant optimization opportunities. If
/// optimization opportunities exist, the constant values of \p A on which to
- /// specialize the function are collected in \p Constants. If the values in
- /// \p Constants represent the complete set of values that \p A can take on,
- /// the function will be completely specialized, and the \p IsPartial flag is
- /// set to false.
+ /// specialize the function are collected in \p Constants.
///
/// \returns true if the function should be specialized on the given
/// argument.
- bool isArgumentInteresting(Argument *A, ConstList &Constants,
- bool &IsPartial) {
+ bool isArgumentInteresting(Argument *A,
+ SmallVectorImpl<CallArgBinding> &Constants) {
// For now, don't attempt to specialize functions based on the values of
// composite types.
if (!A->getType()->isSingleValueType() || A->user_empty())
@@ -632,8 +675,9 @@ private:
// If the argument isn't overdefined, there's nothing to do. It should
// already be constant.
if (!Solver.getLatticeValueFor(A).isOverdefined()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: nothing to do, arg is already "
- << "constant?\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument "
+ << A->getNameOrAsOperand()
+ << " is already constant?\n");
return false;
}
@@ -650,20 +694,26 @@ private:
//
// TODO 2: this currently does not support constants, i.e. integer ranges.
//
- IsPartial = !getPossibleConstants(A, Constants);
- LLVM_DEBUG(dbgs() << "FnSpecialization: interesting arg: " << *A << "\n");
+ getPossibleConstants(A, Constants);
+
+ if (Constants.empty())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
+ << A->getNameOrAsOperand() << "\n");
return true;
}
/// Collect in \p Constants all the constant values that argument \p A can
/// take on.
- ///
- /// \returns true if all of the values the argument can take on are constant
- /// (e.g., the argument's parent function cannot be called with an
- /// overdefined value).
- bool getPossibleConstants(Argument *A, ConstList &Constants) {
+ void getPossibleConstants(Argument *A,
+ SmallVectorImpl<CallArgBinding> &Constants) {
Function *F = A->getParent();
- bool AllConstant = true;
+
+ // SCCP solver does not record an argument that will be constructed on
+ // stack.
+ if (A->hasByValAttr() && !F->onlyReadsMemory())
+ return;
// Iterate over all the call sites of the argument's parent function.
for (User *U : F->users()) {
@@ -672,10 +722,8 @@ private:
auto &CS = *cast<CallBase>(U);
// If the call site has attribute minsize set, that callsite won't be
// specialized.
- if (CS.hasFnAttr(Attribute::MinSize)) {
- AllConstant = false;
+ if (CS.hasFnAttr(Attribute::MinSize))
continue;
- }
// If the parent of the call site will never be executed, we don't need
// to worry about the passed value.
@@ -684,13 +732,7 @@ private:
auto *V = CS.getArgOperand(A->getArgNo());
if (isa<PoisonValue>(V))
- return false;
-
- // For now, constant expressions are fine but only if they are function
- // calls.
- if (auto *CE = dyn_cast<ConstantExpr>(V))
- if (!isa<Function>(CE->getOperand(0)))
- return false;
+ return;
// TrackValueOfGlobalVariable only tracks scalar global variables.
if (auto *GV = dyn_cast<GlobalVariable>(V)) {
@@ -698,36 +740,32 @@ private:
// global values.
if (!GV->isConstant())
if (!SpecializeOnAddresses)
- return false;
+ return;
if (!GV->getValueType()->isSingleValueType())
- return false;
+ return;
}
if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
EnableSpecializationForLiteralConstant))
- Constants.push_back(cast<Constant>(V));
- else
- AllConstant = false;
+ Constants.push_back({&CS, cast<Constant>(V)});
}
-
- // If the argument can only take on constant values, AllConstant will be
- // true.
- return AllConstant;
}
/// Rewrite calls to function \p F to call function \p Clone instead.
///
- /// This function modifies calls to function \p F whose argument at index \p
- /// ArgNo is equal to constant \p C. The calls are rewritten to call function
- /// \p Clone instead.
+ /// This function modifies calls to function \p F as long as the actual
+ /// arguments match those in \p Args. Note that for recursive calls we
+ /// need to compare against the cloned formal arguments.
///
/// Callsites that have been marked with the MinSize function attribute won't
/// be specialized and rewritten.
- void rewriteCallSites(Function *F, Function *Clone, Argument &Arg,
- Constant *C) {
- unsigned ArgNo = Arg.getArgNo();
- SmallVector<CallBase *, 4> CallSitesToRewrite;
+ void rewriteCallSites(Function *Clone, const SmallVectorImpl<ArgInfo> &Args,
+ ValueToValueMapTy &Mappings) {
+ assert(!Args.empty() && "Specialization without arguments");
+ Function *F = Args[0].Formal->getParent();
+
+ SmallVector<CallBase *, 8> CallSitesToRewrite;
for (auto *U : F->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
@@ -736,35 +774,50 @@ private:
continue;
CallSitesToRewrite.push_back(&CS);
}
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of "
+ << F->getName() << " with " << Clone->getName() << "\n");
+
for (auto *CS : CallSitesToRewrite) {
- if ((CS->getFunction() == Clone && CS->getArgOperand(ArgNo) == &Arg) ||
- CS->getArgOperand(ArgNo) == C) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: "
+ << CS->getFunction()->getName() << " ->" << *CS
+ << "\n");
+ if (/* recursive call */
+ (CS->getFunction() == Clone &&
+ all_of(Args,
+ [CS, &Mappings](const ArgInfo &Arg) {
+ unsigned ArgNo = Arg.Formal->getArgNo();
+ return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal];
+ })) ||
+ /* normal call */
+ all_of(Args, [CS](const ArgInfo &Arg) {
+ unsigned ArgNo = Arg.Formal->getArgNo();
+ return CS->getArgOperand(ArgNo) == Arg.Actual;
+ })) {
CS->setCalledFunction(Clone);
Solver.markOverdefined(CS);
}
}
}
- void updateSpecializedFuncs(FuncList &FuncDecls,
- FuncList &CurrentSpecializations) {
- for (auto *SpecializedFunc : CurrentSpecializations) {
- SpecializedFuncs.insert(SpecializedFunc);
+ void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) {
+ for (auto *F : WorkList) {
+ SpecializedFuncs.insert(F);
// Initialize the state of the newly created functions, marking them
// argument-tracked and executable.
- if (SpecializedFunc->hasExactDefinition() &&
- !SpecializedFunc->hasFnAttribute(Attribute::Naked))
- Solver.addTrackedFunction(SpecializedFunc);
+ if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked))
+ Solver.addTrackedFunction(F);
- Solver.addArgumentTrackedFunction(SpecializedFunc);
- FuncDecls.push_back(SpecializedFunc);
- Solver.markBlockExecutable(&SpecializedFunc->front());
+ Solver.addArgumentTrackedFunction(F);
+ Candidates.push_back(F);
+ Solver.markBlockExecutable(&F->front());
// Replace the function arguments for the specialized functions.
- for (Argument &Arg : SpecializedFunc->args())
+ for (Argument &Arg : F->args())
if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg))
LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: "
- << Arg.getName() << "\n");
+ << Arg.getNameOrAsOperand() << "\n");
}
}
};
@@ -871,22 +924,26 @@ bool llvm::runFunctionSpecialization(
// Initially resolve the constants in all the argument tracked functions.
RunSCCPSolver(FuncDecls);
- SmallVector<Function *, 2> CurrentSpecializations;
+ SmallVector<Function *, 8> WorkList;
unsigned I = 0;
while (FuncSpecializationMaxIters != I++ &&
- FS.specializeFunctions(FuncDecls, CurrentSpecializations)) {
+ FS.specializeFunctions(FuncDecls, WorkList)) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n");
// Run the solver for the specialized functions.
- RunSCCPSolver(CurrentSpecializations);
+ RunSCCPSolver(WorkList);
// Replace some unresolved constant arguments.
constantArgPropagation(FuncDecls, M, Solver);
- CurrentSpecializations.clear();
+ WorkList.clear();
Changed = true;
}
- // Clean up the IR by removing ssa_copy intrinsics.
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Number of specializations = "
+ << NumFuncSpecialized << "\n");
+
+ // Remove any ssa_copy intrinsics that may have been introduced.
removeSSACopy(M);
return Changed;
}
diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 5e5d2086adc2..f35827220bb6 100644
--- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -21,7 +21,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -34,7 +33,7 @@ using namespace llvm;
#define DEBUG_TYPE "globaldce"
static cl::opt<bool>
- ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true),
cl::desc("Enable virtual function elimination"));
STATISTIC(NumAliases , "Number of global aliases removed");
@@ -86,6 +85,9 @@ ModulePass *llvm::createGlobalDCEPass() {
/// Returns true if F is effectively empty.
static bool isEmptyFunction(Function *F) {
+ // Skip external functions.
+ if (F->isDeclaration())
+ return false;
BasicBlock &Entry = F->getEntryBlock();
for (auto &I : Entry) {
if (I.isDebugOrPseudoInst())
@@ -214,14 +216,14 @@ void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
if (!Ptr) {
LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
VFESafeVTables.erase(VTable);
- return;
+ continue;
}
auto Callee = dyn_cast<Function>(Ptr->stripPointerCasts());
if (!Callee) {
LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n");
VFESafeVTables.erase(VTable);
- return;
+ continue;
}
LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> "
@@ -298,7 +300,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// marked as alive are discarded.
// Remove empty functions from the global ctors list.
- Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+ Changed |= optimizeGlobalCtorsList(
+ M, [](uint32_t, Function *F) { return isEmptyFunction(F); });
// Collect the set of members for each comdat.
for (Function &F : M)
@@ -317,7 +320,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Loop over the module, adding globals which are obviously necessary.
for (GlobalObject &GO : M.global_objects()) {
- Changed |= RemoveUnusedGlobalValue(GO);
+ GO.removeDeadConstantUsers();
// Functions with external linkage are needed if they have a body.
// Externally visible & appending globals are needed, if they have an
// initializer.
@@ -330,7 +333,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Compute direct dependencies of aliases.
for (GlobalAlias &GA : M.aliases()) {
- Changed |= RemoveUnusedGlobalValue(GA);
+ GA.removeDeadConstantUsers();
// Externally visible aliases are needed.
if (!GA.isDiscardableIfUnused())
MarkLive(GA);
@@ -340,7 +343,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Compute direct dependencies of ifuncs.
for (GlobalIFunc &GIF : M.ifuncs()) {
- Changed |= RemoveUnusedGlobalValue(GIF);
+ GIF.removeDeadConstantUsers();
// Externally visible ifuncs are needed.
if (!GIF.isDiscardableIfUnused())
MarkLive(GIF);
@@ -403,7 +406,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Now that all interferences have been dropped, delete the actual objects
// themselves.
auto EraseUnusedGlobalValue = [&](GlobalValue *GV) {
- RemoveUnusedGlobalValue(*GV);
+ GV->removeDeadConstantUsers();
GV->eraseFromParent();
Changed = true;
};
@@ -455,16 +458,3 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
-
-// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
-// GlobalValue, looking for the constant pointer ref that may be pointing to it.
-// If found, check to see if the constant pointer ref is safe to destroy, and if
-// so, nuke it. This will reduce the reference count on the global value, which
-// might make it deader.
-//
-bool GlobalDCEPass::RemoveUnusedGlobalValue(GlobalValue &GV) {
- if (GV.use_empty())
- return false;
- GV.removeDeadConstantUsers();
- return GV.use_empty();
-}
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 1cb32e32c895..1a1bde4f0668 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
@@ -37,7 +38,6 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -60,7 +60,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
@@ -100,7 +99,7 @@ static cl::opt<bool>
cl::init(false), cl::Hidden);
static cl::opt<int> ColdCCRelFreq(
- "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+ "coldcc-rel-freq", cl::Hidden, cl::init(2),
cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
"entry frequency, for a call site to be considered cold for enabling"
@@ -232,7 +231,7 @@ CleanupPointerRootUsers(GlobalVariable *GV,
if (MemSrc && MemSrc->isConstant()) {
Changed = true;
MTI->eraseFromParent();
- } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
+ } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
if (I->hasOneUse())
Dead.push_back(std::make_pair(I, MTI));
}
@@ -405,9 +404,37 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
for (auto *GVE : GVs) {
DIVariable *Var = GVE->getVariable();
DIExpression *Expr = GVE->getExpression();
+ int64_t CurVarOffsetInBytes = 0;
+ uint64_t CurVarOffsetInBits = 0;
+
+ // Calculate the offset (Bytes), Continue if unknown.
+ if (!Expr->extractIfOffset(CurVarOffsetInBytes))
+ continue;
+
+ // Ignore negative offset.
+ if (CurVarOffsetInBytes < 0)
+ continue;
+
+ // Convert offset to bits.
+ CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;
+
+ // Current var starts after the fragment, ignore.
+ if (CurVarOffsetInBits >= (FragmentOffsetInBits + FragmentSizeInBits))
+ continue;
+
+ uint64_t CurVarSize = Var->getType()->getSizeInBits();
+ // Current variable ends before start of fragment, ignore.
+ if (CurVarSize != 0 &&
+ (CurVarOffsetInBits + CurVarSize) <= FragmentOffsetInBits)
+ continue;
+
+ // Current variable fits in the fragment.
+ if (CurVarOffsetInBits == FragmentOffsetInBits &&
+ CurVarSize == FragmentSizeInBits)
+ Expr = DIExpression::get(Expr->getContext(), {});
// If the FragmentSize is smaller than the variable,
// emit a fragment expression.
- if (FragmentSizeInBits < VarSize) {
+ else if (FragmentSizeInBits < VarSize) {
if (auto E = DIExpression::createFragmentExpression(
Expr, FragmentOffsetInBits, FragmentSizeInBits))
Expr = *E;
@@ -581,17 +608,14 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
// Will trap.
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == V) {
- //cerr << "NONTRAPPING USE: " << *U;
return false; // Storing the value.
}
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
if (CI->getCalledOperand() != V) {
- //cerr << "NONTRAPPING USE: " << *U;
return false; // Not calling the ptr
}
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
if (II->getCalledOperand() != V) {
- //cerr << "NONTRAPPING USE: " << *U;
return false; // Not calling the ptr
}
} else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
@@ -615,7 +639,6 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
// the comparing of the value of the created global init bool later in
// optimizeGlobalAddressOfAllocation for the global variable.
} else {
- //cerr << "NONTRAPPING USE: " << *U;
return false;
}
}
@@ -878,7 +901,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
}
}
- SmallPtrSet<Constant *, 1> RepValues;
+ SmallSetVector<Constant *, 1> RepValues;
RepValues.insert(NewGV);
// If there is a comparison against null, we will insert a global bool to
@@ -1015,7 +1038,6 @@ valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
/// accessing the data, and exposes the resultant global to further GlobalOpt.
static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
CallInst *CI,
- AtomicOrdering Ordering,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
if (!isAllocRemovable(CI, TLI))
@@ -1062,7 +1084,7 @@ static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
// its initializer) is ever stored to the global.
static bool
optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
- AtomicOrdering Ordering, const DataLayout &DL,
+ const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1087,7 +1109,7 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
} else if (isAllocationFn(StoredOnceVal, GetTLI)) {
if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
auto *TLI = &GetTLI(*CI->getFunction());
- if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, Ordering, DL, TLI))
+ if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
return true;
}
}
@@ -1257,8 +1279,10 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
return true;
}
-static bool deleteIfDead(
- GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
+static bool
+deleteIfDead(GlobalValue &GV,
+ SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
+ function_ref<void(Function &)> DeleteFnCallback = nullptr) {
GV.removeDeadConstantUsers();
if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
@@ -1277,6 +1301,10 @@ static bool deleteIfDead(
return false;
LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
+ if (auto *F = dyn_cast<Function>(&GV)) {
+ if (DeleteFnCallback)
+ DeleteFnCallback(*F);
+ }
GV.eraseFromParent();
++NumDeleted;
return true;
@@ -1416,6 +1444,42 @@ static void makeAllConstantUsesInstructions(Constant *C) {
}
}
+// For a global variable with one store, if the store dominates any loads,
+// those loads will always load the stored value (as opposed to the
+// initializer), even in the presence of recursion.
+static bool forwardStoredOnceStore(
+ GlobalVariable *GV, const StoreInst *StoredOnceStore,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
+ const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
+ // We can do this optimization for non-constants in nosync + norecurse
+ // functions, but globals used in exactly one norecurse functions are already
+ // promoted to an alloca.
+ if (!isa<Constant>(StoredOnceValue))
+ return false;
+ const Function *F = StoredOnceStore->getFunction();
+ SmallVector<LoadInst *> Loads;
+ for (User *U : GV->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->getFunction() == F &&
+ LI->getType() == StoredOnceValue->getType() && LI->isSimple())
+ Loads.push_back(LI);
+ }
+ }
+ // Only compute DT if we have any loads to examine.
+ bool MadeChange = false;
+ if (!Loads.empty()) {
+ auto &DT = LookupDomTree(*const_cast<Function *>(F));
+ for (auto *LI : Loads) {
+ if (DT.dominates(StoredOnceStore, LI)) {
+ LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
+ LI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
static bool
@@ -1572,9 +1636,15 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI))
+ if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
return true;
+ // Try to forward the store to any loads. If we have more than one store, we
+ // may have a store of the initializer between StoredOnceStore and a load.
+ if (GS.NumStores == 1)
+ if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
+ return true;
+
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean. Skip this optimization for AS that doesn't allow an initializer.
if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
@@ -1755,7 +1825,7 @@ hasOnlyColdCalls(Function &F,
return false;
if (!CalledFn->hasLocalLinkage())
return false;
- // Skip over instrinsics since they won't remain as function calls.
+ // Skip over intrinsics since they won't remain as function calls.
if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
continue;
// Check if it's valid to use coldcc calling convention.
@@ -1884,7 +1954,9 @@ OptimizeFunctions(Module &M,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
- SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
+ SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
+ function_ref<void(Function &F)> ChangedCFGCallback,
+ function_ref<void(Function &F)> DeleteFnCallback) {
bool Changed = false;
@@ -1904,7 +1976,7 @@ OptimizeFunctions(Module &M,
if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
F.setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(F, NotDiscardableComdats)) {
+ if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) {
Changed = true;
continue;
}
@@ -1917,13 +1989,11 @@ OptimizeFunctions(Module &M,
// So, remove unreachable blocks from the function, because a) there's
// no point in analyzing them and b) GlobalOpt should otherwise grow
// some more complicated logic to break these cycles.
- // Removing unreachable blocks might invalidate the dominator so we
- // recalculate it.
+ // Notify the analysis manager that we've modified the function's CFG.
if (!F.isDeclaration()) {
if (removeUnreachableBlocks(F)) {
- auto &DT = LookupDomTree(F);
- DT.recalculate(F);
Changed = true;
+ ChangedCFGCallback(F);
}
}
@@ -2031,6 +2101,9 @@ OptimizeGlobalVars(Module &M,
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
TargetLibraryInfo *TLI) {
+ // Skip external functions.
+ if (F->isDeclaration())
+ return false;
// Call the function.
Evaluator Eval(DL, TLI);
Constant *RetValDummy;
@@ -2383,15 +2456,19 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
return Changed;
}
-static bool optimizeGlobalsInModule(
- Module &M, const DataLayout &DL,
- function_ref<TargetLibraryInfo &(Function &)> GetTLI,
- function_ref<TargetTransformInfo &(Function &)> GetTTI,
- function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
- function_ref<DominatorTree &(Function &)> LookupDomTree) {
+static bool
+optimizeGlobalsInModule(Module &M, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree,
+ function_ref<void(Function &F)> ChangedCFGCallback,
+ function_ref<void(Function &F)> DeleteFnCallback) {
SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
bool Changed = false;
bool LocalChange = true;
+ Optional<uint32_t> FirstNotFullyEvaluatedPriority;
+
while (LocalChange) {
LocalChange = false;
@@ -2411,12 +2488,20 @@ static bool optimizeGlobalsInModule(
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
- NotDiscardableComdats);
+ NotDiscardableComdats, ChangedCFGCallback,
+ DeleteFnCallback);
// Optimize global_ctors list.
- LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
- return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
- });
+ LocalChange |=
+ optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) {
+ if (FirstNotFullyEvaluatedPriority &&
+ *FirstNotFullyEvaluatedPriority != Priority)
+ return false;
+ bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F));
+ if (!Evaluated)
+ FirstNotFullyEvaluatedPriority = Priority;
+ return Evaluated;
+ });
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
@@ -2457,10 +2542,23 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
+ auto ChangedCFGCallback = [&FAM](Function &F) {
+ FAM.invalidate(F, PreservedAnalyses::none());
+ };
+ auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); };
- if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
+ if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
+ ChangedCFGCallback, DeleteFnCallback))
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ // We made sure to clear analyses for deleted functions.
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ // The only place we modify the CFG is when calling
+ // removeUnreachableBlocks(), but there we make sure to invalidate analyses
+ // for modified functions.
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
namespace {
@@ -2491,8 +2589,13 @@ struct GlobalOptLegacyPass : public ModulePass {
return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
- LookupDomTree);
+ auto ChangedCFGCallback = [&LookupDomTree](Function &F) {
+ auto &DT = LookupDomTree(F);
+ DT.recalculate(F);
+ };
+
+ return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
+ ChangedCFGCallback, nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index e7d698c42fcf..7d9e6135b2eb 100644
--- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -134,9 +134,9 @@ static bool splitGlobal(GlobalVariable &GV) {
}
// Finally, remove the original global. Any remaining uses refer to invalid
- // elements of the global, so replace with undef.
+ // elements of the global, so replace with poison.
if (!GV.use_empty())
- GV.replaceAllUsesWith(UndefValue::get(GV.getType()));
+ GV.replaceAllUsesWith(PoisonValue::get(GV.getType()));
GV.eraseFromParent();
return true;
}
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index a964fcde0396..95e8ae0fd22f 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -29,46 +29,33 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/BlockFrequency.h"
-#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
-#include <limits>
#include <cassert>
+#include <limits>
#include <string>
#define DEBUG_TYPE "hotcoldsplit"
@@ -126,7 +113,8 @@ bool unlikelyExecuted(BasicBlock &BB) {
// mark sanitizer traps as cold.
for (Instruction &I : BB)
if (auto *CB = dyn_cast<CallBase>(&I))
- if (CB->hasFnAttr(Attribute::Cold) && !CB->getMetadata("nosanitize"))
+ if (CB->hasFnAttr(Attribute::Cold) &&
+ !CB->getMetadata(LLVMContext::MD_nosanitize))
return true;
// The block is cold if it has an unreachable terminator, unless it's
@@ -352,7 +340,7 @@ Function *HotColdSplitting::extractColdRegion(
// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
/* BPI */ nullptr, AC, /* AllowVarArgs */ false,
- /* AllowAlloca */ false,
+ /* AllowAlloca */ false, /* AllocaBlock */ nullptr,
/* Suffix */ "cold." + std::to_string(Count));
// Perform a simple cost/benefit analysis to decide whether or not to permit
@@ -740,7 +728,7 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
+ return *ORE;
};
auto LookupAC = [this](Function &F) -> AssumptionCache * {
if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
@@ -772,7 +760,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
+ return *ORE;
};
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
index de1c1d379502..ec2b80012ed6 100644
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -24,7 +24,6 @@ using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeOpenMPOptCGSCCLegacyPassPass(Registry);
- initializeArgPromotionPass(Registry);
initializeAnnotation2MetadataLegacyPass(Registry);
initializeCalledValuePropagationLegacyPassPass(Registry);
initializeConstantMergeLegacyPassPass(Registry);
@@ -70,10 +69,6 @@ void LLVMInitializeIPO(LLVMPassRegistryRef R) {
initializeIPO(*unwrap(R));
}
-void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createArgumentPromotionPass());
-}
-
void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCalledValuePropagationPass());
}
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index faf7cb7d566a..d75d99e307fd 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -16,8 +16,9 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/PassManager.h"
@@ -25,8 +26,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
-#include <map>
-#include <set>
#include <vector>
#define DEBUG_TYPE "iroutliner"
@@ -183,11 +182,24 @@ static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
Value *V) {
Optional<unsigned> GVN = Candidate->getGVN(V);
- assert(GVN.hasValue() && "No GVN for incoming value");
+ assert(GVN && "No GVN for incoming value");
Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
- return FoundValueOpt.getValueOr(nullptr);
+ return FoundValueOpt.value_or(nullptr);
+}
+
+BasicBlock *
+OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other,
+ BasicBlock *BB) {
+ Instruction *FirstNonPHI = BB->getFirstNonPHI();
+ assert(FirstNonPHI && "block is empty?");
+ Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
+ if (!CorrespondingVal)
+ return nullptr;
+ BasicBlock *CorrespondingBlock =
+ cast<Instruction>(CorrespondingVal)->getParent();
+ return CorrespondingBlock;
}
/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
@@ -264,13 +276,33 @@ void OutlinableRegion::splitCandidate() {
// We iterate over the instructions in the region, if we find a PHINode, we
// check if there are predecessors outside of the region, if there are,
// we ignore this region since we are unable to handle the severing of the
- // phi node right now.
+ // phi node right now.
+
+ // TODO: Handle extraneous inputs for PHINodes through variable number of
+ // inputs, similar to how outputs are handled.
BasicBlock::iterator It = StartInst->getIterator();
+ EndBB = BackInst->getParent();
+ BasicBlock *IBlock;
+ BasicBlock *PHIPredBlock = nullptr;
+ bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
unsigned NumPredsOutsideRegion = 0;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!BBSet.contains(PN->getIncomingBlock(i)))
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (!BBSet.contains(PN->getIncomingBlock(i))) {
+ PHIPredBlock = PN->getIncomingBlock(i);
++NumPredsOutsideRegion;
+ continue;
+ }
+
+ // We must consider the case where the incoming block to the PHINode is
+ // the same as the final block of the OutlinableRegion. If this is the
+ // case, the branch from this block must also be outlined to be valid.
+ IBlock = PN->getIncomingBlock(i);
+ if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
+ PHIPredBlock = PN->getIncomingBlock(i);
+ ++NumPredsOutsideRegion;
+ }
+ }
if (NumPredsOutsideRegion > 1)
return;
@@ -285,11 +317,9 @@ void OutlinableRegion::splitCandidate() {
// If the region ends with a PHINode, but does not contain all of the phi node
// instructions of the region, we ignore it for now.
- if (isa<PHINode>(BackInst)) {
- EndBB = BackInst->getParent();
- if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
- return;
- }
+ if (isa<PHINode>(BackInst) &&
+ BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
+ return;
// The basic block gets split like so:
// block: block:
@@ -310,6 +340,10 @@ void OutlinableRegion::splitCandidate() {
StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
+ // If there was a PHINode with an incoming block outside the region,
+ // make sure it is correctly updated in the newly split block.
+ if (PHIPredBlock)
+ PrevBB->replaceSuccessorsPhiUsesWith(PHIPredBlock, PrevBB);
CandidateSplit = true;
if (!BackInst->isTerminator()) {
@@ -353,6 +387,25 @@ void OutlinableRegion::reattachCandidate() {
assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
+ // Make sure PHINode references to the block we are merging into are
+ // updated to be incoming blocks from the predecessor to the current block.
+
+ // NOTE: If this is updated such that the outlined block can have more than
+ // one incoming block to a PHINode, this logic will have to be updated
+ // to handle multiple predecessors instead.
+
+ // We only need to update this if the outlined section contains a PHINode, if
+ // it does not, then the incoming block was never changed in the first place.
+ // On the other hand, if PrevBB has no predecessors, it means that all
+ // incoming blocks to the first block are contained in the region, and there
+ // will be nothing to update.
+ Instruction *StartInst = (*Candidate->begin()).Inst;
+ if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
+ assert(!PrevBB->hasNPredecessorsOrMore(2) &&
+ "PrevBB has more than one predecessor. Should be 0 or 1.");
+ BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
+ PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, BeforePrevBB);
+ }
PrevBB->getTerminator()->eraseFromParent();
// If we reattaching after outlining, we iterate over the phi nodes to
@@ -501,7 +554,7 @@ collectRegionsConstants(OutlinableRegion &Region,
// the the number has been found to be not the same value in each instance.
for (Value *V : ID.OperVals) {
Optional<unsigned> GVNOpt = C.getGVN(V);
- assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
+ assert(GVNOpt && "Expected a GVN for operand?");
unsigned GVN = GVNOpt.getValue();
// Check if this global value has been found to not be the same already.
@@ -516,7 +569,7 @@ collectRegionsConstants(OutlinableRegion &Region,
// global value number. If the global value does not map to a Constant,
// it is considered to not be the same value.
Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
- if (ConstantMatches.hasValue()) {
+ if (ConstantMatches) {
if (ConstantMatches.getValue())
continue;
else
@@ -597,7 +650,7 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
"outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
// Transfer the swifterr attribute to the correct function parameter.
- if (Group.SwiftErrorArgument.hasValue())
+ if (Group.SwiftErrorArgument)
Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
Attribute::SwiftError);
@@ -666,6 +719,18 @@ static void moveFunctionData(Function &Old, Function &New,
if (!isa<CallInst>(&Val)) {
// Remove the debug information for outlined functions.
Val.setDebugLoc(DebugLoc());
+
+ // Loop info metadata may contain line locations. Update them to have no
+ // value in the new subprogram since the outlined code could be from
+ // several locations.
+ auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
+ if (DISubprogram *SP = New.getSubprogram())
+ if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+ return DILocation::get(New.getContext(), Loc->getLine(),
+ Loc->getColumn(), SP, nullptr);
+ return MD;
+ };
+ updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
continue;
}
@@ -691,8 +756,6 @@ static void moveFunctionData(Function &Old, Function &New,
for (Instruction *I : DebugInsts)
I->eraseFromParent();
}
-
- assert(NewEnds.size() > 0 && "No return instruction for new function?");
}
/// Find the the constants that will need to be lifted into arguments
@@ -714,7 +777,7 @@ static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
for (Value *V : (*IDIt).OperVals) {
// Since these are stored before any outlining, they will be in the
// global value numbering.
- unsigned GVN = C.getGVN(V).getValue();
+ unsigned GVN = *C.getGVN(V);
if (isa<Constant>(V))
if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
Inputs.push_back(GVN);
@@ -745,8 +808,7 @@ static void mapInputsToGVNs(IRSimilarityCandidate &C,
assert(Input && "Have a nullptr as an input");
if (OutputMappings.find(Input) != OutputMappings.end())
Input = OutputMappings.find(Input)->second;
- assert(C.getGVN(Input).hasValue() &&
- "Could not find a numbering for the given input");
+ assert(C.getGVN(Input) && "Could not find a numbering for the given input");
EndInputNumbers.push_back(C.getGVN(Input).getValue());
}
}
@@ -885,11 +947,11 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// numbering overrides any discovered location for the extracted code.
for (unsigned InputVal : InputGVNs) {
Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
- assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
+ assert(CanonicalNumberOpt && "Canonical number not found?");
unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
Optional<Value *> InputOpt = C.fromGVN(InputVal);
- assert(InputOpt.hasValue() && "Global value number not found?");
+ assert(InputOpt && "Global value number not found?");
Value *Input = InputOpt.getValue();
DenseMap<unsigned, unsigned>::iterator AggArgIt =
@@ -901,7 +963,7 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// argument in the overall function.
if (Input->isSwiftError()) {
assert(
- !Group.SwiftErrorArgument.hasValue() &&
+ !Group.SwiftErrorArgument &&
"Argument already marked with swifterr for this OutlinableGroup!");
Group.SwiftErrorArgument = TypeIndex;
}
@@ -969,12 +1031,11 @@ static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
// We check to see if the value is used by the PHINode from some other
// predecessor not included in the region. If it is, we make sure
// to keep it as an output.
- SmallVector<unsigned, 2> IncomingNumbers(PN.getNumIncomingValues());
- std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0);
- if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
- return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
- !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
- }))
+ if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
+ [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
+ return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
+ !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
+ }))
return true;
// Check if the value is used by any other instructions outside the region.
@@ -1098,30 +1159,72 @@ static hash_code encodePHINodeData(PHINodeData &PND) {
///
/// \param Region - The region that \p PN is an output for.
/// \param PN - The PHINode we are analyzing.
+/// \param Blocks - The blocks for the region we are analyzing.
/// \param AggArgIdx - The argument \p PN will be stored into.
/// \returns An optional holding the assigned canonical number, or None if
/// there is some attribute of the PHINode blocking it from being used.
static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
- PHINode *PN, unsigned AggArgIdx) {
+ PHINode *PN,
+ DenseSet<BasicBlock *> &Blocks,
+ unsigned AggArgIdx) {
OutlinableGroup &Group = *Region.Parent;
IRSimilarityCandidate &Cand = *Region.Candidate;
BasicBlock *PHIBB = PN->getParent();
CanonList PHIGVNs;
- for (Value *Incoming : PN->incoming_values()) {
- // If we cannot find a GVN, this means that the input to the PHINode is
- // not included in the region we are trying to analyze, meaning, that if
- // it was outlined, we would be adding an extra input. We ignore this
- // case for now, and so ignore the region.
+ Value *Incoming;
+ BasicBlock *IncomingBlock;
+ for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
+ Incoming = PN->getIncomingValue(Idx);
+ IncomingBlock = PN->getIncomingBlock(Idx);
+ // If we cannot find a GVN, and the incoming block is included in the region,
+ // this means that the input to the PHINode is not included in the region we
+ // are trying to analyze, meaning, that if it was outlined, we would be
+ // adding an extra input. We ignore this case for now, and so ignore the
+ // region.
Optional<unsigned> OGVN = Cand.getGVN(Incoming);
- if (!OGVN.hasValue()) {
+ if (!OGVN && Blocks.contains(IncomingBlock)) {
Region.IgnoreRegion = true;
return None;
}
+ // If the incoming block isn't in the region, we don't have to worry about
+ // this incoming value.
+ if (!Blocks.contains(IncomingBlock))
+ continue;
+
// Collect the canonical numbers of the values in the PHINode.
- unsigned GVN = OGVN.getValue();
+ unsigned GVN = *OGVN;
OGVN = Cand.getCanonicalNum(GVN);
- assert(OGVN.hasValue() && "No GVN found for incoming value?");
+ assert(OGVN && "No GVN found for incoming value?");
+ PHIGVNs.push_back(*OGVN);
+
+ // Find the incoming block and use the canonical numbering as well to define
+ // the hash for the PHINode.
+ OGVN = Cand.getGVN(IncomingBlock);
+
+ // If there is no number for the incoming block, it is because we have
+ // split the candidate basic blocks. So we use the previous block that it
+ // was split from to find the valid global value numbering for the PHINode.
+ if (!OGVN) {
+ assert(Cand.getStartBB() == IncomingBlock &&
+ "Unknown basic block used in exit path PHINode.");
+
+ BasicBlock *PrevBlock = nullptr;
+ // Iterate over the predecessors to the incoming block of the
+ // PHINode, when we find a block that is not contained in the region
+ // we know that this is the first block that we split from, and should
+ // have a valid global value numbering.
+ for (BasicBlock *Pred : predecessors(IncomingBlock))
+ if (!Blocks.contains(Pred)) {
+ PrevBlock = Pred;
+ break;
+ }
+ assert(PrevBlock && "Expected a predecessor not in the reigon!");
+ OGVN = Cand.getGVN(PrevBlock);
+ }
+ GVN = *OGVN;
+ OGVN = Cand.getCanonicalNum(GVN);
+ assert(OGVN && "No GVN found for incoming block?");
PHIGVNs.push_back(*OGVN);
}
@@ -1131,11 +1234,10 @@ static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
Optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
- assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!");
+ assert(BBGVN && "Could not find GVN for the incoming block!");
BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
- assert(BBGVN.hasValue() &&
- "Could not find canonical number for the incoming block!");
+ assert(BBGVN && "Could not find canonical number for the incoming block!");
// Create a pair of the exit block canonical value, and the aggregate
// argument location, connected to the canonical numbers stored in the
// PHINode.
@@ -1262,9 +1364,9 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
// If two PHINodes have the same canonical values, but different aggregate
// argument locations, then they will have distinct Canonical Values.
- GVN = getGVNForPHINode(Region, PN, AggArgIdx);
- if (!GVN.hasValue())
- return;
+ GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
+ if (!GVN)
+ return;
} else {
// If we do not have a PHINode we use the global value numbering for the
// output value, to find the canonical number to add to the set of stored
@@ -1413,7 +1515,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// Make sure that the argument in the new function has the SwiftError
// argument.
- if (Group.SwiftErrorArgument.hasValue())
+ if (Group.SwiftErrorArgument)
Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
Attribute::SwiftError);
@@ -1520,17 +1622,18 @@ getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
/// \param OutputMappings [in] - The mapping of output values from outlined
/// region to their original values.
/// \param CanonNums [out] - The canonical numbering for the incoming values to
-/// \p PN.
+/// \p PN paired with their incoming block.
/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
/// of \p Region rather than the overall function's call.
-static void
-findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
- const DenseMap<Value *, Value *> &OutputMappings,
- DenseSet<unsigned> &CanonNums,
- bool ReplacedWithOutlinedCall = true) {
+static void findCanonNumsForPHI(
+ PHINode *PN, OutlinableRegion &Region,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
+ bool ReplacedWithOutlinedCall = true) {
// Iterate over the incoming values.
for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
Value *IVal = PN->getIncomingValue(Idx);
+ BasicBlock *IBlock = PN->getIncomingBlock(Idx);
// If we have an argument as incoming value, we need to grab the passed
// value from the call itself.
if (Argument *A = dyn_cast<Argument>(IVal)) {
@@ -1545,10 +1648,10 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
// Find and add the canonical number for the incoming value.
Optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
- assert(GVN.hasValue() && "No GVN for incoming value");
+ assert(GVN && "No GVN for incoming value");
Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
- assert(CanonNum.hasValue() && "No Canonical Number for GVN");
- CanonNums.insert(*CanonNum);
+ assert(CanonNum && "No Canonical Number for GVN");
+ CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
}
}
@@ -1557,19 +1660,26 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
/// function.
///
/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
-/// \param Region [in] - The OutlinableRegion containing \p PN.
+/// \param Region [in] - The OutlinableRegion containing \p PN.
/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
/// \p PN in.
/// \param OutputMappings [in] - The mapping of output values from outlined
/// region to their original values.
+/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
+/// matched.
/// \return the newly found or created PHINode in \p OverallPhiBlock.
static PHINode*
findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
BasicBlock *OverallPhiBlock,
- const DenseMap<Value *, Value *> &OutputMappings) {
+ const DenseMap<Value *, Value *> &OutputMappings,
+ DenseSet<PHINode *> &UsedPHIs) {
OutlinableGroup &Group = *Region.Parent;
- DenseSet<unsigned> PNCanonNums;
+
+ // A list of the canonical numbering assigned to each incoming value, paired
+ // with the incoming block for the PHINode passed into this function.
+ SmallVector<std::pair<unsigned, BasicBlock *>> PNCanonNums;
+
// We have to use the extracted function since we have merged this region into
// the overall function yet. We make sure to reassign the argument numbering
// since it is possible that the argument ordering is different between the
@@ -1578,18 +1688,61 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
/* ReplacedWithOutlinedCall = */ false);
OutlinableRegion *FirstRegion = Group.Regions[0];
- DenseSet<unsigned> CurrentCanonNums;
+
+ // A list of the canonical numbering assigned to each incoming value, paired
+ // with the incoming block for the PHINode that we are currently comparing
+ // the passed PHINode to.
+ SmallVector<std::pair<unsigned, BasicBlock *>> CurrentCanonNums;
+
// Find the Canonical Numbering for each PHINode, if it matches, we replace
// the uses of the PHINode we are searching for, with the found PHINode.
for (PHINode &CurrPN : OverallPhiBlock->phis()) {
+ // If this PHINode has already been matched to another PHINode to be merged,
+ // we skip it.
+ if (UsedPHIs.contains(&CurrPN))
+ continue;
+
CurrentCanonNums.clear();
findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
/* ReplacedWithOutlinedCall = */ true);
- if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) {
- return CurrentCanonNums.contains(CanonNum);
- }))
+ // If the list of incoming values is not the same length, then they cannot
+ // match since there is not an analogue for each incoming value.
+ if (PNCanonNums.size() != CurrentCanonNums.size())
+ continue;
+
+ bool FoundMatch = true;
+
+ // We compare the canonical value for each incoming value in the passed
+ // in PHINode to one already present in the outlined region. If the
+ // incoming values do not match, then the PHINodes do not match.
+
+ // We also check to make sure that the incoming block matches as well by
+ // finding the corresponding incoming block in the combined outlined region
+ // for the current outlined region.
+ for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
+ std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
+ std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
+ if (ToCompareTo.first != ToAdd.first) {
+ FoundMatch = false;
+ break;
+ }
+
+ BasicBlock *CorrespondingBlock =
+ Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
+ assert(CorrespondingBlock && "Found block is nullptr");
+ if (CorrespondingBlock != ToCompareTo.second) {
+ FoundMatch = false;
+ break;
+ }
+ }
+
+ // If all incoming values and branches matched, then we can merge
+ // into the found PHINode.
+ if (FoundMatch) {
+ UsedPHIs.insert(&CurrPN);
return &CurrPN;
+ }
}
// If we've made it here, it means we weren't able to replace the PHINode, so
@@ -1603,12 +1756,8 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
// Find corresponding basic block in the overall function for the incoming
// block.
- Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI();
- assert(FirstNonPHI && "Incoming block is empty?");
- Value *CorrespondingVal =
- Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI);
- assert(CorrespondingVal && "Value is nullptr?");
- BasicBlock *BlockToUse = cast<Instruction>(CorrespondingVal)->getParent();
+ BasicBlock *BlockToUse =
+ Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
NewPN->setIncomingBlock(Idx, BlockToUse);
// If we have an argument we make sure we replace using the argument from
@@ -1623,6 +1772,10 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
assert(Val && "Value is nullptr?");
+ DenseMap<Value *, Value *>::iterator RemappedIt =
+ FirstRegion->RemappedArguments.find(Val);
+ if (RemappedIt != FirstRegion->RemappedArguments.end())
+ Val = RemappedIt->second;
NewPN->setIncomingValue(Idx, Val);
}
return NewPN;
@@ -1649,6 +1802,7 @@ replaceArgumentUses(OutlinableRegion &Region,
if (FirstFunction)
DominatingFunction = Group.OutlinedFunction;
DominatorTree DT(*DominatingFunction);
+ DenseSet<PHINode *> UsedPHIs;
for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
ArgIdx++) {
@@ -1665,6 +1819,8 @@ replaceArgumentUses(OutlinableRegion &Region,
<< *Region.ExtractedFunction << " with " << *AggArg
<< " in function " << *Group.OutlinedFunction << "\n");
Arg->replaceAllUsesWith(AggArg);
+ Value *V = Region.Call->getArgOperand(ArgIdx);
+ Region.RemappedArguments.insert(std::make_pair(V, AggArg));
continue;
}
@@ -1713,7 +1869,7 @@ replaceArgumentUses(OutlinableRegion &Region,
// If this is storing a PHINode, we must make sure it is included in the
// overall function.
if (!isa<PHINode>(ValueOperand) ||
- Region.Candidate->getGVN(ValueOperand).hasValue()) {
+ Region.Candidate->getGVN(ValueOperand).has_value()) {
if (FirstFunction)
continue;
Value *CorrVal =
@@ -1725,7 +1881,7 @@ replaceArgumentUses(OutlinableRegion &Region,
PHINode *PN = cast<PHINode>(SI->getValueOperand());
// If it has a value, it was not split by the code extractor, which
// is what we are looking for.
- if (Region.Candidate->getGVN(PN).hasValue())
+ if (Region.Candidate->getGVN(PN))
continue;
// We record the parent block for the PHINode in the Region so that
@@ -1748,8 +1904,8 @@ replaceArgumentUses(OutlinableRegion &Region,
// For our PHINode, we find the combined canonical numbering, and
// attempt to find a matching PHINode in the overall PHIBlock. If we
// cannot, we copy the PHINode and move it into this new block.
- PHINode *NewPN =
- findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings);
+ PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
+ OutputMappings, UsedPHIs);
NewI->setOperand(0, NewPN);
}
@@ -1923,7 +2079,7 @@ static void alignOutputBlockWithAggFunc(
// If there is, we remove the new output blocks. If it does not,
// we add it to our list of sets of output blocks.
- if (MatchingBB.hasValue()) {
+ if (MatchingBB) {
LLVM_DEBUG(dbgs() << "Set output block for region in function"
<< Region.ExtractedFunction << " to "
<< MatchingBB.getValue());
@@ -2279,6 +2435,9 @@ void IROutliner::pruneIncompatibleRegions(
if (BBHasAddressTaken)
continue;
+ if (IRSC.getFunction()->hasOptNone())
+ continue;
+
if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
!OutlineFromLinkODRs)
continue;
@@ -2343,9 +2502,9 @@ static Value *findOutputValueInRegion(OutlinableRegion &Region,
OutputCanon = *It->second.second.begin();
}
Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon);
- assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?");
+ assert(OGVN && "Could not find GVN for Canonical Number?");
Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
- assert(OV.hasValue() && "Could not find value for GVN?");
+ assert(OV && "Could not find value for GVN?");
return *OV;
}
@@ -2400,11 +2559,8 @@ static InstructionCost findCostForOutputBlocks(Module &M,
for (Value *V : ID.OperVals) {
BasicBlock *BB = static_cast<BasicBlock *>(V);
- DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
- if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
- continue;
- FoundBlocks.insert(BB);
- NumOutputBranches++;
+ if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
+ NumOutputBranches++;
}
}
@@ -2520,7 +2676,7 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region,
// If we found an output register, place a mapping of the new value
// to the original in the mapping.
- if (!OutputIdx.hasValue())
+ if (!OutputIdx)
return;
if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
@@ -2680,7 +2836,7 @@ unsigned IROutliner::doOutline(Module &M) {
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
- false, "outlined");
+ false, nullptr, "outlined");
findAddInputsOutputs(M, *OS, NotSame);
if (!OS->IgnoreRegion)
OutlinedRegions.push_back(OS);
@@ -2791,7 +2947,7 @@ unsigned IROutliner::doOutline(Module &M) {
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
- false, "outlined");
+ false, nullptr, "outlined");
bool FunctionOutlined = extractSection(*OS);
if (FunctionOutlined) {
unsigned StartIdx = OS->Candidate->getStartIdx();
@@ -2874,7 +3030,7 @@ bool IROutlinerLegacyPass::runOnModule(Module &M) {
std::unique_ptr<OptimizationRemarkEmitter> ORE;
auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
+ return *ORE;
};
auto GTTI = [this](Function &F) -> TargetTransformInfo & {
@@ -2905,7 +3061,7 @@ PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
std::function<OptimizationRemarkEmitter &(Function &)> GORE =
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
+ return *ORE;
};
if (IROutliner(GTTI, GIRSI, GORE).run(M))
diff --git a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index c32e09875a12..76f8f1a7a482 100644
--- a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -9,11 +9,8 @@
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -32,7 +29,7 @@ static bool inferAllPrototypeAttributes(
// explicitly visited by CGSCC passes in the new pass manager.)
if (F.isDeclaration() && !F.hasOptNone()) {
if (!F.hasFnAttribute(Attribute::NoBuiltin))
- Changed |= inferLibFuncAttributes(F, GetTLI(F));
+ Changed |= inferNonMandatoryLibFuncAttrs(F, GetTLI(F));
Changed |= inferAttributesFromOthers(F);
}
diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 76f1d0c54d08..2143e39d488d 100644
--- a/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -12,14 +12,8 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Inliner.h"
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 49babc24cb82..4d32266eb9ea 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -14,21 +14,21 @@
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineOrder.h"
@@ -37,11 +37,9 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -67,8 +65,6 @@
#include <algorithm>
#include <cassert>
#include <functional>
-#include <sstream>
-#include <tuple>
#include <utility>
#include <vector>
@@ -92,11 +88,28 @@ static cl::opt<bool>
DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
cl::init(false), cl::Hidden);
+static cl::opt<int> IntraSCCCostMultiplier(
+ "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
+ cl::desc(
+ "Cost multiplier to multiply onto inlined call sites where the "
+ "new call was previously an intra-SCC call (not relevant when the "
+ "original call was already intra-SCC). This can accumulate over "
+ "multiple inlinings (e.g. if a call site already had a cost "
+ "multiplier and one of its inlined calls was also subject to "
+ "this, the inlined call would have the original multiplier "
+ "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
+ "inlining through a child SCC which can cause terrible compile times"));
+
/// A flag for test, so we can print the content of the advisor when running it
/// as part of the default (e.g. -O3) pipeline.
static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
cl::init(false), cl::Hidden);
+/// Allows printing the contents of the advisor after each SCC inliner pass.
+static cl::opt<bool>
+ EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing",
+ cl::init(false), cl::Hidden);
+
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
static cl::opt<std::string> CGSCCInlineReplayFile(
@@ -150,10 +163,6 @@ static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
"<Line Number>:<Column Number>.<Discriminator> (default)")),
cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
-static cl::opt<bool> InlineEnablePriorityOrder(
- "inline-enable-priority-order", cl::Hidden, cl::init(false),
- cl::desc("Enable the priority inline order for the inliner"));
-
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -708,8 +717,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
// duration of the inliner pass, and thus the lifetime of the owned advisor.
// The one we would get from the MAM can be invalidated as a result of the
// inliner's activity.
- OwnedAdvisor =
- std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
+ OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(
+ M, FAM, getInlineParams(),
+ InlineContext{LTOPhase, InlinePass::CGSCCInliner});
if (!CGSCCInlineReplayFile.empty())
OwnedAdvisor = getReplayInlineAdvisor(
@@ -718,7 +728,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
CGSCCInlineReplayScope,
CGSCCInlineReplayFallback,
{CGSCCInlineReplayFormat}},
- /*EmitRemarks=*/true);
+ /*EmitRemarks=*/true,
+ InlineContext{LTOPhase,
+ InlinePass::ReplayCGSCCInliner});
return *OwnedAdvisor;
}
@@ -744,7 +756,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
.getManager();
InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
- Advisor.onPassEntry();
+ Advisor.onPassEntry(&InitialC);
auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(&InitialC); });
@@ -773,12 +785,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// this model, but it is uniformly spread across all the functions in the SCC
// and eventually they all become too large to inline, rather than
// incrementally maknig a single function grow in a super linear fashion.
- std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
- if (InlineEnablePriorityOrder)
- Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
- else
- Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
- assert(Calls != nullptr && "Expected an initialized InlineOrder");
+ DefaultInlineOrder<std::pair<CallBase *, int>> Calls;
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
@@ -793,7 +800,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (auto *CB = dyn_cast<CallBase>(&I))
if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
- Calls->push({CB, -1});
+ Calls.push({CB, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
setInlineRemark(*CB, "unavailable definition");
@@ -807,7 +814,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
}
- if (Calls->empty())
+ if (Calls.empty())
return PreservedAnalyses::all();
// Capture updatable variable for the current SCC.
@@ -833,15 +840,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
SmallVector<Function *, 4> DeadFunctionsInComdats;
// Loop forward over all of the calls.
- while (!Calls->empty()) {
+ while (!Calls.empty()) {
// We expect the calls to typically be batched with sequences of calls that
// have the same caller, so we first set up some shared infrastructure for
// this caller. We also do any pruning we can at this layer on the caller
// alone.
- Function &F = *Calls->front().first->getCaller();
+ Function &F = *Calls.front().first->getCaller();
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C) {
- Calls->pop();
+ Calls.pop();
continue;
}
@@ -857,8 +864,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
bool DidInline = false;
- while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
- auto P = Calls->pop();
+ while (!Calls.empty() && Calls.front().first->getCaller() == &F) {
+ auto P = Calls.pop();
CallBase *CB = P.first;
const int InlineHistoryID = P.second;
Function &Callee = *CB->getCalledFunction();
@@ -876,8 +883,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// trigger infinite inlining, much like is prevented within the inliner
// itself by the InlineHistory above, but spread across CGSCC iterations
// and thus hidden from the full inline history.
- if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
- UR.InlinedInternalEdges.count({&N, C})) {
+ LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+ if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
@@ -897,6 +904,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
+ int CBCostMult =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
+ .value_or(1);
+
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
@@ -935,9 +947,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (tryPromoteCall(*ICB))
NewCallee = ICB->getCalledFunction();
}
- if (NewCallee)
- if (!NewCallee->isDeclaration())
- Calls->push({ICB, NewHistoryID});
+ if (NewCallee) {
+ if (!NewCallee->isDeclaration()) {
+ Calls.push({ICB, NewHistoryID});
+ // Continually inlining through an SCC can result in huge compile
+ // times and bloated code since we arbitrarily stop at some point
+ // when the inliner decides it's not profitable to inline anymore.
+ // We attempt to mitigate this by making these calls exponentially
+ // more expensive.
+ // This doesn't apply to calls in the same SCC since if we do
+ // inline through the SCC the function will end up being
+ // self-recursive which the inliner bails out on, and inlining
+ // within an SCC is necessary for performance.
+ if (CalleeSCC != C &&
+ CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
+ Attribute NewCBCostMult = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineCostMultiplierAttributeName,
+ itostr(CBCostMult * IntraSCCCostMultiplier));
+ ICB->addFnAttr(NewCBCostMult);
+ }
+ }
+ }
}
}
@@ -953,7 +984,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() &&
!CG.isLibFunction(Callee)) {
if (Callee.hasLocalLinkage() || !Callee.hasComdat()) {
- Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
+ Calls.erase_if([&](const std::pair<CallBase *, int> &Call) {
return Call.first->getCaller() == &Callee;
});
// Clear the body and queue the function itself for deletion when we
@@ -1083,17 +1114,24 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
bool MandatoryFirst,
+ InlineContext IC,
InliningAdvisorMode Mode,
unsigned MaxDevirtIterations)
- : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) {
+ : Params(Params), IC(IC), Mode(Mode),
+ MaxDevirtIterations(MaxDevirtIterations) {
// Run the inliner first. The theory is that we are walking bottom-up and so
// the callees have already been fully optimized, and we want to inline them
// into the callers so that our optimizations can reflect that.
// For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
// because it makes profile annotation in the backend inaccurate.
- if (MandatoryFirst)
+ if (MandatoryFirst) {
PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
+ if (EnablePostSCCAdvisorPrinting)
+ PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs()));
+ }
PM.addPass(InlinerPass());
+ if (EnablePostSCCAdvisorPrinting)
+ PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs()));
}
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
@@ -1103,7 +1141,8 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
{CGSCCInlineReplayFile,
CGSCCInlineReplayScope,
CGSCCInlineReplayFallback,
- {CGSCCInlineReplayFormat}})) {
+ {CGSCCInlineReplayFormat}},
+ IC)) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp
index 692e445cb7cb..5aa5b905f06c 100644
--- a/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -19,7 +19,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Internalize.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
@@ -33,8 +32,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "internalize"
diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index d9a59dd35fde..ad1927c09803 100644
--- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -23,14 +23,9 @@
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include <fstream>
-#include <set>
using namespace llvm;
#define DEBUG_TYPE "loop-extract"
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 8e83d7bcb6c2..d5f1d291f41f 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1223,6 +1223,7 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
static const unsigned kX86JumpTableEntrySize = 8;
static const unsigned kARMJumpTableEntrySize = 4;
static const unsigned kARMBTIJumpTableEntrySize = 8;
+static const unsigned kRISCVJumpTableEntrySize = 8;
unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
switch (Arch) {
@@ -1238,6 +1239,9 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
if (BTE->getZExtValue())
return kARMBTIJumpTableEntrySize;
return kARMJumpTableEntrySize;
+ case Triple::riscv32:
+ case Triple::riscv64:
+ return kRISCVJumpTableEntrySize;
default:
report_fatal_error("Unsupported architecture for jump tables");
}
@@ -1265,6 +1269,9 @@ void LowerTypeTestsModule::createJumpTableEntry(
AsmOS << "b $" << ArgIndex << "\n";
} else if (JumpTableArch == Triple::thumb) {
AsmOS << "b.w $" << ArgIndex << "\n";
+ } else if (JumpTableArch == Triple::riscv32 ||
+ JumpTableArch == Triple::riscv64) {
+ AsmOS << "tail $" << ArgIndex << "@plt\n";
} else {
report_fatal_error("Unsupported architecture for jump tables");
}
@@ -1282,7 +1289,8 @@ Type *LowerTypeTestsModule::getJumpTableEntryType() {
void LowerTypeTestsModule::buildBitSetsFromFunctions(
ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
- Arch == Triple::thumb || Arch == Triple::aarch64)
+ Arch == Triple::thumb || Arch == Triple::aarch64 ||
+ Arch == Triple::riscv32 || Arch == Triple::riscv64)
buildBitSetsFromFunctionsNative(TypeIds, Functions);
else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
buildBitSetsFromFunctionsWASM(TypeIds, Functions);
@@ -1427,6 +1435,11 @@ void LowerTypeTestsModule::createJumpTable(
F->addFnAttr("branch-target-enforcement", "false");
F->addFnAttr("sign-return-address", "none");
}
+ if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) {
+ // Make sure the jump table assembly is not modified by the assembler or
+ // the linker.
+ F->addFnAttr("target-features", "-c,-relax");
+ }
// Make sure we don't emit .eh_frame for this function.
F->addFnAttr(Attribute::NoUnwind);
@@ -2187,11 +2200,7 @@ bool LowerTypeTestsModule::lower() {
}
Sets.emplace_back(I, MaxUniqueId);
}
- llvm::sort(Sets,
- [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
- const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
- return S1.second < S2.second;
- });
+ llvm::sort(Sets, llvm::less_second());
// For each disjoint set we found...
for (const auto &S : Sets) {
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 97ef872c5499..b850591b4aa6 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -88,12 +88,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -113,7 +112,6 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/ValueMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -121,8 +119,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/Utils/FunctionComparator.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -139,10 +137,10 @@ STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
-static cl::opt<unsigned> NumFunctionsForSanityCheck(
- "mergefunc-sanity",
- cl::desc("How many functions in module could be used for "
- "MergeFunctions pass sanity check. "
+static cl::opt<unsigned> NumFunctionsForVerificationCheck(
+ "mergefunc-verify",
+ cl::desc("How many functions in a module could be used for "
+ "MergeFunctions to pass a basic correctness check. "
"'0' disables this check. Works only with '-debug' key."),
cl::init(0), cl::Hidden);
@@ -228,10 +226,13 @@ private:
/// analyzed again.
std::vector<WeakTrackingVH> Deferred;
+ /// Set of values marked as used in llvm.used and llvm.compiler.used.
+ SmallPtrSet<GlobalValue *, 4> Used;
+
#ifndef NDEBUG
/// Checks the rules of order relation introduced among functions set.
- /// Returns true, if sanity check has been passed, and false if failed.
- bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist);
+ /// Returns true, if check has been passed, and false if failed.
+ bool doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist);
#endif
/// Insert a ComparableFunction into the FnTree, or merge it away if it's
@@ -330,12 +331,12 @@ PreservedAnalyses MergeFunctionsPass::run(Module &M,
}
#ifndef NDEBUG
-bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
- if (const unsigned Max = NumFunctionsForSanityCheck) {
+bool MergeFunctions::doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist) {
+ if (const unsigned Max = NumFunctionsForVerificationCheck) {
unsigned TripleNumber = 0;
bool Valid = true;
- dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
+ dbgs() << "MERGEFUNC-VERIFY: Started for first " << Max << " functions.\n";
unsigned i = 0;
for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(),
@@ -351,7 +352,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
- dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
+ dbgs() << "MERGEFUNC-VERIFY: Non-symmetric; triple: " << TripleNumber
<< "\n";
dbgs() << *F1 << '\n' << *F2 << '\n';
Valid = false;
@@ -384,7 +385,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
}
if (!Transitive) {
- dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: "
+ dbgs() << "MERGEFUNC-VERIFY: Non-transitive; triple: "
<< TripleNumber << "\n";
dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
<< Res4 << "\n";
@@ -395,7 +396,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
}
}
- dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n";
+ dbgs() << "MERGEFUNC-VERIFY: " << (Valid ? "Passed." : "Failed.") << "\n";
return Valid;
}
return true;
@@ -410,6 +411,11 @@ static bool isEligibleForMerging(Function &F) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
+ SmallVector<GlobalValue *, 4> UsedV;
+ collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false);
+ collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/true);
+ Used.insert(UsedV.begin(), UsedV.end());
+
// All functions in the module, ordered by hash. Functions with a unique
// hash value are easily eliminated.
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
@@ -436,7 +442,7 @@ bool MergeFunctions::runOnModule(Module &M) {
std::vector<WeakTrackingVH> Worklist;
Deferred.swap(Worklist);
- LLVM_DEBUG(doSanityCheck(Worklist));
+ LLVM_DEBUG(doFunctionalCheck(Worklist));
LLVM_DEBUG(dbgs() << "size of module: " << M.size() << '\n');
LLVM_DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
@@ -456,6 +462,7 @@ bool MergeFunctions::runOnModule(Module &M) {
FnTree.clear();
FNodesInTree.clear();
GlobalNumbers.clear();
+ Used.clear();
return Changed;
}
@@ -484,7 +491,7 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
if (SrcTy->isStructTy()) {
assert(DestTy->isStructTy());
assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
- Value *Result = UndefValue::get(DestTy);
+ Value *Result = PoisonValue::get(DestTy);
for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
Value *Element = createCast(
Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
@@ -828,7 +835,10 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// For better debugability, under MergeFunctionsPDI, we do not modify G's
// call sites to point to F even when within the same translation unit.
if (!G->isInterposable() && !MergeFunctionsPDI) {
- if (G->hasGlobalUnnamedAddr()) {
+ // Functions referred to by llvm.used/llvm.compiler.used are special:
+ // there are uses of the symbol name that are not visible to LLVM,
+ // usually from inline asm.
+ if (G->hasGlobalUnnamedAddr() && !Used.contains(G)) {
// G might have been a key in our GlobalNumberState, and it's illegal
// to replace a key in ValueMap<GlobalValue *> with a non-global.
GlobalNumbers.erase(G);
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index d515303e4911..143715006512 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -14,43 +14,33 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ModuleInliner.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineOrder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <cassert>
-#include <functional>
using namespace llvm;
@@ -94,7 +84,9 @@ InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
// inliner pass, and thus the lifetime of the owned advisor. The one we
// would get from the MAM can be invalidated as a result of the inliner's
// activity.
- OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(M, FAM, Params);
+ OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(
+ M, FAM, Params,
+ InlineContext{LTOPhase, InlinePass::ModuleInliner});
return *OwnedAdvisor;
}
@@ -119,7 +111,9 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n");
auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
- if (!IAA.tryCreate(Params, Mode, {})) {
+ if (!IAA.tryCreate(
+ Params, Mode, {},
+ InlineContext{LTOPhase, InlinePass::ModuleInliner})) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
@@ -153,7 +147,8 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
// the SCC inliner, which need some refactoring.
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
if (InlineEnablePriorityOrder)
- Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
+ Calls = std::make_unique<PriorityInlineOrder>(
+ std::make_unique<SizePriority>());
else
Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
assert(Calls != nullptr && "Expected an initialized InlineOrder");
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 2d765fb6ce6d..227ad8501f25 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -49,7 +49,6 @@
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
-#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <algorithm>
@@ -59,17 +58,16 @@ using namespace omp;
#define DEBUG_TYPE "openmp-opt"
static cl::opt<bool> DisableOpenMPOptimizations(
- "openmp-opt-disable", cl::ZeroOrMore,
- cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
- cl::init(false));
+ "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
+ cl::Hidden, cl::init(false));
static cl::opt<bool> EnableParallelRegionMerging(
- "openmp-opt-enable-merging", cl::ZeroOrMore,
+ "openmp-opt-enable-merging",
cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
- DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
+ DisableInternalization("openmp-opt-disable-internalization",
cl::desc("Disable function internalization."),
cl::Hidden, cl::init(false));
@@ -85,42 +83,47 @@ static cl::opt<bool> HideMemoryTransferLatency(
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptDeglobalization(
- "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
+ "openmp-opt-disable-deglobalization",
cl::desc("Disable OpenMP optimizations involving deglobalization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptSPMDization(
- "openmp-opt-disable-spmdization", cl::ZeroOrMore,
+ "openmp-opt-disable-spmdization",
cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptFolding(
- "openmp-opt-disable-folding", cl::ZeroOrMore,
+ "openmp-opt-disable-folding",
cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
cl::init(false));
static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
- "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
+ "openmp-opt-disable-state-machine-rewrite",
cl::desc("Disable OpenMP optimizations that replace the state machine."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptBarrierElimination(
- "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore,
+ "openmp-opt-disable-barrier-elimination",
cl::desc("Disable OpenMP optimizations that eliminate barriers."),
cl::Hidden, cl::init(false));
static cl::opt<bool> PrintModuleAfterOptimizations(
- "openmp-opt-print-module", cl::ZeroOrMore,
+ "openmp-opt-print-module-after",
cl::desc("Print the current module after OpenMP optimizations."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintModuleBeforeOptimizations(
+ "openmp-opt-print-module-before",
+ cl::desc("Print the current module before OpenMP optimizations."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> AlwaysInlineDeviceFunctions(
- "openmp-opt-inline-device", cl::ZeroOrMore,
+ "openmp-opt-inline-device",
cl::desc("Inline all applicible functions on the device."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
- EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
+ EnableVerboseRemarks("openmp-opt-verbose-remarks",
cl::desc("Enables more verbose remarks."), cl::Hidden,
cl::init(false));
@@ -129,6 +132,11 @@ static cl::opt<unsigned>
cl::desc("Maximal number of attributor iterations."),
cl::init(256));
+static cl::opt<unsigned>
+ SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
+ cl::desc("Maximum amount of shared memory to use."),
+ cl::init(std::numeric_limits<unsigned>::max()));
+
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -493,11 +501,14 @@ struct OMPInformationCache : public InformationCache {
// Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
// functions, except if `optnone` is present.
- for (Function &F : M) {
- for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
- if (F.getName().startswith(Prefix) &&
- !F.hasFnAttribute(Attribute::OptimizeNone))
- F.removeFnAttr(Attribute::NoInline);
+ if (isOpenMPDevice(M)) {
+ for (Function &F : M) {
+ for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
+ if (F.hasFnAttribute(Attribute::NoInline) &&
+ F.getName().startswith(Prefix) &&
+ !F.hasFnAttribute(Attribute::OptimizeNone))
+ F.removeFnAttr(Attribute::NoInline);
+ }
}
// TODO: We should attach the attributes defined in OMPKinds.def.
@@ -591,7 +602,7 @@ struct KernelInfoState : AbstractState {
/// Abstract State interface
///{
- KernelInfoState() {}
+ KernelInfoState() = default;
KernelInfoState(bool BestState) {
if (!BestState)
indicatePessimisticFixpoint();
@@ -926,8 +937,7 @@ private:
SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
BasicBlock *StartBB = nullptr, *EndBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
@@ -966,8 +976,7 @@ private:
const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
ParentBB->getTerminator()->eraseFromParent();
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
@@ -1107,10 +1116,8 @@ private:
// callbacks.
SmallVector<Value *, 8> Args;
for (auto *CI : MergableCIs) {
- Value *Callee =
- CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
- FunctionType *FT =
- cast<FunctionType>(Callee->getType()->getPointerElementType());
+ Value *Callee = CI->getArgOperand(CallbackCalleeOperand);
+ FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask;
Args.clear();
Args.push_back(OutlinedFn->getArg(0));
Args.push_back(OutlinedFn->getArg(1));
@@ -1458,7 +1465,6 @@ private:
case Intrinsic::nvvm_barrier0_and:
case Intrinsic::nvvm_barrier0_or:
case Intrinsic::nvvm_barrier0_popc:
- case Intrinsic::amdgcn_s_barrier:
return true;
default:
break;
@@ -2120,6 +2126,8 @@ private:
OMPRTL___kmpc_barrier_simple_generic);
ExternalizationRAII ThreadId(OMPInfoCache,
OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ ExternalizationRAII NumThreads(
+ OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block);
ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
registerAAs(IsModulePass);
@@ -2407,8 +2415,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
auto CallCheck = [&](Instruction &I) {
Optional<Value *> ReplVal = getValueForCall(A, I, ICV);
- if (ReplVal.hasValue() &&
- ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
+ if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
HasChanged = ChangeStatus::CHANGED;
return true;
@@ -2468,7 +2475,8 @@ struct AAICVTrackerFunction : public AAICVTracker {
if (ICVTrackingAA.isAssumedTracked()) {
Optional<Value *> URV = ICVTrackingAA.getUniqueReplacementValue(ICV);
- if (!URV || (*URV && AA::isValidAtPosition(**URV, I, OMPInfoCache)))
+ if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I),
+ OMPInfoCache)))
return URV;
}
@@ -2509,13 +2517,13 @@ struct AAICVTrackerFunction : public AAICVTracker {
if (ValuesMap.count(CurrInst)) {
Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
// Unknown value, track new.
- if (!ReplVal.hasValue()) {
+ if (!ReplVal) {
ReplVal = NewReplVal;
break;
}
// If we found a new value, we can't know the icv value anymore.
- if (NewReplVal.hasValue())
+ if (NewReplVal)
if (ReplVal != NewReplVal)
return nullptr;
@@ -2523,11 +2531,11 @@ struct AAICVTrackerFunction : public AAICVTracker {
}
Optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV);
- if (!NewReplVal.hasValue())
+ if (!NewReplVal)
continue;
// Unknown value, track new.
- if (!ReplVal.hasValue()) {
+ if (!ReplVal) {
ReplVal = NewReplVal;
break;
}
@@ -2539,7 +2547,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
}
// If we are in the same BB and we have a value, we are done.
- if (CurrBB == I->getParent() && ReplVal.hasValue())
+ if (CurrBB == I->getParent() && ReplVal)
return ReplVal;
// Go through all predecessors and add terminators for analysis.
@@ -2597,7 +2605,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker {
ICVTrackingAA.getReplacementValue(ICV, &I, A);
// If we found a second ICV value there is no unique returned value.
- if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
+ if (UniqueICVValue && UniqueICVValue != NewReplVal)
return false;
UniqueICVValue = NewReplVal;
@@ -2648,10 +2656,10 @@ struct AAICVTrackerCallSite : AAICVTracker {
}
ChangeStatus manifest(Attributor &A) override {
- if (!ReplVal.hasValue() || !ReplVal.getValue())
+ if (!ReplVal || !*ReplVal)
return ChangeStatus::UNCHANGED;
- A.changeValueAfterManifest(*getCtxI(), **ReplVal);
+ A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal);
A.deleteAfterManifest(*getCtxI());
return ChangeStatus::CHANGED;
@@ -2789,7 +2797,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs;
/// Total number of basic blocks in this function.
- long unsigned NumBBs;
+ long unsigned NumBBs = 0;
};
ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
@@ -2952,12 +2960,23 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
}
void initialize(Attributor &A) override {
+ if (DisableOpenMPOptDeglobalization) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
+ Attributor::SimplifictionCallbackTy SCB =
+ [](const IRPosition &, const AbstractAttribute *,
+ bool &) -> Optional<Value *> { return nullptr; };
for (User *U : RFI.Declaration->users())
- if (CallBase *CB = dyn_cast<CallBase>(U))
+ if (CallBase *CB = dyn_cast<CallBase>(U)) {
MallocCalls.insert(CB);
+ A.registerSimplificationCallback(IRPosition::callsite_returned(*CB),
+ SCB);
+ }
findPotentialRemovedFreeCalls(A);
}
@@ -2999,6 +3018,14 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0));
+ if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) {
+ LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB
+ << " with shared memory."
+ << " Shared memory usage is limited to "
+ << SharedMemoryLimit << " bytes\n");
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
<< " with " << AllocSize->getZExtValue()
<< " bytes of shared memory\n");
@@ -3029,11 +3056,12 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
"HeapToShared on allocation without alignment attribute");
SharedMem->setAlignment(MaybeAlign(Alignment));
- A.changeValueAfterManifest(*CB, *NewBuffer);
+ A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer);
A.deleteAfterManifest(*CB);
A.deleteAfterManifest(*FreeCalls.front());
- NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
+ SharedMemoryUsed += AllocSize->getZExtValue();
+ NumBytesMovedToSharedMemory = SharedMemoryUsed;
Changed = ChangeStatus::CHANGED;
}
@@ -3069,6 +3097,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
SmallSetVector<CallBase *, 4> MallocCalls;
/// Collection of potentially removed free calls in a function.
SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
+ /// The total amount of shared memory that has been used for HeapToShared.
+ unsigned SharedMemoryUsed = 0;
};
struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
@@ -3137,12 +3167,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
Function *Fn = getAnchorScope();
- if (!OMPInfoCache.Kernels.count(Fn))
- return;
-
- // Add itself to the reaching kernel and set IsKernelEntry.
- ReachingKernelEntries.insert(Fn);
- IsKernelEntry = true;
OMPInformationCache::RuntimeFunctionInfo &InitRFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
@@ -3176,10 +3200,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
Fn);
// Ignore kernels without initializers such as global constructors.
- if (!KernelInitCB || !KernelDeinitCB) {
- indicateOptimisticFixpoint();
+ if (!KernelInitCB || !KernelDeinitCB)
return;
- }
+
+ // Add itself to the reaching kernel and set IsKernelEntry.
+ ReachingKernelEntries.insert(Fn);
+ IsKernelEntry = true;
// For kernels we might need to initialize/finalize the IsSPMD state and
// we need to register a simplification callback so that the Attributor
@@ -3345,8 +3371,17 @@ struct AAKernelInfoFunction : AAKernelInfo {
return false;
}
- // Check if the kernel is already in SPMD mode, if so, return success.
+ // Get the actual kernel, could be the caller of the anchor scope if we have
+ // a debug wrapper.
Function *Kernel = getAnchorScope();
+ if (Kernel->hasLocalLinkage()) {
+ assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper.");
+ auto *CB = cast<CallBase>(Kernel->user_back());
+ Kernel = CB->getCaller();
+ }
+ assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!");
+
+ // Check if the kernel is already in SPMD mode, if so, return success.
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
@@ -3711,9 +3746,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
// __kmpc_get_hardware_num_threads_in_block();
// WarpSize = __kmpc_get_warp_size();
// BlockSize = BlockHwSize - WarpSize;
- // if (InitCB >= BlockSize) return;
- // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
+ // IsWorkerCheckBB: bool IsWorker = InitCB != -1;
// if (IsWorker) {
+ // if (InitCB >= BlockSize) return;
// SMBeginBB: __kmpc_barrier_simple_generic(...);
// void *WorkFn;
// bool Active = __kmpc_kernel_parallel(&WorkFn);
@@ -3770,6 +3805,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
InitBB->getTerminator()->eraseFromParent();
+ Instruction *IsWorker =
+ ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
+ ConstantInt::get(KernelInitCB->getType(), -1),
+ "thread.is_worker", InitBB);
+ IsWorker->setDebugLoc(DLoc);
+ BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
+
Module &M = *Kernel->getParent();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
FunctionCallee BlockHwSizeFn =
@@ -3779,29 +3821,22 @@ struct AAKernelInfoFunction : AAKernelInfo {
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_warp_size);
CallInst *BlockHwSize =
- CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+ CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
BlockHwSize->setDebugLoc(DLoc);
- CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+ CallInst *WarpSize =
+ CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
WarpSize->setDebugLoc(DLoc);
- Instruction *BlockSize =
- BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+ Instruction *BlockSize = BinaryOperator::CreateSub(
+ BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
BlockSize->setDebugLoc(DLoc);
- Instruction *IsMainOrWorker =
- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
- BlockSize, "thread.is_main_or_worker", InitBB);
+ Instruction *IsMainOrWorker = ICmpInst::Create(
+ ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
+ "thread.is_main_or_worker", IsWorkerCheckBB);
IsMainOrWorker->setDebugLoc(DLoc);
- BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
- InitBB);
-
- Instruction *IsWorker =
- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
- ConstantInt::get(KernelInitCB->getType(), -1),
- "thread.is_worker", IsWorkerCheckBB);
- IsWorker->setDebugLoc(DLoc);
- BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
- IsWorkerCheckBB);
+ BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
+ IsMainOrWorker, IsWorkerCheckBB);
// Create local storage for the work function pointer.
const DataLayout &DL = M.getDataLayout();
@@ -4241,10 +4276,10 @@ struct AAKernelInfoCallSite : AAKernelInfo {
unsigned ScheduleTypeVal =
ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
switch (OMPScheduleType(ScheduleTypeVal)) {
- case OMPScheduleType::Static:
- case OMPScheduleType::StaticChunked:
- case OMPScheduleType::Distribute:
- case OMPScheduleType::DistributeChunked:
+ case OMPScheduleType::UnorderedStatic:
+ case OMPScheduleType::UnorderedStaticChunked:
+ case OMPScheduleType::OrderedDistribute:
+ case OMPScheduleType::OrderedDistributeChunked:
break;
default:
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
@@ -4390,7 +4425,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
std::string Str("simplified value: ");
- if (!SimplifiedValue.hasValue())
+ if (!SimplifiedValue)
return Str + std::string("none");
if (!SimplifiedValue.getValue())
@@ -4420,8 +4455,8 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
IRPosition::callsite_returned(CB),
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
- assert((isValidState() || (SimplifiedValue.hasValue() &&
- SimplifiedValue.getValue() == nullptr)) &&
+ assert((isValidState() ||
+ (SimplifiedValue && SimplifiedValue.getValue() == nullptr)) &&
"Unexpected invalid state!");
if (!isAtFixpoint()) {
@@ -4461,9 +4496,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
+ if (SimplifiedValue && *SimplifiedValue) {
Instruction &I = *getCtxI();
- A.changeValueAfterManifest(I, **SimplifiedValue);
+ A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue);
A.deleteAfterManifest(I);
CallBase *CB = dyn_cast<CallBase>(&I);
@@ -4549,7 +4584,7 @@ private:
// We have empty reaching kernels, therefore we cannot tell if the
// associated call site can be folded. At this moment, SimplifiedValue
// must be none.
- assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none");
+ assert(!SimplifiedValue && "SimplifiedValue should be none");
}
return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
@@ -4592,7 +4627,7 @@ private:
return indicatePessimisticFixpoint();
if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
- assert(!SimplifiedValue.hasValue() &&
+ assert(!SimplifiedValue &&
"SimplifiedValue should keep none at this point");
return ChangeStatus::UNCHANGED;
}
@@ -4700,18 +4735,23 @@ void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
void OpenMPOpt::registerAAs(bool IsModulePass) {
if (SCC.empty())
-
return;
+
if (IsModulePass) {
// Ensure we create the AAKernelInfo AAs first and without triggering an
// update. This will make sure we register all value simplification
// callbacks before any other AA has the chance to create an AAValueSimplify
// or similar.
- for (Function *Kernel : OMPInfoCache.Kernels)
+ auto CreateKernelInfoCB = [&](Use &, Function &Kernel) {
A.getOrCreateAAFor<AAKernelInfo>(
- IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
+ IRPosition::function(Kernel), /* QueryingAA */ nullptr,
DepClassTy::NONE, /* ForceUpdate */ false,
/* UpdateAfterInit */ false);
+ return false;
+ };
+ OMPInformationCache::RuntimeFunctionInfo &InitRFI =
+ OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
+ InitRFI.foreachUse(SCC, CreateKernelInfoCB);
registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
@@ -4899,6 +4939,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
KernelSet Kernels = getDeviceKernels(M);
+ if (PrintModuleBeforeOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M);
+
auto IsCalled = [&](Function &F) {
if (Kernels.contains(&F))
return true;
@@ -4958,8 +5001,15 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
@@ -5001,6 +5051,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
Module &M = *C.begin()->getFunction().getParent();
+ if (PrintModuleBeforeOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M);
+
KernelSet Kernels = getDeviceKernels(M);
FunctionAnalysisManager &FAM =
@@ -5022,8 +5075,16 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.IsModulePass = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
@@ -5093,8 +5154,16 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.IsModulePass = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Result = OMPOpt.run(false);
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 5f2223e4047e..54c72bdbb203 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -14,7 +14,6 @@
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -40,6 +39,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -55,8 +55,6 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <functional>
-#include <iterator>
#include <memory>
#include <tuple>
#include <vector>
@@ -99,7 +97,7 @@ static cl::opt<bool>
// This is an option used by testing:
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
- cl::init(false), cl::ZeroOrMore,
+
cl::ReallyHidden,
cl::desc("Skip Cost Analysis"));
// Used to determine if a cold region is worth outlining based on
@@ -129,7 +127,7 @@ static cl::opt<unsigned> MaxNumInlineBlocks(
// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
static cl::opt<int> MaxNumPartialInlining(
- "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
+ "max-partial-inlining", cl::init(-1), cl::Hidden,
cl::desc("Max number of partial inlining. The default is unlimited"));
// Used only when PGO or user annotated branch data is absent. It is
@@ -137,7 +135,7 @@ static cl::opt<int> MaxNumPartialInlining(
// produces larger value, the BFI value will be used.
static cl::opt<int>
OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
- cl::Hidden, cl::ZeroOrMore,
+ cl::Hidden,
cl::desc("Relative frequency of outline region to "
"the entry block"));
@@ -169,7 +167,7 @@ struct FunctionOutliningInfo {
};
struct FunctionOutliningMultiRegionInfo {
- FunctionOutliningMultiRegionInfo() {}
+ FunctionOutliningMultiRegionInfo() = default;
// Container for outline regions
struct OutlineRegionInfo {
@@ -440,7 +438,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
};
auto BBProfileCount = [BFI](BasicBlock *BB) {
- return BFI->getBlockProfileCount(BB).getValueOr(0);
+ return BFI->getBlockProfileCount(BB).value_or(0);
};
// Use the same computeBBInlineCost function to compute the cost savings of
@@ -741,7 +739,7 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
- if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
+ if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
return OutlineRegionRelFreq;
// When profile data is not available, we need to be conservative in
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 74f68531b89a..ae787be40c55 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -15,19 +15,13 @@
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Target/CGPassBuilderOption.h"
@@ -41,22 +35,16 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
-#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
-#include "llvm/Transforms/Vectorize/LoopVectorize.h"
-#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
-#include "llvm/Transforms/Vectorize/VectorCombine.h"
using namespace llvm;
namespace llvm {
-cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
+cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::Hidden,
cl::desc("Run Partial inlinining pass"));
static cl::opt<bool>
@@ -111,8 +99,8 @@ static cl::opt<bool>
EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable performing ThinLTO."));
-cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
- cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
+cl::opt<bool> EnableHotColdSplit("hot-cold-split",
+ cl::desc("Enable hot-cold splitting pass"));
cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
cl::desc("Enable ir outliner pass"));
@@ -126,12 +114,12 @@ cl::opt<bool>
cl::desc("Disable pre-instrumentation inliner"));
cl::opt<int> PreInlineThreshold(
- "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
+ "preinline-threshold", cl::Hidden, cl::init(75),
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
cl::opt<bool>
- EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore,
+ EnableGVNHoist("enable-gvn-hoist",
cl::desc("Enable the GVN hoisting pass (default = off)"));
static cl::opt<bool>
@@ -139,13 +127,8 @@ static cl::opt<bool>
cl::Hidden,
cl::desc("Disable shrink-wrap library calls"));
-static cl::opt<bool> EnableSimpleLoopUnswitch(
- "enable-simple-loop-unswitch", cl::init(false), cl::Hidden,
- cl::desc("Enable the simple loop unswitch pass. Also enables independent "
- "cleanup passes integrated into the loop pass manager pipeline."));
-
cl::opt<bool>
- EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore,
+ EnableGVNSink("enable-gvn-sink",
cl::desc("Enable the GVN sinking pass (default = off)"));
// This option is used in simplifying testing SampleFDO optimizations for
@@ -336,59 +319,6 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createEarlyCSEPass());
}
-// Do PGO instrumentation generation or use pass as the option specified.
-void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
- bool IsCS = false) {
- if (IsCS) {
- if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse)
- return;
- } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
- return;
-
- // Perform the preinline and cleanup passes for O1 and above.
- // We will not do this inline for context sensitive PGO (when IsCS is true).
- if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) {
- // Create preinline pass. We construct an InlineParams object and specify
- // the threshold here to avoid the command line options of the regular
- // inliner to influence pre-inlining. The only fields of InlineParams we
- // care about are DefaultThreshold and HintThreshold.
- InlineParams IP;
- IP.DefaultThreshold = PreInlineThreshold;
- // FIXME: The hint threshold has the same value used by the regular inliner
- // when not optimzing for size. This should probably be lowered after
- // performance testing.
- // Use PreInlineThreshold for both -Os and -Oz. Not running preinliner makes
- // the instrumented binary unusably large. Even if PreInlineThreshold is not
- // correct thresold for -Oz, it is better than not running preinliner.
- IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325;
-
- MPM.add(createFunctionInliningPass(IP));
- MPM.add(createSROAPass());
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
- MPM.add(createInstructionCombiningPass()); // Combine silly seq's
- addExtensionsToPM(EP_Peephole, MPM);
- }
- if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) {
- MPM.add(createPGOInstrumentationGenLegacyPass(IsCS));
- // Add the profile lowering pass.
- InstrProfOptions Options;
- if (!PGOInstrGen.empty())
- Options.InstrProfileOutput = PGOInstrGen;
- Options.DoCounterPromotion = true;
- Options.UseBFIInPromotion = IsCS;
- MPM.add(createLoopRotatePass());
- MPM.add(createInstrProfilingLegacyPass(Options, IsCS));
- }
- if (!PGOInstrUse.empty())
- MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS));
- // Indirect call promotion that promotes intra-module targets only.
- // For ThinLTO this is done earlier due to interactions with globalopt
- // for imported functions. We don't run this at -O0.
- if (OptLevel > 0 && !IsCS)
- MPM.add(
- createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
-}
void PassManagerBuilder::addFunctionSimplificationPasses(
legacy::PassManagerBase &MPM) {
// Start of function pass.
@@ -404,7 +334,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createGVNHoistPass());
if (EnableGVNSink) {
MPM.add(createGVNSinkPass());
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
}
}
@@ -418,7 +349,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createJumpThreadingPass()); // Thread jumps.
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
}
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove BBs
// Combine silly seq's
if (OptLevel > 2)
MPM.add(createAggressiveInstCombinerPass());
@@ -427,14 +360,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLibCallsShrinkWrapPass());
addExtensionsToPM(EP_Peephole, MPM);
- // Optimize memory intrinsic calls based on the profiled size information.
- if (SizeLevel == 0)
- MPM.add(createPGOMemOPSizeOptLegacyPass());
-
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
if (OptLevel > 1)
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// The matrix extension can introduce large vector operations early, which can
@@ -443,29 +374,32 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createVectorCombinePass());
// Begin the loop pass pipeline.
- if (EnableSimpleLoopUnswitch) {
- // The simple loop unswitch pass relies on separate cleanup passes. Schedule
- // them first so when we re-process a loop they run before other loop
- // passes.
- MPM.add(createLoopInstSimplifyPass());
- MPM.add(createLoopSimplifyCFGPass());
- }
+
+ // The simple loop unswitch pass relies on separate cleanup passes. Schedule
+ // them first so when we re-process a loop they run before other loop
+ // passes.
+ MPM.add(createLoopInstSimplifyPass());
+ MPM.add(createLoopSimplifyCFGPass());
+
// Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated.
+ // to reduce amount of IR that will have to be duplicated. However,
+ // do not perform speculative hoisting the first time as LICM
+ // will destroy metadata that may not need to be destroyed if run
+ // after loop rotation.
// TODO: Investigate promotion cap for O1.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/false));
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// TODO: Investigate promotion cap for O1.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- if (EnableSimpleLoopUnswitch)
- MPM.add(createSimpleLoopUnswitchLegacyPass());
- else
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
+ MPM.add(createSimpleLoopUnswitchLegacyPass(OptLevel == 3));
// FIXME: We break the loop pass pipeline here in order to do full
// simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
// need for this.
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
MPM.add(createInstructionCombiningPass());
// We resume loop passes creating a second loop pipeline here.
if (EnableLoopFlatten) {
@@ -521,7 +455,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// TODO: Investigate if this is too expensive at O1.
if (OptLevel > 1) {
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -580,9 +515,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
PM.add(createEarlyCSEPass());
PM.add(createCorrelatedValuePropagationPass());
PM.add(createInstructionCombiningPass());
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
- PM.add(createCFGSimplificationPass());
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
+ PM.add(createSimpleLoopUnswitchLegacyPass());
+ PM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
PM.add(createInstructionCombiningPass());
}
@@ -597,6 +534,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
// before SLP vectorization.
PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
+ .convertSwitchRangeToICmp(true)
.convertSwitchToLookupTable(true)
.needCanonicalLoops(false)
.hoistCommonInsts(true)
@@ -641,7 +579,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
// unrolled loop is a inner loop, then the prologue will be inside the
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
PM.add(createWarnMissedTransformationsPass());
@@ -657,10 +596,6 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
- // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
- // is handled separately, so just check this is not the ThinLTO post-link.
- bool DefaultOrPreLinkPipeline = !PerformThinLTO;
-
MPM.add(createAnnotation2MetadataLegacyPass());
if (!PGOSampleUse.empty()) {
@@ -678,7 +613,6 @@ void PassManagerBuilder::populateModulePassManager(
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
- addPGOInstrPasses(MPM);
if (Inliner) {
MPM.add(Inliner);
Inliner = nullptr;
@@ -732,8 +666,6 @@ void PassManagerBuilder::populateModulePassManager(
// earlier in the pass pipeline, here before globalopt. Otherwise imported
// available_externally functions look unreferenced and are removed.
if (PerformThinLTO) {
- MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
- !PGOSampleUse.empty()));
MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
}
@@ -772,20 +704,9 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
-
- // For SamplePGO in ThinLTO compile phase, we do not want to do indirect
- // call promotion as it will change the CFG too much to make the 2nd
- // profile annotation in backend more difficult.
- // PGO instrumentation is added during the compile phase for ThinLTO, do
- // not run it a second time
- if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile)
- addPGOInstrPasses(MPM);
-
- // Create profile COMDAT variables. Lld linker wants to see all variables
- // before the LTO/ThinLTO link since it needs to resolve symbols/comdats.
- if (!PerformThinLTO && EnablePGOCSInstrGen)
- MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen));
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Clean up after IPCP & DAE
// We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive
@@ -811,8 +732,6 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createOpenMPOptCGSCCLegacyPass());
MPM.add(createPostOrderFunctionAttrsLegacyPass());
- if (OptLevel > 2)
- MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
addExtensionsToPM(EP_CGSCCOptimizerLate, MPM);
addFunctionSimplificationPasses(MPM);
@@ -837,14 +756,6 @@ void PassManagerBuilder::populateModulePassManager(
// and saves running remaining passes on the eliminated functions.
MPM.add(createEliminateAvailableExternallyPass());
- // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass
- // for LTO and ThinLTO -- The actual pass will be called after all inlines
- // are performed.
- // Need to do this after COMDAT variables have been eliminated,
- // (i.e. after EliminateAvailableExternallyPass).
- if (!(PrepareForLTO || PrepareForThinLTO))
- addPGOInstrPasses(MPM, /* IsCS */ true);
-
if (EnableOrderFileInstrumentation)
MPM.add(createInstrOrderFilePass());
@@ -886,7 +797,8 @@ void PassManagerBuilder::populateModulePassManager(
// later might get benefit of no-alias assumption in clone loop.
if (UseLoopVersioningLICM) {
MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
// We add a fresh GlobalsModRef run at this point. This is particularly
@@ -972,7 +884,8 @@ void PassManagerBuilder::populateModulePassManager(
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
addExtensionsToPM(EP_OptimizerLast, MPM);
@@ -1009,13 +922,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Split call-site with more constrained arguments.
PM.add(createCallSiteSplittingPass());
- // Indirect call promotion. This should promote all the targets that are
- // left by the earlier promotion pass that promotes intra-module targets.
- // This two-step promotion is to save the compile time. For LTO, it should
- // produce the same result as if we only do promotion here.
- PM.add(
- createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
-
// Propage constant function arguments by specializing the functions.
if (EnableFunctionSpecialization && OptLevel > 2)
PM.add(createFunctionSpecializationPass());
@@ -1081,9 +987,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createPruneEHPass()); // Remove dead EH info.
- // CSFDO instrumentation and use pass.
- addPGOInstrPasses(PM, /* IsCS */ true);
-
// Infer attributes on declarations, call sites, arguments, etc. for an SCC.
if (AttributorRun & AttributorRunOption::CGSCC)
PM.add(createAttributorCGSCCLegacyPass());
@@ -1098,14 +1001,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalOptimizerPass());
PM.add(createGlobalDCEPass()); // Remove dead functions.
- // If we didn't decide to inline a function, check to see if we can
- // transform it to pass arguments by value instead of by reference.
- PM.add(createArgumentPromotionPass());
-
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
+ PM.add(createJumpThreadingPass());
// Break up allocas
PM.add(createSROAPass());
@@ -1120,7 +1019,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@@ -1149,7 +1049,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
+ PM.add(createJumpThreadingPass());
}
void PassManagerBuilder::addLateLTOOptimizationPasses(
@@ -1175,80 +1075,6 @@ void PassManagerBuilder::addLateLTOOptimizationPasses(
PM.add(createMergeFunctionsPass());
}
-void PassManagerBuilder::populateThinLTOPassManager(
- legacy::PassManagerBase &PM) {
- PerformThinLTO = true;
- if (LibraryInfo)
- PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
-
- if (VerifyInput)
- PM.add(createVerifierPass());
-
- if (ImportSummary) {
- // This pass imports type identifier resolutions for whole-program
- // devirtualization and CFI. It must run early because other passes may
- // disturb the specific instruction patterns that these passes look for,
- // creating dependencies on resolutions that may not appear in the summary.
- //
- // For example, GVN may transform the pattern assume(type.test) appearing in
- // two basic blocks into assume(phi(type.test, type.test)), which would
- // transform a dependency on a WPD resolution into a dependency on a type
- // identifier resolution for CFI.
- //
- // Also, WPD has access to more precise information than ICP and can
- // devirtualize more effectively, so it should operate on the IR first.
- PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary));
- PM.add(createLowerTypeTestsPass(nullptr, ImportSummary));
- }
-
- populateModulePassManager(PM);
-
- if (VerifyOutput)
- PM.add(createVerifierPass());
- PerformThinLTO = false;
-}
-
-void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
- if (LibraryInfo)
- PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
-
- if (VerifyInput)
- PM.add(createVerifierPass());
-
- addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM);
-
- if (OptLevel != 0)
- addLTOOptimizationPasses(PM);
- else {
- // The whole-program-devirt pass needs to run at -O0 because only it knows
- // about the llvm.type.checked.load intrinsic: it needs to both lower the
- // intrinsic itself and handle it in the summary.
- PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
- }
-
- // Create a function that performs CFI checks for cross-DSO calls with targets
- // in the current module.
- PM.add(createCrossDSOCFIPass());
-
- // Lower type metadata and the type.test intrinsic. This pass supports Clang's
- // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
- // link time if CFI is enabled. The pass does nothing if CFI is disabled.
- PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP (which is performed earlier than this in the regular LTO pipeline).
- PM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
-
- if (OptLevel != 0)
- addLateLTOOptimizationPasses(PM);
-
- addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
-
- PM.add(createAnnotationRemarksLegacyPass());
-
- if (VerifyOutput)
- PM.add(createVerifierPass());
-}
-
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
@@ -1314,18 +1140,3 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
legacy::PassManagerBase *MPM = unwrap(PM);
Builder->populateModulePassManager(*MPM);
}
-
-void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
- LLVMPassManagerRef PM,
- LLVMBool Internalize,
- LLVMBool RunInliner) {
- PassManagerBuilder *Builder = unwrap(PMB);
- legacy::PassManagerBase *LPM = unwrap(PM);
-
- // A small backwards compatibility hack. populateLTOPassManager used to take
- // an RunInliner option.
- if (RunInliner && !Builder->Inliner)
- Builder->Inliner = createFunctionInliningPass();
-
- Builder->populateLTOPassManager(*LPM);
-}
diff --git a/llvm/lib/Transforms/IPO/PruneEH.cpp b/llvm/lib/Transforms/IPO/PruneEH.cpp
index 39de19ca9e9d..e0836a9fd699 100644
--- a/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
@@ -24,9 +23,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -246,7 +243,7 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU) {
}
if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
}
if (TokenInst) {
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index 5779553ee732..26fb7d676429 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -18,6 +18,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar/SCCP.h"
+#include "llvm/Transforms/Utils/SCCPSolver.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 7334bf695b67..6859953de962 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -14,7 +14,8 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/ProfileData/SampleProf.h"
#include <map>
#include <queue>
@@ -62,23 +63,24 @@ ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
return ChildNodeRet;
}
-ContextTrieNode &ContextTrieNode::moveToChildContext(
- const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
- uint32_t ContextFramesToRemove, bool DeleteNode) {
+ContextTrieNode &
+SampleContextTracker::moveContextSamples(ContextTrieNode &ToNodeParent,
+ const LineLocation &CallSite,
+ ContextTrieNode &&NodeToMove) {
uint64_t Hash =
FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite);
+ std::map<uint64_t, ContextTrieNode> &AllChildContext =
+ ToNodeParent.getAllChildContext();
assert(!AllChildContext.count(Hash) && "Node to remove must exist");
- LineLocation OldCallSite = NodeToMove.CallSiteLoc;
- ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
AllChildContext[Hash] = NodeToMove;
ContextTrieNode &NewNode = AllChildContext[Hash];
- NewNode.CallSiteLoc = CallSite;
+ NewNode.setCallSiteLoc(CallSite);
// Walk through nodes in the moved the subtree, and update
// FunctionSamples' context as for the context promotion.
// We also need to set new parant link for all children.
std::queue<ContextTrieNode *> NodeToUpdate;
- NewNode.setParentContext(this);
+ NewNode.setParentContext(&ToNodeParent);
NodeToUpdate.push(&NewNode);
while (!NodeToUpdate.empty()) {
@@ -87,10 +89,8 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
FunctionSamples *FSamples = Node->getFunctionSamples();
if (FSamples) {
- FSamples->getContext().promoteOnPath(ContextFramesToRemove);
+ setContextNode(FSamples, Node);
FSamples->getContext().setState(SyntheticContext);
- LLVM_DEBUG(dbgs() << " Context promoted to: "
- << FSamples->getContext().toString() << "\n");
}
for (auto &It : Node->getAllChildContext()) {
@@ -100,10 +100,6 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
}
}
- // Original context no longer needed, destroy if requested.
- if (DeleteNode)
- OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName());
-
return NewNode;
}
@@ -131,7 +127,7 @@ void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; }
void ContextTrieNode::addFunctionSize(uint32_t FSize) {
- if (!FuncSize.hasValue())
+ if (!FuncSize)
FuncSize = 0;
FuncSize = FuncSize.getValue() + FSize;
@@ -147,6 +143,10 @@ void ContextTrieNode::setParentContext(ContextTrieNode *Parent) {
ParentContext = Parent;
}
+void ContextTrieNode::setCallSiteLoc(const LineLocation &Loc) {
+ CallSiteLoc = Loc;
+}
+
void ContextTrieNode::dumpNode() {
dbgs() << "Node: " << FuncName << "\n"
<< " Callsite: " << CallSiteLoc << "\n"
@@ -202,13 +202,23 @@ SampleContextTracker::SampleContextTracker(
SampleContext Context = FuncSample.first;
LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
<< "\n");
- if (!Context.isBaseContext())
- FuncToCtxtProfiles[Context.getName()].insert(FSamples);
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
assert(!NewNode->getFunctionSamples() &&
"New node can't have sample profile");
NewNode->setFunctionSamples(FSamples);
}
+ populateFuncToCtxtMap();
+}
+
+void SampleContextTracker::populateFuncToCtxtMap() {
+ for (auto *Node : *this) {
+ FunctionSamples *FSamples = Node->getFunctionSamples();
+ if (FSamples) {
+ FSamples->getContext().setState(RawContext);
+ setContextNode(FSamples, Node);
+ FuncToCtxtProfiles[Node->getFuncName()].push_back(FSamples);
+ }
+ }
}
FunctionSamples *
@@ -231,7 +241,7 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
if (CalleeContext) {
FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
LLVM_DEBUG(if (FSamples) {
- dbgs() << " Callee context found: " << FSamples->getContext().toString()
+ dbgs() << " Callee context found: " << getContextString(CalleeContext)
<< "\n";
});
return FSamples;
@@ -333,7 +343,7 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
if (Context.hasState(InlinedContext) || Context.hasState(MergedContext))
continue;
- ContextTrieNode *FromNode = getContextFor(Context);
+ ContextTrieNode *FromNode = getContextNodeForProfile(CSamples);
if (FromNode == Node)
continue;
@@ -354,7 +364,7 @@ void SampleContextTracker::markContextSamplesInlined(
const FunctionSamples *InlinedSamples) {
assert(InlinedSamples && "Expect non-null inlined samples");
LLVM_DEBUG(dbgs() << "Marking context profile as inlined: "
- << InlinedSamples->getContext().toString() << "\n");
+ << getContextString(*InlinedSamples) << "\n");
InlinedSamples->getContext().setState(InlinedContext);
}
@@ -405,17 +415,43 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
// the context profile in the base (context-less) profile.
FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples();
assert(FromSamples && "Shouldn't promote a context without profile");
+ (void)FromSamples; // Unused in release build.
+
LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
- << FromSamples->getContext().toString() << "\n");
+ << getContextString(&NodeToPromo) << "\n");
assert(!FromSamples->getContext().hasState(InlinedContext) &&
"Shouldn't promote inlined context profile");
- uint32_t ContextFramesToRemove =
- FromSamples->getContext().getContextFrames().size() - 1;
- return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
- ContextFramesToRemove);
+ return promoteMergeContextSamplesTree(NodeToPromo, RootContext);
+}
+
+#ifndef NDEBUG
+std::string
+SampleContextTracker::getContextString(const FunctionSamples &FSamples) const {
+ return getContextString(getContextNodeForProfile(&FSamples));
}
+std::string
+SampleContextTracker::getContextString(ContextTrieNode *Node) const {
+ SampleContextFrameVector Res;
+ if (Node == &RootContext)
+ return std::string();
+ Res.emplace_back(Node->getFuncName(), LineLocation(0, 0));
+
+ ContextTrieNode *PreNode = Node;
+ Node = Node->getParentContext();
+ while (Node && Node != &RootContext) {
+ Res.emplace_back(Node->getFuncName(), PreNode->getCallSiteLoc());
+ PreNode = Node;
+ Node = Node->getParentContext();
+ }
+
+ std::reverse(Res.begin(), Res.end());
+
+ return SampleContext::getContextString(Res);
+}
+#endif
+
void SampleContextTracker::dump() { RootContext.dumpTree(); }
StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const {
@@ -526,8 +562,7 @@ ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
}
void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
- ContextTrieNode &ToNode,
- uint32_t ContextFramesToRemove) {
+ ContextTrieNode &ToNode) {
FunctionSamples *FromSamples = FromNode.getFunctionSamples();
FunctionSamples *ToSamples = ToNode.getFunctionSamples();
if (FromSamples && ToSamples) {
@@ -540,16 +575,13 @@ void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
} else if (FromSamples) {
// Transfer FromSamples from FromNode to ToNode
ToNode.setFunctionSamples(FromSamples);
+ setContextNode(FromSamples, &ToNode);
FromSamples->getContext().setState(SyntheticContext);
- FromSamples->getContext().promoteOnPath(ContextFramesToRemove);
- FromNode.setFunctionSamples(nullptr);
}
}
ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
- ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent,
- uint32_t ContextFramesToRemove) {
- assert(ContextFramesToRemove && "Context to remove can't be empty");
+ ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent) {
// Ignore call site location if destination is top level under root
LineLocation NewCallSiteLoc = LineLocation(0, 0);
@@ -566,22 +598,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
if (!ToNode) {
// Do not delete node to move from its parent here because
// caller is iterating over children of that parent node.
- ToNode = &ToNodeParent.moveToChildContext(
- NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false);
+ ToNode =
+ &moveContextSamples(ToNodeParent, NewCallSiteLoc, std::move(FromNode));
+ LLVM_DEBUG({
+ dbgs() << " Context promoted and merged to: " << getContextString(ToNode)
+ << "\n";
+ });
} else {
// Destination node exists, merge samples for the context tree
- mergeContextNode(FromNode, *ToNode, ContextFramesToRemove);
+ mergeContextNode(FromNode, *ToNode);
LLVM_DEBUG({
if (ToNode->getFunctionSamples())
dbgs() << " Context promoted and merged to: "
- << ToNode->getFunctionSamples()->getContext().toString() << "\n";
+ << getContextString(ToNode) << "\n";
});
// Recursively promote and merge children
for (auto &It : FromNode.getAllChildContext()) {
ContextTrieNode &FromChildNode = It.second;
- promoteMergeContextSamplesTree(FromChildNode, *ToNode,
- ContextFramesToRemove);
+ promoteMergeContextSamplesTree(FromChildNode, *ToNode);
}
// Remove children once they're all merged
@@ -594,4 +629,14 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
return *ToNode;
}
+
+void SampleContextTracker::createContextLessProfileMap(
+ SampleProfileMap &ContextLessProfiles) {
+ for (auto *Node : *this) {
+ FunctionSamples *FProfile = Node->getFunctionSamples();
+ // Profile's context can be empty, use ContextNode's func name.
+ if (FProfile)
+ ContextLessProfiles[Node->getFuncName()].merge(*FProfile);
+ }
+}
} // namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index bc6051de90c4..40de69bbf2cf 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -25,11 +25,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
@@ -38,22 +35,16 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
@@ -64,6 +55,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -73,9 +65,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
@@ -84,7 +74,6 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <algorithm>
@@ -151,8 +140,7 @@ static cl::opt<bool> ProfileSampleBlockAccurate(
"them conservatively as unknown. "));
static cl::opt<bool> ProfileAccurateForSymsInList(
- "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
- cl::init(true),
+ "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
cl::desc("For symbols in profile symbol list, regard their profiles to "
"be accurate. It may be overriden by profile-sample-accurate. "));
@@ -183,6 +171,15 @@ static cl::opt<bool> ProfileSizeInline(
cl::desc("Inline cold call sites in profile loader if it's beneficial "
"for code size."));
+// Since profiles are consumed by many passes, turning on this option has
+// side effects. For instance, pre-link SCC inliner would see merged profiles
+// and inline the hot functions (that are skipped in this pass).
+static cl::opt<bool> DisableSampleLoaderInlining(
+ "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
+ cl::desc("If true, artifically skip inline transformation in sample-loader "
+ "pass, and merge (or scale) profiles (as configured by "
+ "--sample-profile-merge-inlinee)."));
+
cl::opt<int> ProfileInlineGrowthLimit(
"sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
cl::desc("The size growth ratio limit for proirity-based sample profile "
@@ -219,19 +216,19 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
"Skip relative hotness check for ICP up to given number of targets."));
static cl::opt<bool> CallsitePrioritizedInline(
- "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
+ "sample-profile-prioritized-inline", cl::Hidden,
+
cl::desc("Use call site prioritized inlining for sample profile loader."
"Currently only CSSPGO is supported."));
static cl::opt<bool> UsePreInlinerDecision(
- "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
+ "sample-profile-use-preinliner", cl::Hidden,
+
cl::desc("Use the preinliner decisions stored in profile context."));
static cl::opt<bool> AllowRecursiveInline(
- "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
+ "sample-profile-recursive-inline", cl::Hidden,
+
cl::desc("Allow sample loader inliner to inline recursive calls."));
static cl::opt<std::string> ProfileInlineReplayFile(
@@ -287,7 +284,6 @@ static cl::opt<CallSiteFormat::Format> ProfileInlineReplayFormat(
static cl::opt<unsigned>
MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
- cl::ZeroOrMore,
cl::desc("Max number of promotions for a single indirect "
"call callsite in sample profile loader"));
@@ -295,6 +291,13 @@ static cl::opt<bool> OverwriteExistingWeights(
"overwrite-existing-weights", cl::Hidden, cl::init(false),
cl::desc("Ignore existing branch weights on IR and always overwrite."));
+static cl::opt<bool> AnnotateSampleProfileInlinePhase(
+ "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
+ cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
+ "sample-profile inline pass name."));
+
+extern cl::opt<bool> EnableExtTspBlockPlacement;
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -425,7 +428,11 @@ public:
: SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
- LTOPhase(LTOPhase) {}
+ LTOPhase(LTOPhase),
+ AnnotatedPassName(AnnotateSampleProfileInlinePhase
+ ? llvm::AnnotateInlinePassName(InlineContext{
+ LTOPhase, InlinePass::SampleProfileInliner})
+ : CSINLINE_DEBUG) {}
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -487,15 +494,13 @@ protected:
/// Profile tracker for different context.
std::unique_ptr<SampleContextTracker> ContextTracker;
- /// Flag indicating whether input profile is context-sensitive
- bool ProfileIsCSFlat = false;
-
/// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
///
/// We need to know the LTO phase because for example in ThinLTOPrelink
/// phase, in annotation, we should not promote indirect calls. Instead,
/// we will mark GUIDs that needs to be annotated to the function.
- ThinOrFullLTOPhase LTOPhase;
+ const ThinOrFullLTOPhase LTOPhase;
+ const std::string AnnotatedPassName;
/// Profle Symbol list tells whether a function name appears in the binary
/// used to generate the current profile.
@@ -535,6 +540,11 @@ protected:
// A pseudo probe helper to correlate the imported sample counts.
std::unique_ptr<PseudoProbeManager> ProbeManager;
+
+private:
+ const char *getAnnotatedRemarkPassName() const {
+ return AnnotatedPassName.c_str();
+ }
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -605,7 +615,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// call instruction should have 0 count.
// For CS profile, the callsite count of previously inlined callees is
// populated with the entry count of the callees.
- if (!ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
if (const auto *CB = dyn_cast<CallBase>(&Inst))
if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
return 0;
@@ -644,7 +654,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
// call instruction should have 0 count.
// For CS profile, the callsite count of previously inlined callees is
// populated with the entry count of the callees.
- if (!ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
if (const auto *CB = dyn_cast<CallBase>(&Inst))
if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
return 0;
@@ -698,7 +708,7 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
if (Function *Callee = Inst.getCalledFunction())
CalleeName = Callee->getName();
- if (ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
const FunctionSamples *FS = findFunctionSamples(Inst);
@@ -730,7 +740,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
FunctionSamples::getGUID(R->getName());
};
- if (ProfileIsCSFlat) {
+ if (FunctionSamples::ProfileIsCS) {
auto CalleeSamples =
ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
if (CalleeSamples.empty())
@@ -783,7 +793,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
if (it.second) {
- if (ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
it.first->second = ContextTracker->getContextSamplesFor(DIL);
else
it.first->second =
@@ -839,6 +849,13 @@ static void
updateIDTMetaData(Instruction &Inst,
const SmallVectorImpl<InstrProfValueData> &CallTargets,
uint64_t Sum) {
+ // Bail out early if MaxNumPromotions is zero.
+ // This prevents allocating an array of zero length below.
+ //
+ // Note `updateIDTMetaData` is called in two places so check
+ // `MaxNumPromotions` inside it.
+ if (MaxNumPromotions == 0)
+ return;
uint32_t NumVals = 0;
// OldSum is the existing total count in the value profile data.
uint64_t OldSum = 0;
@@ -922,6 +939,14 @@ updateIDTMetaData(Instruction &Inst,
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
SmallVector<CallBase *, 8> *InlinedCallSite) {
+ // Bail out early if sample-loader inliner is disabled.
+ if (DisableSampleLoaderInlining)
+ return false;
+
+ // Bail out early if MaxNumPromotions is zero.
+ // This prevents allocating an array of zero length in callees below.
+ if (MaxNumPromotions == 0)
+ return false;
auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
auto R = SymbolMap.find(CalleeFunctionName);
if (R == SymbolMap.end() || !R->getValue())
@@ -1009,8 +1034,9 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
for (auto I : Candidates) {
Function *CalledFunction = I->getCalledFunction();
if (CalledFunction) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
- I->getDebugLoc(), I->getParent())
+ ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
+ "InlineAttempt", I->getDebugLoc(),
+ I->getParent())
<< "previous inlining reattempted for "
<< (Hot ? "hotness: '" : "size: '")
<< ore::NV("Callee", CalledFunction) << "' into '"
@@ -1042,13 +1068,12 @@ void SampleProfileLoader::findExternalInlineCandidate(
// For AutoFDO profile, retrieve candidate profiles by walking over
// the nested inlinee profiles.
- if (!ProfileIsCSFlat) {
+ if (!FunctionSamples::ProfileIsCS) {
Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
return;
}
- ContextTrieNode *Caller =
- ContextTracker->getContextFor(Samples->getContext());
+ ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
std::queue<ContextTrieNode *> CalleeList;
CalleeList.push(Caller);
while (!CalleeList.empty()) {
@@ -1098,11 +1123,20 @@ void SampleProfileLoader::findExternalInlineCandidate(
/// Iteratively inline hot callsites of a function.
///
-/// Iteratively traverse all callsites of the function \p F, and find if
-/// the corresponding inlined instance exists and is hot in profile. If
-/// it is hot enough, inline the callsites and adds new callsites of the
-/// callee into the caller. If the call is an indirect call, first promote
-/// it to direct call. Each indirect call is limited with a single target.
+/// Iteratively traverse all callsites of the function \p F, so as to
+/// find out callsites with corresponding inline instances.
+///
+/// For such callsites,
+/// - If it is hot enough, inline the callsites and adds callsites of the callee
+/// into the caller. If the call is an indirect call, first promote
+/// it to direct call. Each indirect call is limited with a single target.
+///
+/// - If a callsite is not inlined, merge its profile to the outline
+/// version (if --sample-profile-merge-inlinee is true), or scale the
+/// counters of standalone function based on the profile of inlined
+/// instances (if --sample-profile-merge-inlinee is false).
+///
+/// Later passes may consume the updated profiles.
///
/// \param F function to perform iterative inlining.
/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
@@ -1137,7 +1171,7 @@ bool SampleProfileLoader::inlineHotFunctions(
assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
"GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || ProfileIsCSFlat)
+ if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS)
LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
Hot = true;
@@ -1200,13 +1234,17 @@ bool SampleProfileLoader::inlineHotFunctions(
// For CS profile, profile for not inlined context will be merged when
// base profile is being retrieved.
- if (!FunctionSamples::ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
return Changed;
}
bool SampleProfileLoader::tryInlineCandidate(
InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
+ // Do not attempt to inline a candidate if
+ // --disable-sample-loader-inlining is true.
+ if (DisableSampleLoaderInlining)
+ return false;
CallBase &CB = *Candidate.CallInstr;
Function *CalledFunction = CB.getCalledFunction();
@@ -1216,7 +1254,8 @@ bool SampleProfileLoader::tryInlineCandidate(
InlineCost Cost = shouldInlineCandidate(Candidate);
if (Cost.isNever()) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
+ ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
+ "InlineFail", DLoc, BB)
<< "incompatible inlining");
return false;
}
@@ -1226,45 +1265,45 @@ bool SampleProfileLoader::tryInlineCandidate(
InlineFunctionInfo IFI(nullptr, GetAC);
IFI.UpdateProfile = false;
- if (InlineFunction(CB, IFI).isSuccess()) {
- // Merge the attributes based on the inlining.
- AttributeFuncs::mergeAttributesForInlining(*BB->getParent(),
- *CalledFunction);
+ if (!InlineFunction(CB, IFI).isSuccess())
+ return false;
- // The call to InlineFunction erases I, so we can't pass it here.
- emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction,
- *BB->getParent(), Cost, true, CSINLINE_DEBUG);
+ // Merge the attributes based on the inlining.
+ AttributeFuncs::mergeAttributesForInlining(*BB->getParent(),
+ *CalledFunction);
- // Now populate the list of newly exposed call sites.
- if (InlinedCallSites) {
- InlinedCallSites->clear();
- for (auto &I : IFI.InlinedCallSites)
- InlinedCallSites->push_back(I);
- }
+ // The call to InlineFunction erases I, so we can't pass it here.
+ emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
+ Cost, true, getAnnotatedRemarkPassName());
- if (ProfileIsCSFlat)
- ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
- ++NumCSInlined;
+ // Now populate the list of newly exposed call sites.
+ if (InlinedCallSites) {
+ InlinedCallSites->clear();
+ for (auto &I : IFI.InlinedCallSites)
+ InlinedCallSites->push_back(I);
+ }
- // Prorate inlined probes for a duplicated inlining callsite which probably
- // has a distribution less than 100%. Samples for an inlinee should be
- // distributed among the copies of the original callsite based on each
- // callsite's distribution factor for counts accuracy. Note that an inlined
- // probe may come with its own distribution factor if it has been duplicated
- // in the inlinee body. The two factor are multiplied to reflect the
- // aggregation of duplication.
- if (Candidate.CallsiteDistribution < 1) {
- for (auto &I : IFI.InlinedCallSites) {
- if (Optional<PseudoProbe> Probe = extractProbe(*I))
- setProbeDistributionFactor(*I, Probe->Factor *
- Candidate.CallsiteDistribution);
- }
- NumDuplicatedInlinesite++;
- }
+ if (FunctionSamples::ProfileIsCS)
+ ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
+ ++NumCSInlined;
- return true;
+ // Prorate inlined probes for a duplicated inlining callsite which probably
+ // has a distribution less than 100%. Samples for an inlinee should be
+ // distributed among the copies of the original callsite based on each
+ // callsite's distribution factor for counts accuracy. Note that an inlined
+ // probe may come with its own distribution factor if it has been duplicated
+ // in the inlinee body. The two factor are multiplied to reflect the
+ // aggregation of duplication.
+ if (Candidate.CallsiteDistribution < 1) {
+ for (auto &I : IFI.InlinedCallSites) {
+ if (Optional<PseudoProbe> Probe = extractProbe(*I))
+ setProbeDistributionFactor(*I, Probe->Factor *
+ Candidate.CallsiteDistribution);
+ }
+ NumDuplicatedInlinesite++;
}
- return false;
+
+ return true;
}
bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
@@ -1285,14 +1324,8 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
if (Optional<PseudoProbe> Probe = extractProbe(*CB))
Factor = Probe->Factor;
- uint64_t CallsiteCount = 0;
- ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
- if (Weight)
- CallsiteCount = Weight.get();
- if (CalleeSamples)
- CallsiteCount = std::max(
- CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
-
+ uint64_t CallsiteCount =
+ CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0;
*NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
return true;
}
@@ -1387,7 +1420,6 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
-
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
// Profile symbol list is ignored when profile-sample-accurate is on.
assert((!ProfAccForSymsInList ||
@@ -1513,7 +1545,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
// For CS profile, profile for not inlined context will be merged when
// base profile is being retrieved.
- if (!FunctionSamples::ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
return Changed;
}
@@ -1528,11 +1560,11 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
if (!Callee || Callee->isDeclaration())
continue;
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
- I->getDebugLoc(), I->getParent())
- << "previous inlining not repeated: '"
- << ore::NV("Callee", Callee) << "' into '"
- << ore::NV("Caller", &F) << "'");
+ ORE->emit(
+ OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
+ I->getDebugLoc(), I->getParent())
+ << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
+ << "' into '" << ore::NV("Caller", &F) << "'");
++NumCSNotInlined;
const FunctionSamples *FS = Pair.getSecond();
@@ -1540,6 +1572,10 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
continue;
}
+ // Do not merge a context that is already duplicated into the base profile.
+ if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
+ continue;
+
if (ProfileMergeInlinee) {
// A function call can be replicated by optimizations like callsite
// splitting or jump threading and the replicates end up sharing the
@@ -1623,7 +1659,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
// With CSSPGO all indirect call targets are counted torwards the
// original indirect call site in the profile, including both
// inlined and non-inlined targets.
- if (!FunctionSamples::ProfileIsCSFlat) {
+ if (!FunctionSamples::ProfileIsCS) {
if (const FunctionSamplesMap *M =
FS->findFunctionSamplesMapAt(CallSite)) {
for (const auto &NameFS : *M)
@@ -1714,6 +1750,11 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
}
}
+ // FIXME: Re-enable for sample profiling after investigating why the sum
+ // of branch weights can be 0
+ //
+ // misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
+
uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
@@ -1798,7 +1839,7 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
std::unique_ptr<ProfiledCallGraph>
SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
std::unique_ptr<ProfiledCallGraph> ProfiledCG;
- if (ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
else
ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
@@ -1843,8 +1884,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
assert(&CG->getModule() == &M);
- if (UseProfiledCallGraph ||
- (ProfileIsCSFlat && !UseProfiledCallGraph.getNumOccurrences())) {
+ if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS &&
+ !UseProfiledCallGraph.getNumOccurrences())) {
// Use profiled call edges to augment the top-down order. There are cases
// that the top-down order computed based on the static call graph doesn't
// reflect real execution order. For example
@@ -1973,40 +2014,50 @@ bool SampleProfileLoader::doInitialization(Module &M,
ProfileInlineReplayScope,
ProfileInlineReplayFallback,
{ProfileInlineReplayFormat}},
- /*EmitRemarks=*/false);
+ /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
}
- // Apply tweaks if context-sensitive profile is available.
- if (Reader->profileIsCSFlat() || Reader->profileIsCSNested()) {
- ProfileIsCSFlat = Reader->profileIsCSFlat();
+ // Apply tweaks if context-sensitive or probe-based profile is available.
+ if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
+ Reader->profileIsProbeBased()) {
+ if (!UseIterativeBFIInference.getNumOccurrences())
+ UseIterativeBFIInference = true;
+ if (!SampleProfileUseProfi.getNumOccurrences())
+ SampleProfileUseProfi = true;
+ if (!EnableExtTspBlockPlacement.getNumOccurrences())
+ EnableExtTspBlockPlacement = true;
// Enable priority-base inliner and size inline by default for CSSPGO.
if (!ProfileSizeInline.getNumOccurrences())
ProfileSizeInline = true;
if (!CallsitePrioritizedInline.getNumOccurrences())
CallsitePrioritizedInline = true;
-
- // For CSSPGO, use preinliner decision by default when available.
- if (!UsePreInlinerDecision.getNumOccurrences())
- UsePreInlinerDecision = true;
-
// For CSSPGO, we also allow recursive inline to best use context profile.
if (!AllowRecursiveInline.getNumOccurrences())
AllowRecursiveInline = true;
- // Enable iterative-BFI by default for CSSPGO.
- if (!UseIterativeBFIInference.getNumOccurrences())
- UseIterativeBFIInference = true;
- // Enable Profi by default for CSSPGO.
- if (!SampleProfileUseProfi.getNumOccurrences())
- SampleProfileUseProfi = true;
+ if (Reader->profileIsPreInlined()) {
+ if (!UsePreInlinerDecision.getNumOccurrences())
+ UsePreInlinerDecision = true;
+ }
- if (FunctionSamples::ProfileIsCSFlat) {
- // Tracker for profiles under different context
- ContextTracker = std::make_unique<SampleContextTracker>(
- Reader->getProfiles(), &GUIDToFuncNameMap);
+ if (!Reader->profileIsCS()) {
+ // Non-CS profile should be fine without a function size budget for the
+ // inliner since the contexts in the profile are either all from inlining
+      // in the previous build or pre-computed by the preinliner with a size
+ // cap, thus they are bounded.
+ if (!ProfileInlineLimitMin.getNumOccurrences())
+ ProfileInlineLimitMin = std::numeric_limits<unsigned>::max();
+ if (!ProfileInlineLimitMax.getNumOccurrences())
+ ProfileInlineLimitMax = std::numeric_limits<unsigned>::max();
}
}
+ if (Reader->profileIsCS()) {
+ // Tracker for profiles under different context
+ ContextTracker = std::make_unique<SampleContextTracker>(
+ Reader->getProfiles(), &GUIDToFuncNameMap);
+ }
+
// Load pseudo probe descriptors for probe-based function samples.
if (Reader->profileIsProbeBased()) {
ProbeManager = std::make_unique<PseudoProbeManager>(M);
@@ -2082,7 +2133,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
}
// Account for cold calls not inlined....
- if (!ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
notInlinedCallInfo)
updateProfileCallee(pair.first, pair.second.entryCount);
@@ -2145,7 +2196,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
// Initialize entry count when the function has no existing entry
// count value.
- if (!F.getEntryCount().hasValue())
+ if (!F.getEntryCount())
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
@@ -2158,7 +2209,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
ORE = OwnedORE.get();
}
- if (ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
Samples = ContextTracker->getBaseSamplesFor(F);
else
Samples = Reader->getSamplesFor(F);
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index e104ae00e916..d1ab2649ee2e 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -13,21 +13,19 @@
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <unordered_set>
@@ -416,7 +414,7 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
FunctionAnalysisManager &FAM) {
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
auto BBProfileCount = [&BFI](BasicBlock *BB) {
- return BFI.getBlockProfileCount(BB).getValueOr(0);
+ return BFI.getBlockProfileCount(BB).value_or(0);
};
// Collect the sum of execution weight for each probe.
diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index 95393d9476e0..c7d54b8cdeb0 100644
--- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -25,18 +25,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using Scaled64 = ScaledNumber<uint64_t>;
@@ -47,18 +42,17 @@ using ProfileCount = Function::ProfileCount;
namespace llvm {
cl::opt<int>
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
- cl::ZeroOrMore,
cl::desc("Initial value of synthetic entry count"));
} // namespace llvm
/// Initial synthetic count assigned to inline functions.
static cl::opt<int> InlineSyntheticCount(
- "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
+ "inline-synthetic-count", cl::Hidden, cl::init(15),
cl::desc("Initial synthetic entry count for inline functions."));
/// Initial synthetic count assigned to cold functions.
static cl::opt<int> ColdSyntheticCount(
- "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
+ "cold-synthetic-count", cl::Hidden, cl::init(5),
cl::desc("Initial synthetic entry count for cold functions."));
// Assign initial synthetic entry counts to functions.
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 52708ff2f226..a360a768a2bc 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -21,7 +21,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Pass.h"
-#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
@@ -311,7 +310,8 @@ void splitAndWriteThinLTOBitcode(
return;
}
if (!F->isDeclaration() &&
- computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
+ computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) ==
+ FMRB_DoesNotAccessMemory)
EligibleVirtualFns.insert(F);
});
}
@@ -542,11 +542,11 @@ class WriteThinLTOBitcode : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
// The output stream on which to emit a minimized module for use
// just in the thin link, if requested.
- raw_ostream *ThinLinkOS;
+ raw_ostream *ThinLinkOS = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
- WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
+ WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 8b30f0e989a1..898a213d0849 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -57,6 +57,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -79,6 +80,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
@@ -95,6 +97,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
@@ -107,6 +110,15 @@ using namespace wholeprogramdevirt;
#define DEBUG_TYPE "wholeprogramdevirt"
+STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
+STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
+STATISTIC(NumBranchFunnel, "Number of branch funnels");
+STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
+STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
+STATISTIC(NumVirtConstProp1Bit,
+ "Number of 1 bit virtual constant propagations");
+STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
+
static cl::opt<PassSummaryAction> ClSummaryAction(
"wholeprogramdevirt-summary-action",
cl::desc("What to do with the summary when running this pass"),
@@ -132,13 +144,12 @@ static cl::opt<std::string> ClWriteSummary(
static cl::opt<unsigned>
ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
- cl::init(10), cl::ZeroOrMore,
+ cl::init(10),
cl::desc("Maximum number of call targets per "
"call site to enable branch funnels"));
static cl::opt<bool>
PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
- cl::init(false), cl::ZeroOrMore,
cl::desc("Print index-based devirtualization messages"));
/// Provide a way to force enable whole program visibility in tests.
@@ -146,30 +157,34 @@ static cl::opt<bool>
/// !vcall_visibility metadata (the mere presense of type tests
/// previously implied hidden visibility).
static cl::opt<bool>
- WholeProgramVisibility("whole-program-visibility", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
+ WholeProgramVisibility("whole-program-visibility", cl::Hidden,
cl::desc("Enable whole program visibility"));
/// Provide a way to force disable whole program for debugging or workarounds,
/// when enabled via the linker.
static cl::opt<bool> DisableWholeProgramVisibility(
- "disable-whole-program-visibility", cl::init(false), cl::Hidden,
- cl::ZeroOrMore,
+ "disable-whole-program-visibility", cl::Hidden,
cl::desc("Disable whole program visibility (overrides enabling options)"));
/// Provide way to prevent certain function from being devirtualized
static cl::list<std::string>
SkipFunctionNames("wholeprogramdevirt-skip",
cl::desc("Prevent function(s) from being devirtualized"),
- cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated);
+ cl::Hidden, cl::CommaSeparated);
-/// Mechanism to add runtime checking of devirtualization decisions, trapping on
-/// any that are not correct. Useful for debugging undefined behavior leading to
-/// failures with WPD.
-static cl::opt<bool>
- CheckDevirt("wholeprogramdevirt-check", cl::init(false), cl::Hidden,
- cl::ZeroOrMore,
- cl::desc("Add code to trap on incorrect devirtualizations"));
+/// Mechanism to add runtime checking of devirtualization decisions, optionally
+/// trapping or falling back to indirect call on any that are not correct.
+/// Trapping mode is useful for debugging undefined behavior leading to failures
+/// with WPD. Fallback mode is useful for ensuring safety when whole program
+/// visibility may be compromised.
+enum WPDCheckMode { None, Trap, Fallback };
+static cl::opt<WPDCheckMode> DevirtCheckMode(
+ "wholeprogramdevirt-check", cl::Hidden,
+ cl::desc("Type of checking for incorrect devirtualizations"),
+ cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
+ clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
+ clEnumValN(WPDCheckMode::Fallback, "fallback",
+ "Fallback to indirect when incorrect")));
namespace {
struct PatternList {
@@ -866,13 +881,14 @@ void updateVCallVisibilityInIndex(
if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
return;
for (auto &P : Index) {
+ // Don't upgrade the visibility for symbols exported to the dynamic
+ // linker, as we have no information on their eventual use.
+ if (DynamicExportSymbols.count(P.first))
+ continue;
for (auto &S : P.second.SummaryList) {
auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
if (!GVar ||
- GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic ||
- // Don't upgrade the visibility for symbols exported to the dynamic
- // linker, as we have no information on their eventual use.
- DynamicExportSymbols.count(P.first))
+ GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
continue;
GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
}
@@ -1133,16 +1149,17 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
if (RemarksEnabled)
VCallSite.emitRemark("single-impl",
TheFn->stripPointerCasts()->getName(), OREGetter);
+ NumSingleImpl++;
auto &CB = VCallSite.CB;
assert(!CB.getCalledFunction() && "devirtualizing direct call?");
IRBuilder<> Builder(&CB);
Value *Callee =
Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
- // If checking is enabled, add support to compare the virtual function
- // pointer to the devirtualized target. In case of a mismatch, perform a
- // debug trap.
- if (CheckDevirt) {
+ // If trap checking is enabled, add support to compare the virtual
+ // function pointer to the devirtualized target. In case of a mismatch,
+ // perform a debug trap.
+ if (DevirtCheckMode == WPDCheckMode::Trap) {
auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
Instruction *ThenTerm =
SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);
@@ -1152,8 +1169,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
CallTrap->setDebugLoc(CB.getDebugLoc());
}
- // Devirtualize.
- CB.setCalledOperand(Callee);
+ // If fallback checking is enabled, add support to compare the virtual
+ // function pointer to the devirtualized target. In case of a mismatch,
+ // fall back to indirect call.
+ if (DevirtCheckMode == WPDCheckMode::Fallback) {
+ MDNode *Weights =
+ MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1);
+ // Version the indirect call site. If the called value is equal to the
+ // given callee, 'NewInst' will be executed, otherwise the original call
+ // site will be executed.
+ CallBase &NewInst = versionCallSite(CB, Callee, Weights);
+ NewInst.setCalledOperand(Callee);
+ // Since the new call site is direct, we must clear metadata that
+ // is only appropriate for indirect calls. This includes !prof and
+ // !callees metadata.
+ NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
+ NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
+ // Additionally, we should remove them from the fallback indirect call,
+ // so that we don't attempt to perform indirect call promotion later.
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+ CB.setMetadata(LLVMContext::MD_callees, nullptr);
+ }
+
+ // In either trapping or non-checking mode, devirtualize original call.
+ else {
+ // Devirtualize unconditionally.
+ CB.setCalledOperand(Callee);
+ // Since the call site is now direct, we must clear metadata that
+ // is only appropriate for indirect calls. This includes !prof and
+ // !callees metadata.
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+ CB.setMetadata(LLVMContext::MD_callees, nullptr);
+ }
// This use is no longer unsafe.
if (VCallSite.NumUnsafeUses)
@@ -1208,7 +1255,7 @@ bool DevirtModule::trySingleImplDevirt(
return false;
// If so, update each call site to call that implementation directly.
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
TargetsForSlot[0].WasDevirt = true;
bool IsExported = false;
@@ -1279,7 +1326,7 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
return false;
// Collect functions devirtualized at least for one call site for stats.
- if (PrintSummaryDevirt)
+ if (PrintSummaryDevirt || AreStatisticsEnabled())
DevirtTargets.insert(TheFn);
auto &S = TheFn.getSummaryList()[0];
@@ -1385,6 +1432,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
!FSAttr.getValueAsString().contains("+retpoline"))
continue;
+ NumBranchFunnel++;
if (RemarksEnabled)
VCallSite.emitRemark("branch-funnel",
JT->stripPointerCasts()->getName(), OREGetter);
@@ -1476,6 +1524,7 @@ void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
for (auto Call : CSInfo.CallSites) {
if (!OptimizedCalls.insert(&Call.CB).second)
continue;
+ NumUniformRetVal++;
Call.replaceAndErase(
"uniform-ret-val", FnName, RemarksEnabled, OREGetter,
ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
@@ -1499,7 +1548,7 @@ bool DevirtModule::tryUniformRetValOpt(
}
applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
return true;
@@ -1592,6 +1641,7 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
Cmp = B.CreateZExt(Cmp, Call.CB.getType());
+ NumUniqueRetVal++;
Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
Cmp);
}
@@ -1636,7 +1686,7 @@ bool DevirtModule::tryUniqueRetValOpt(
UniqueMemberAddr);
// Update devirtualization statistics for targets.
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
@@ -1665,11 +1715,13 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
Value *Bits = B.CreateLoad(Int8Ty, Addr);
Value *BitsAndBit = B.CreateAnd(Bits, Bit);
auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
+ NumVirtConstProp1Bit++;
Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
OREGetter, IsBitSet);
} else {
Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
Value *Val = B.CreateLoad(RetType, ValAddr);
+ NumVirtConstProp++;
Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
OREGetter, Val);
}
@@ -1701,7 +1753,7 @@ bool DevirtModule::tryVirtualConstProp(
for (VirtualCallTarget &Target : TargetsForSlot) {
if (Target.Fn->isDeclaration() ||
computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) !=
- MAK_ReadNone ||
+ FMRB_DoesNotAccessMemory ||
Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
Target.Fn->getReturnType() != RetType)
return false;
@@ -1755,7 +1807,7 @@ bool DevirtModule::tryVirtualConstProp(
setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
OffsetBit);
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
@@ -1963,7 +2015,7 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
// (although this is unlikely). In that case, explicitly build a pair and
// RAUW it.
if (!CI->use_empty()) {
- Value *Pair = UndefValue::get(CI->getType());
+ Value *Pair = PoisonValue::get(CI->getType());
IRBuilder<> B(CI);
Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
@@ -2151,9 +2203,9 @@ bool DevirtModule::run() {
removeRedundantTypeTests();
- // We have lowered or deleted the type instrinsics, so we will no
- // longer have enough information to reason about the liveness of virtual
- // function pointers in GlobalDCE.
+ // We have lowered or deleted the type intrinsics, so we will no longer have
+ // enough information to reason about the liveness of virtual function
+ // pointers in GlobalDCE.
for (GlobalVariable &GV : M.globals())
GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
@@ -2243,7 +2295,7 @@ bool DevirtModule::run() {
}
// Collect functions devirtualized at least for one call site for stats.
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
for (const auto &T : TargetsForSlot)
if (T.WasDevirt)
DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
@@ -2276,6 +2328,8 @@ bool DevirtModule::run() {
}
}
+ NumDevirtTargets += DevirtTargets.size();
+
removeRedundantTypeTests();
// Rebuild each global we touched as part of virtual constant propagation to
@@ -2284,9 +2338,9 @@ bool DevirtModule::run() {
for (VTableBits &B : Bits)
rebuildGlobal(B);
- // We have lowered or deleted the type instrinsics, so we will no
- // longer have enough information to reason about the liveness of virtual
- // function pointers in GlobalDCE.
+ // We have lowered or deleted the type intrinsics, so we will no longer have
+ // enough information to reason about the liveness of virtual function
+ // pointers in GlobalDCE.
for (GlobalVariable &GV : M.globals())
GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
@@ -2367,4 +2421,6 @@ void DevirtIndex::run() {
if (PrintSummaryDevirt)
for (const auto &DT : DevirtTargets)
errs() << "Devirtualized call to " << DT << "\n";
+
+ NumDevirtTargets += DevirtTargets.size();
}