path: root/contrib/llvm-project/llvm/lib/Transforms/Utils
author Dimitry Andric <dim@FreeBSD.org> 2021-08-22 19:00:43 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2021-11-13 20:39:49 +0000
commit fe6060f10f634930ff71b7c50291ddc610da2475 (patch)
tree 1483580c790bd4d27b6500a7542b5ee00534d3cc /contrib/llvm-project/llvm/lib/Transforms/Utils
parent b61bce17f346d79cecfd8f195a64b10f77be43b1 (diff)
parent 344a3780b2e33f6ca763666c380202b18aab72a3 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Utils')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp  86
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp  328
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp  41
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp  40
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp  357
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp  39
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp  69
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp  540
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp  27
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp  190
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp  18
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/HelloWorld.cpp  17
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp  21
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp  179
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp  676
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp  28
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp  23
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp  577
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp  33
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp  334
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp  11
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp  408
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp  45
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp  67
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp  20
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp  212
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp  1713
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp  3
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp  177
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp  392
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp  1259
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp  139
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp  198
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp  18
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp  28
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp  118
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp  29
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp  82
50 files changed, 6025 insertions, 2593 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index ccdcf7cbce38..8cd16ca3906f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -134,11 +134,11 @@ static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
PtrPhi->addIncoming(Str, Prev);
- auto PtrNext = Builder.CreateGEP(PtrPhi, One);
+ auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
PtrPhi->addIncoming(PtrNext, While);
// Condition for the while loop.
- auto Data = Builder.CreateLoad(PtrPhi);
+ auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
Builder.CreateCondBr(Cmp, WhileDone, While);
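
The hunk above moves getStrlenWithNull to the IRBuilder overloads that take an explicit element type, which is what remains valid under the opaque-pointer transition. A minimal sketch of the same pattern, assuming an IRBuilder<> named Builder already positioned in a block and an i8 pointer value Ptr (both hypothetical here, not from the patch):

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Advance a byte pointer and load through it with the typed overloads, as in
// the hunk above; the element type (i8) is named explicitly instead of being
// derived from the pointer operand's pointee type.
static Value *loadNextByte(IRBuilder<> &Builder, Value *Ptr) {
  Value *One = Builder.getInt64(1);
  Value *Next = Builder.CreateGEP(Builder.getInt8Ty(), Ptr, One);
  return Builder.CreateLoad(Builder.getInt8Ty(), Next);
}
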
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 0908b361a4d4..e789194eb3ab 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -70,9 +70,11 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <utility>
using namespace llvm;
+using namespace sampleprofutil;
#define DEBUG_TYPE "add-discriminators"
@@ -172,6 +174,10 @@ static bool addDiscriminators(Function &F) {
if (NoDiscriminators || !F.getSubprogram())
return false;
+ // Create FSDiscriminatorVariable if flow sensitive discriminators are used.
+ if (EnableFSDiscriminator)
+ createFSDiscriminatorVariable(F.getParent());
+
bool Changed = false;
using Location = std::pair<StringRef, unsigned>;
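
AddDiscriminators exists because several distinct code paths can share one file/line location; discriminators give each path its own identity so sample profiles can attribute counts separately, and the hunk above additionally materializes the flow-sensitive discriminator variable when EnableFSDiscriminator is set. A tiny source-level illustration (hypothetical code, not from the patch):

int g(int);
// Both calls to g sit on the same source line; without discriminators their
// profile samples would be merged, while with discriminators each call site
// keeps its own count.
int f(int x) { return g(x) + g(x + 1); }
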
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index 3daff3b4430b..d689e04da36f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "assume-builder"
-
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
@@ -27,6 +25,7 @@
using namespace llvm;
+namespace llvm {
cl::opt<bool> ShouldPreserveAllAttributes(
"assume-preserve-all", cl::init(false), cl::Hidden,
cl::desc("enable preservation of all attrbitues. even those that are "
@@ -36,6 +35,9 @@ cl::opt<bool> EnableKnowledgeRetention(
"enable-knowledge-retention", cl::init(false), cl::Hidden,
cl::desc(
"enable preservation of attributes throughout code transformation"));
+} // namespace llvm
+
+#define DEBUG_TYPE "assume-builder"
STATISTIC(NumAssumeBuilt, "Number of assume built by the assume builder");
STATISTIC(NumBundlesInAssumes, "Total number of Bundles in the assume built");
@@ -65,7 +67,7 @@ bool isUsefullToPreserve(Attribute::AttrKind Kind) {
/// This function will try to transform the given knowledge into a more
/// canonical one. the canonical knowledge maybe the given one.
-RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
+RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, DataLayout DL) {
switch (RK.AttrKind) {
default:
return RK;
@@ -76,8 +78,7 @@ RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) {
if (auto *GEP = dyn_cast<GEPOperator>(Strip))
RK.ArgValue =
- MinAlign(RK.ArgValue,
- GEP->getMaxPreservedAlignment(M->getDataLayout()).value());
+ MinAlign(RK.ArgValue, GEP->getMaxPreservedAlignment(DL).value());
});
RK.WasOn = V;
return RK;
@@ -85,8 +86,8 @@ RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
case Attribute::Dereferenceable:
case Attribute::DereferenceableOrNull: {
int64_t Offset = 0;
- Value *V = GetPointerBaseWithConstantOffset(
- RK.WasOn, Offset, M->getDataLayout(), /*AllowNonInBounds*/ false);
+ Value *V = GetPointerBaseWithConstantOffset(RK.WasOn, Offset, DL,
+ /*AllowNonInBounds*/ false);
if (Offset < 0)
return RK;
RK.ArgValue = RK.ArgValue + Offset;
@@ -103,16 +104,16 @@ struct AssumeBuilderState {
using MapKey = std::pair<Value *, Attribute::AttrKind>;
SmallMapVector<MapKey, unsigned, 8> AssumedKnowledgeMap;
- Instruction *InstBeingRemoved = nullptr;
+ Instruction *InstBeingModified = nullptr;
AssumptionCache* AC = nullptr;
DominatorTree* DT = nullptr;
AssumeBuilderState(Module *M, Instruction *I = nullptr,
AssumptionCache *AC = nullptr, DominatorTree *DT = nullptr)
- : M(M), InstBeingRemoved(I), AC(AC), DT(DT) {}
+ : M(M), InstBeingModified(I), AC(AC), DT(DT) {}
bool tryToPreserveWithoutAddingAssume(RetainedKnowledge RK) {
- if (!InstBeingRemoved || !RK.WasOn)
+ if (!InstBeingModified || !RK.WasOn)
return false;
bool HasBeenPreserved = false;
Use* ToUpdate = nullptr;
@@ -120,13 +121,12 @@ struct AssumeBuilderState {
RK.WasOn, {RK.AttrKind}, AC,
[&](RetainedKnowledge RKOther, Instruction *Assume,
const CallInst::BundleOpInfo *Bundle) {
- if (!isValidAssumeForContext(Assume, InstBeingRemoved, DT))
+ if (!isValidAssumeForContext(Assume, InstBeingModified, DT))
return false;
if (RKOther.ArgValue >= RK.ArgValue) {
HasBeenPreserved = true;
return true;
- } else if (isValidAssumeForContext(InstBeingRemoved, Assume,
- DT)) {
+ } else if (isValidAssumeForContext(InstBeingModified, Assume, DT)) {
HasBeenPreserved = true;
IntrinsicInst *Intr = cast<IntrinsicInst>(Assume);
ToUpdate = &Intr->op_begin()[Bundle->Begin + ABA_Argument];
@@ -152,7 +152,7 @@ struct AssumeBuilderState {
}
if (auto *Arg = dyn_cast<Argument>(RK.WasOn)) {
if (Arg->hasAttribute(RK.AttrKind) &&
- (!Attribute::doesAttrKindHaveArgument(RK.AttrKind) ||
+ (!Attribute::isIntAttrKind(RK.AttrKind) ||
Arg->getAttribute(RK.AttrKind).getValueAsInt() >= RK.ArgValue))
return false;
return true;
@@ -162,14 +162,14 @@ struct AssumeBuilderState {
if (RK.WasOn->use_empty())
return false;
Use *SingleUse = RK.WasOn->getSingleUndroppableUse();
- if (SingleUse && SingleUse->getUser() == InstBeingRemoved)
+ if (SingleUse && SingleUse->getUser() == InstBeingModified)
return false;
}
return true;
}
void addKnowledge(RetainedKnowledge RK) {
- RK = canonicalizedKnowledge(RK, M);
+ RK = canonicalizedKnowledge(RK, M->getDataLayout());
if (!isKnowledgeWorthPreserving(RK))
return;
@@ -206,8 +206,12 @@ struct AssumeBuilderState {
auto addAttrList = [&](AttributeList AttrList) {
for (unsigned Idx = AttributeList::FirstArgIndex;
Idx < AttrList.getNumAttrSets(); Idx++)
- for (Attribute Attr : AttrList.getAttributes(Idx))
- addAttribute(Attr, Call->getArgOperand(Idx - 1));
+ for (Attribute Attr : AttrList.getAttributes(Idx)) {
+ bool IsPoisonAttr = Attr.hasAttribute(Attribute::NonNull) ||
+ Attr.hasAttribute(Attribute::Alignment);
+ if (!IsPoisonAttr || Call->isPassingUndefUB(Idx - 1))
+ addAttribute(Attr, Call->getArgOperand(Idx - 1));
+ }
for (Attribute Attr : AttrList.getFnAttributes())
addAttribute(Attr, nullptr);
};
@@ -216,7 +220,7 @@ struct AssumeBuilderState {
addAttrList(Fn->getAttributes());
}
- IntrinsicInst *build() {
+ AssumeInst *build() {
if (AssumedKnowledgeMap.empty())
return nullptr;
if (!DebugCounter::shouldExecute(BuildAssumeCounter))
@@ -240,7 +244,7 @@ struct AssumeBuilderState {
NumBundlesInAssumes++;
}
NumAssumeBuilt++;
- return cast<IntrinsicInst>(CallInst::Create(
+ return cast<AssumeInst>(CallInst::Create(
FnAssume, ArrayRef<Value *>({ConstantInt::getTrue(C)}), OpBundle));
}
@@ -278,7 +282,7 @@ struct AssumeBuilderState {
} // namespace
-IntrinsicInst *llvm::buildAssumeFromInst(Instruction *I) {
+AssumeInst *llvm::buildAssumeFromInst(Instruction *I) {
if (!EnableKnowledgeRetention)
return nullptr;
AssumeBuilderState Builder(I->getModule());
@@ -292,13 +296,38 @@ void llvm::salvageKnowledge(Instruction *I, AssumptionCache *AC,
return;
AssumeBuilderState Builder(I->getModule(), I, AC, DT);
Builder.addInstruction(I);
- if (IntrinsicInst *Intr = Builder.build()) {
+ if (auto *Intr = Builder.build()) {
Intr->insertBefore(I);
if (AC)
AC->registerAssumption(Intr);
}
}
+AssumeInst *
+llvm::buildAssumeFromKnowledge(ArrayRef<RetainedKnowledge> Knowledge,
+ Instruction *CtxI, AssumptionCache *AC,
+ DominatorTree *DT) {
+ AssumeBuilderState Builder(CtxI->getModule(), CtxI, AC, DT);
+ for (const RetainedKnowledge &RK : Knowledge)
+ Builder.addKnowledge(RK);
+ return Builder.build();
+}
+
+RetainedKnowledge llvm::simplifyRetainedKnowledge(AssumeInst *Assume,
+ RetainedKnowledge RK,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ AssumeBuilderState Builder(Assume->getModule(), Assume, AC, DT);
+ RK = canonicalizedKnowledge(RK, Assume->getModule()->getDataLayout());
+
+ if (!Builder.isKnowledgeWorthPreserving(RK))
+ return RetainedKnowledge::none();
+
+ if (Builder.tryToPreserveWithoutAddingAssume(RK))
+ return RetainedKnowledge::none();
+ return RK;
+}
+
namespace {
struct AssumeSimplify {
@@ -344,7 +373,8 @@ struct AssumeSimplify {
for (IntrinsicInst *Assume : CleanupToDo) {
auto *Arg = dyn_cast<ConstantInt>(Assume->getOperand(0));
if (!Arg || Arg->isZero() ||
- (!ForceCleanup && !isAssumeWithEmptyBundle(*Assume)))
+ (!ForceCleanup &&
+ !isAssumeWithEmptyBundle(cast<AssumeInst>(*Assume))))
continue;
MadeChange = true;
if (ForceCleanup)
@@ -387,11 +417,12 @@ struct AssumeSimplify {
CleanupToDo.insert(Assume);
continue;
}
- RetainedKnowledge RK = getKnowledgeFromBundle(*Assume, BOI);
+ RetainedKnowledge RK =
+ getKnowledgeFromBundle(cast<AssumeInst>(*Assume), BOI);
if (auto *Arg = dyn_cast_or_null<Argument>(RK.WasOn)) {
bool HasSameKindAttr = Arg->hasAttribute(RK.AttrKind);
if (HasSameKindAttr)
- if (!Attribute::doesAttrKindHaveArgument(RK.AttrKind) ||
+ if (!Attribute::isIntAttrKind(RK.AttrKind) ||
Arg->getAttribute(RK.AttrKind).getValueAsInt() >=
RK.ArgValue) {
RemoveFromAssume();
@@ -446,7 +477,8 @@ struct AssumeSimplify {
for (IntrinsicInst *I : make_range(Begin, End)) {
CleanupToDo.insert(I);
for (CallInst::BundleOpInfo &BOI : I->bundle_op_infos()) {
- RetainedKnowledge RK = getKnowledgeFromBundle(*I, BOI);
+ RetainedKnowledge RK =
+ getKnowledgeFromBundle(cast<AssumeInst>(*I), BOI);
if (!RK)
continue;
Builder.addKnowledge(RK);
@@ -466,7 +498,7 @@ struct AssumeSimplify {
InsertPt = It->getNextNode();
break;
}
- IntrinsicInst *MergedAssume = Builder.build();
+ auto *MergedAssume = Builder.build();
if (!MergedAssume)
return;
MadeChange = true;
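
The AssumeBundleBuilder changes above replace the IntrinsicInst plumbing with the AssumeInst subclass and add buildAssumeFromKnowledge for building an assume from a caller-supplied knowledge list. A hedged usage sketch, assuming a pointer value Ptr, a context instruction CtxI, and optional AC/DT analyses are in scope (the helper name is invented for illustration):

#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"

using namespace llvm;

// Record "Ptr is 16-byte aligned" as an assume bundle ahead of CtxI.
static void recordAlignment(Value *Ptr, Instruction *CtxI, AssumptionCache *AC,
                            DominatorTree *DT) {
  RetainedKnowledge RK;
  RK.AttrKind = Attribute::Alignment;
  RK.ArgValue = 16;
  RK.WasOn = Ptr;
  // May return nullptr if the knowledge is already available or not worth an
  // assume (see isKnowledgeWorthPreserving above).
  if (AssumeInst *Assume = buildAssumeFromKnowledge({RK}, CtxI, AC, DT)) {
    Assume->insertBefore(CtxI);
    if (AC)
      AC->registerAssumption(Assume);
  }
}
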
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6bcd42c4c6d8..ee933b638a23 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -124,11 +125,9 @@ bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
// Collect all dead blocks.
std::vector<BasicBlock*> DeadBlocks;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- if (!Reachable.count(&*I)) {
- BasicBlock *BB = &*I;
- DeadBlocks.push_back(BB);
- }
+ for (BasicBlock &BB : F)
+ if (!Reachable.count(&BB))
+ DeadBlocks.push_back(&BB);
// Delete the dead blocks.
DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs);
@@ -209,9 +208,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// Can't merge if there is PHI loop.
for (PHINode &PN : BB->phis())
- for (Value *IncValue : PN.incoming_values())
- if (IncValue == &PN)
- return false;
+ if (llvm::is_contained(PN.incoming_values(), &PN))
+ return false;
LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
<< PredBB->getName() << "\n");
@@ -230,21 +228,22 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// These dominator edges will be redirected from Pred.
std::vector<DominatorTree::UpdateType> Updates;
if (DTU) {
- SmallSetVector<BasicBlock *, 2> UniqueSuccessors(succ_begin(BB),
- succ_end(BB));
- Updates.reserve(1 + (2 * UniqueSuccessors.size()));
+ SmallPtrSet<BasicBlock *, 2> SuccsOfBB(succ_begin(BB), succ_end(BB));
+ SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB),
+ succ_begin(PredBB));
+ Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1);
// Add insert edges first. Experimentally, for the particular case of two
// blocks that can be merged, with a single successor and single predecessor
// respectively, it is beneficial to have all insert updates first. Deleting
// edges first may lead to unreachable blocks, followed by inserting edges
// making the blocks reachable again. Such DT updates lead to high compile
// times. We add inserts before deletes here to reduce compile time.
- for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
- // This successor of BB may already have PredBB as a predecessor.
- if (!llvm::is_contained(successors(PredBB), UniqueSuccessor))
- Updates.push_back({DominatorTree::Insert, PredBB, UniqueSuccessor});
- for (BasicBlock *UniqueSuccessor : UniqueSuccessors)
- Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
+ for (BasicBlock *SuccOfBB : SuccsOfBB)
+ // This successor of BB may already be a PredBB's successor.
+ if (!SuccsOfPredBB.contains(SuccOfBB))
+ Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB});
+ for (BasicBlock *SuccOfBB : SuccsOfBB)
+ Updates.push_back({DominatorTree::Delete, BB, SuccOfBB});
Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
@@ -299,17 +298,11 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
if (MemDep)
MemDep->invalidateCachedPredecessors();
- // Finally, erase the old block and update dominator info.
- if (DTU) {
- assert(BB->getInstList().size() == 1 &&
- isa<UnreachableInst>(BB->getTerminator()) &&
- "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
+ if (DTU)
DTU->applyUpdates(Updates);
- DTU->deleteBB(BB);
- } else {
- BB->eraseFromParent(); // Nuke BB if DTU is nullptr.
- }
+
+ // Finally, erase the old block and update dominator info.
+ DeleteDeadBlock(BB, DTU);
return true;
}
@@ -409,7 +402,8 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
/// - Keep track of non-overlapping fragments.
static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
SmallVector<DbgValueInst *, 8> ToBeRemoved;
- DenseMap<DebugVariable, std::pair<Value *, DIExpression *> > VariableMap;
+ DenseMap<DebugVariable, std::pair<SmallVector<Value *, 4>, DIExpression *>>
+ VariableMap;
for (auto &I : *BB) {
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
DebugVariable Key(DVI->getVariable(),
@@ -418,10 +412,10 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
auto VMI = VariableMap.find(Key);
// Update the map if we found a new value/expression describing the
// variable, or if the variable wasn't mapped already.
- if (VMI == VariableMap.end() ||
- VMI->second.first != DVI->getValue() ||
+ SmallVector<Value *, 4> Values(DVI->getValues());
+ if (VMI == VariableMap.end() || VMI->second.first != Values ||
VMI->second.second != DVI->getExpression()) {
- VariableMap[Key] = { DVI->getValue(), DVI->getExpression() };
+ VariableMap[Key] = {Values, DVI->getExpression()};
continue;
}
// Found an identical mapping. Remember the instruction for later removal.
@@ -501,13 +495,20 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
const Twine &BBName) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
- // If this is a critical edge, let SplitCriticalEdge do it.
Instruction *LatchTerm = BB->getTerminator();
- if (SplitCriticalEdge(
- LatchTerm, SuccNum,
- CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA(),
- BBName))
- return LatchTerm->getSuccessor(SuccNum);
+
+ CriticalEdgeSplittingOptions Options =
+ CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA();
+
+ if ((isCriticalEdge(LatchTerm, SuccNum, Options.MergeIdenticalEdges))) {
+ // If it is a critical edge, and the succesor is an exception block, handle
+ // the split edge logic in this specific function
+ if (Succ->isEHPad())
+ return ehAwareSplitEdge(BB, Succ, nullptr, nullptr, Options, BBName);
+
+ // If this is a critical edge, let SplitKnownCriticalEdge do it.
+ return SplitKnownCriticalEdge(LatchTerm, SuccNum, Options, BBName);
+ }
// If the edge isn't critical, then BB has a single successor or Succ has a
// single pred. Split the block.
@@ -527,6 +528,218 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
}
+void llvm::setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
+ if (auto *II = dyn_cast<InvokeInst>(TI))
+ II->setUnwindDest(Succ);
+ else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
+ CS->setUnwindDest(Succ);
+ else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
+ CR->setUnwindDest(Succ);
+ else
+ llvm_unreachable("unexpected terminator instruction");
+}
+
+void llvm::updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
+ BasicBlock *NewPred, PHINode *Until) {
+ int BBIdx = 0;
+ for (PHINode &PN : DestBB->phis()) {
+ // We manually update the LandingPadReplacement PHINode and it is the last
+ // PHI Node. So, if we find it, we are done.
+ if (Until == &PN)
+ break;
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB list of PHI nodes are usually in the same
+ // order.
+ if (PN.getIncomingBlock(BBIdx) != OldPred)
+ BBIdx = PN.getBasicBlockIndex(OldPred);
+
+ assert(BBIdx != -1 && "Invalid PHI Index!");
+ PN.setIncomingBlock(BBIdx, NewPred);
+ }
+}
+
+BasicBlock *llvm::ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
+ LandingPadInst *OriginalPad,
+ PHINode *LandingPadReplacement,
+ const CriticalEdgeSplittingOptions &Options,
+ const Twine &BBName) {
+
+ auto *PadInst = Succ->getFirstNonPHI();
+ if (!LandingPadReplacement && !PadInst->isEHPad())
+ return SplitEdge(BB, Succ, Options.DT, Options.LI, Options.MSSAU, BBName);
+
+ auto *LI = Options.LI;
+ SmallVector<BasicBlock *, 4> LoopPreds;
+ // Check if extra modifications will be required to preserve loop-simplify
+ // form after splitting. If it would require splitting blocks with IndirectBr
+ // terminators, bail out if preserving loop-simplify form is requested.
+ if (Options.PreserveLoopSimplify && LI) {
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+
+ // The only way that we can break LoopSimplify form by splitting a
+ // critical edge is when there exists some edge from BBLoop to Succ *and*
+ // the only edge into Succ from outside of BBLoop is that of NewBB after
+ // the split. If the first isn't true, then LoopSimplify still holds,
+ // NewBB is the new exit block and it has no non-loop predecessors. If the
+ // second isn't true, then Succ was not in LoopSimplify form prior to
+ // the split as it had a non-loop predecessor. In both of these cases,
+ // the predecessor must be directly in BBLoop, not in a subloop, or again
+ // LoopSimplify doesn't hold.
+ for (BasicBlock *P : predecessors(Succ)) {
+ if (P == BB)
+ continue; // The new block is known.
+ if (LI->getLoopFor(P) != BBLoop) {
+ // Loop is not in LoopSimplify form, no need to re simplify after
+ // splitting edge.
+ LoopPreds.clear();
+ break;
+ }
+ LoopPreds.push_back(P);
+ }
+ // Loop-simplify form can be preserved, if we can split all in-loop
+ // predecessors.
+ if (any_of(LoopPreds, [](BasicBlock *Pred) {
+ return isa<IndirectBrInst>(Pred->getTerminator());
+ })) {
+ return nullptr;
+ }
+ }
+ }
+
+ auto *NewBB =
+ BasicBlock::Create(BB->getContext(), BBName, BB->getParent(), Succ);
+ setUnwindEdgeTo(BB->getTerminator(), NewBB);
+ updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
+
+ if (LandingPadReplacement) {
+ auto *NewLP = OriginalPad->clone();
+ auto *Terminator = BranchInst::Create(Succ, NewBB);
+ NewLP->insertBefore(Terminator);
+ LandingPadReplacement->addIncoming(NewLP, NewBB);
+ } else {
+ Value *ParentPad = nullptr;
+ if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
+ ParentPad = FuncletPad->getParentPad();
+ else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
+ ParentPad = CatchSwitch->getParentPad();
+ else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(PadInst))
+ ParentPad = CleanupPad->getParentPad();
+ else if (auto *LandingPad = dyn_cast<LandingPadInst>(PadInst))
+ ParentPad = LandingPad->getParent();
+ else
+ llvm_unreachable("handling for other EHPads not implemented yet");
+
+ auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, BBName, NewBB);
+ CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
+ }
+
+ auto *DT = Options.DT;
+ auto *MSSAU = Options.MSSAU;
+ if (!DT && !LI)
+ return NewBB;
+
+ if (DT) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+
+ Updates.push_back({DominatorTree::Insert, BB, NewBB});
+ Updates.push_back({DominatorTree::Insert, NewBB, Succ});
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+
+ DTU.applyUpdates(Updates);
+ DTU.flush();
+
+ if (MSSAU) {
+ MSSAU->applyUpdates(Updates, *DT);
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
+ }
+
+ if (LI) {
+ if (Loop *BBLoop = LI->getLoopFor(BB)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (Loop *SuccLoop = LI->getLoopFor(Succ)) {
+ if (BBLoop == SuccLoop) {
+ // Both in the same loop, the NewBB joins loop.
+ SuccLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else if (BBLoop->contains(SuccLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ BBLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else if (SuccLoop->contains(BBLoop)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ SuccLoop->addBasicBlockToLoop(NewBB, *LI);
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(SuccLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (Loop *P = SuccLoop->getParentLoop())
+ P->addBasicBlockToLoop(NewBB, *LI);
+ }
+ }
+
+ // If BB is in a loop and Succ is outside of that loop, we may need to
+ // update LoopSimplify form and LCSSA form.
+ if (!BBLoop->contains(Succ)) {
+ assert(!BBLoop->contains(NewBB) &&
+ "Split point for loop exit is contained in loop!");
+
+ // Update LCSSA form in the newly created exit block.
+ if (Options.PreserveLCSSA) {
+ createPHIsForSplitLoopExit(BB, NewBB, Succ);
+ }
+
+ if (!LoopPreds.empty()) {
+ BasicBlock *NewExitBB = SplitBlockPredecessors(
+ Succ, LoopPreds, "split", DT, LI, MSSAU, Options.PreserveLCSSA);
+ if (Options.PreserveLCSSA)
+ createPHIsForSplitLoopExit(LoopPreds, NewExitBB, Succ);
+ }
+ }
+ }
+ }
+
+ return NewBB;
+}
+
+void llvm::createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
+ BasicBlock *SplitBB, BasicBlock *DestBB) {
+ // SplitBB shouldn't have anything non-trivial in it yet.
+ assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
+ SplitBB->isLandingPad()) &&
+ "SplitBB has non-PHI nodes!");
+
+ // For each PHI in the destination block.
+ for (PHINode &PN : DestBB->phis()) {
+ int Idx = PN.getBasicBlockIndex(SplitBB);
+ assert(Idx >= 0 && "Invalid Block Index");
+ Value *V = PN.getIncomingValue(Idx);
+
+ // If the input is a PHI which already satisfies LCSSA, don't create
+ // a new one.
+ if (const PHINode *VP = dyn_cast<PHINode>(V))
+ if (VP->getParent() == SplitBB)
+ continue;
+
+ // Otherwise a new PHI is needed. Create one and populate it.
+ PHINode *NewPN = PHINode::Create(
+ PN.getType(), Preds.size(), "split",
+ SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
+ for (BasicBlock *BB : Preds)
+ NewPN->addIncoming(V, BB);
+
+ // Update the original PHI.
+ PN.setIncomingValue(Idx, NewPN);
+ }
+}
+
unsigned
llvm::SplitAllCriticalEdges(Function &F,
const CriticalEdgeSplittingOptions &Options) {
@@ -553,8 +766,10 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
BBName);
}
BasicBlock::iterator SplitIt = SplitPt->getIterator();
- while (isa<PHINode>(SplitIt) || SplitIt->isEHPad())
+ while (isa<PHINode>(SplitIt) || SplitIt->isEHPad()) {
++SplitIt;
+ assert(SplitIt != SplitPt->getParent()->end());
+ }
std::string Name = BBName.str();
BasicBlock *New = Old->splitBasicBlock(
SplitIt, Name.empty() ? Old->getName() + ".split" : Name);
@@ -568,8 +783,8 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
if (DTU) {
SmallVector<DominatorTree::UpdateType, 8> Updates;
// Old dominates New. New node dominates all other nodes dominated by Old.
- SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
- succ_end(New));
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
+ succ_end(New));
Updates.push_back({DominatorTree::Insert, Old, New});
Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size());
for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) {
@@ -634,8 +849,8 @@ BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
// New dominates Old. The predecessor nodes of the Old node dominate
// New node.
- SmallSetVector<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
- pred_end(New));
+ SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
+ pred_end(New));
DTUpdates.push_back({DominatorTree::Insert, New, Old});
DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size());
for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) {
@@ -666,7 +881,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
if (DTU) {
// Recalculation of DomTree is needed when updating a forward DomTree and
// the Entry BB is replaced.
- if (NewBB == &NewBB->getParent()->getEntryBlock() && DTU->hasDomTree()) {
+ if (NewBB->isEntryBlock() && DTU->hasDomTree()) {
// The entry block was removed and there is no external interface for
// the dominator tree to be notified of this change. In this corner-case
// we recalculate the entire tree.
@@ -674,7 +889,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
} else {
// Split block expects NewBB to have a non-empty set of predecessors.
SmallVector<DominatorTree::UpdateType, 8> Updates;
- SmallSetVector<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
+ SmallPtrSet<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
Updates.reserve(Updates.size() + 2 * UniquePreds.size());
for (auto *UniquePred : UniquePreds) {
@@ -685,7 +900,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
} else if (DT) {
if (OldBB == DT->getRootNode()->getBlock()) {
- assert(NewBB == &NewBB->getParent()->getEntryBlock());
+ assert(NewBB->isEntryBlock());
DT->setNewRoot(NewBB);
} else {
// Split block expects NewBB to have a non-empty set of predecessors.
@@ -1083,9 +1298,8 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// If the return instruction returns a value, and if the value was a
// PHI node in "BB", propagate the right value into the return.
- for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
- i != e; ++i) {
- Value *V = *i;
+ for (Use &Op : NewRet->operands()) {
+ Value *V = Op;
Instruction *NewBC = nullptr;
if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
// Return value might be bitcasted. Clone and insert it before the
@@ -1093,7 +1307,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
V = BCI->getOperand(0);
NewBC = BCI->clone();
Pred->getInstList().insert(NewRet->getIterator(), NewBC);
- *i = NewBC;
+ Op = NewBC;
}
Instruction *NewEV = nullptr;
@@ -1105,7 +1319,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
Pred->getInstList().insert(NewBC->getIterator(), NewEV);
} else {
Pred->getInstList().insert(NewRet->getIterator(), NewEV);
- *i = NewEV;
+ Op = NewEV;
}
}
@@ -1116,7 +1330,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
} else if (NewBC)
NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
else
- *i = PN->getIncomingValueForBlock(Pred);
+ Op = PN->getIncomingValueForBlock(Pred);
}
}
}
@@ -1141,8 +1355,8 @@ SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
if (DTU) {
- SmallSetVector<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
- succ_end(Tail));
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
+ succ_end(Tail));
Updates.push_back({DominatorTree::Insert, Head, Tail});
Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size());
for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) {
@@ -1242,8 +1456,8 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
}
-Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
- BasicBlock *&IfFalse) {
+BranchInst *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
BasicBlock *Pred1 = nullptr;
BasicBlock *Pred2 = nullptr;
@@ -1309,7 +1523,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
return nullptr;
}
- return Pred1Br->getCondition();
+ return Pred1Br;
}
// Ok, if we got here, both predecessors end with an unconditional branch to
@@ -1331,7 +1545,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
IfTrue = Pred2;
IfFalse = Pred1;
}
- return BI->getCondition();
+ return BI;
}
// After creating a control flow hub, the operands of PHINodes in an outgoing
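
Among the BasicBlockUtils changes above, GetIfCondition now hands back the dominating BranchInst itself rather than just its condition value, so callers keep access to both the branch and its condition. A minimal caller-side sketch (hypothetical helper, assuming a block BB shaped like an if/then/else join):

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

static Value *findGuardingCondition(BasicBlock *BB) {
  BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
  // Returns the conditional branch that controls entry into BB's two
  // predecessors, or nullptr if the CFG does not match the expected shape.
  if (BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse))
    return DomBI->getCondition(); // the value earlier callers received directly
  return nullptr;
}
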
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 939a1a3a868d..1bb80be8ef99 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -100,46 +100,19 @@ PreservedAnalyses BreakCriticalEdgesPass::run(Function &F,
// Implementation of the external critical edge manipulation functions
//===----------------------------------------------------------------------===//
-/// When a loop exit edge is split, LCSSA form may require new PHIs in the new
-/// exit block. This function inserts the new PHIs, as needed. Preds is a list
-/// of preds inside the loop, SplitBB is the new loop exit block, and DestBB is
-/// the old loop exit, now the successor of SplitBB.
-static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
- BasicBlock *SplitBB,
- BasicBlock *DestBB) {
- // SplitBB shouldn't have anything non-trivial in it yet.
- assert((SplitBB->getFirstNonPHI() == SplitBB->getTerminator() ||
- SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
-
- // For each PHI in the destination block.
- for (PHINode &PN : DestBB->phis()) {
- unsigned Idx = PN.getBasicBlockIndex(SplitBB);
- Value *V = PN.getIncomingValue(Idx);
-
- // If the input is a PHI which already satisfies LCSSA, don't create
- // a new one.
- if (const PHINode *VP = dyn_cast<PHINode>(V))
- if (VP->getParent() == SplitBB)
- continue;
-
- // Otherwise a new PHI is needed. Create one and populate it.
- PHINode *NewPN = PHINode::Create(
- PN.getType(), Preds.size(), "split",
- SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
- for (unsigned i = 0, e = Preds.size(); i != e; ++i)
- NewPN->addIncoming(V, Preds[i]);
-
- // Update the original PHI.
- PN.setIncomingValue(Idx, NewPN);
- }
-}
-
BasicBlock *llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
const CriticalEdgeSplittingOptions &Options,
const Twine &BBName) {
if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
return nullptr;
+ return SplitKnownCriticalEdge(TI, SuccNum, Options, BBName);
+}
+
+BasicBlock *
+llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
+ const CriticalEdgeSplittingOptions &Options,
+ const Twine &BBName) {
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index dba5403f272a..35e22f7a57e2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -44,7 +44,6 @@ STATISTIC(NumSExtArg, "Number of arguments inferred as signext");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
-STATISTIC(NumNonNull, "Number of function returns inferred as nonnull returns");
STATISTIC(NumReturnedArg, "Number of arguments inferred as returned");
STATISTIC(NumWillReturn, "Number of functions inferred as willreturn");
@@ -166,6 +165,14 @@ static bool setArgsNoUndef(Function &F) {
return Changed;
}
+static bool setArgNoUndef(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::NoUndef))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::NoUndef);
+ ++NumNoUndef;
+ return true;
+}
+
static bool setRetAndArgsNoUndef(Function &F) {
return setRetNoUndef(F) | setArgsNoUndef(F);
}
@@ -249,12 +256,20 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
- case LibFunc_strcpy:
- case LibFunc_strncpy:
case LibFunc_strcat:
case LibFunc_strncat:
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
Changed |= setReturnedArg(F, 0);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setOnlyReadsMemory(F, 1);
+ Changed |= setDoesNotAlias(F, 0);
+ Changed |= setDoesNotAlias(F, 1);
+ return Changed;
+ case LibFunc_strcpy:
+ case LibFunc_strncpy:
+ Changed |= setReturnedArg(F, 0);
LLVM_FALLTHROUGH;
case LibFunc_stpcpy:
case LibFunc_stpncpy:
@@ -323,8 +338,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
- case LibFunc_strdup:
case LibFunc_strndup:
+ Changed |= setArgNoUndef(F, 1);
+ LLVM_FALLTHROUGH;
+ case LibFunc_strdup:
Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
@@ -383,7 +400,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_malloc:
case LibFunc_vec_malloc:
Changed |= setOnlyAccessesInaccessibleMemory(F);
- Changed |= setRetNoUndef(F);
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
@@ -471,10 +488,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
+ Changed |= setArgNoUndef(F, 1);
return Changed;
case LibFunc_reallocf:
Changed |= setRetNoUndef(F);
Changed |= setWillReturn(F);
+ Changed |= setArgNoUndef(F, 1);
return Changed;
case LibFunc_read:
// May throw; "read" is a valid pthread cancellation point.
@@ -517,7 +536,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_aligned_alloc:
Changed |= setOnlyAccessesInaccessibleMemory(F);
- Changed |= setRetNoUndef(F);
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
@@ -548,8 +567,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_calloc:
case LibFunc_vec_calloc:
- Changed |= setOnlyAccessesInaccessibleMemory(F);
- Changed |= setRetNoUndef(F);
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
@@ -833,7 +851,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_valloc:
Changed |= setOnlyAccessesInaccessibleMemory(F);
- Changed |= setRetNoUndef(F);
+ Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
@@ -908,8 +926,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 3);
return Changed;
- case LibFunc_dunder_strdup:
case LibFunc_dunder_strndup:
+ Changed |= setArgNoUndef(F, 1);
+ LLVM_FALLTHROUGH;
+ case LibFunc_dunder_strdup:
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
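
The BuildLibCalls changes above tighten the attributes inferred for string and allocation routines (for example, strcat/strncat become argmemonly with noalias pointer arguments, and size arguments to the *alloc family gain noundef). A hedged sketch of how that inference is driven through the existing inferLibFuncAttributes entry point, on a hypothetical module:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

using namespace llvm;

// Re-run library-call attribute inference over every declaration in M;
// recognized declarations such as strcat then pick up the facts added above.
static bool refreshLibCallAttrs(Module &M, const TargetLibraryInfo &TLI) {
  bool Changed = false;
  for (Function &F : M)
    if (F.isDeclaration())
      Changed |= inferLibFuncAttributes(F, TLI);
  return Changed;
}
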
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index bf08bf274737..87868251036c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -490,11 +490,8 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
// If byval is used, this must be a pointer type, and the byval type must
// match the element type. Update it if present.
- if (ArgAttrs.getByValType()) {
- Type *NewTy = Callee->getParamByValType(ArgNo);
- ArgAttrs.addByValAttr(
- NewTy ? NewTy : cast<PointerType>(FormalTy)->getElementType());
- }
+ if (ArgAttrs.getByValType())
+ ArgAttrs.addByValAttr(Callee->getParamByValType(ArgNo));
NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
AttributeChanged = true;
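
The byval handling above now takes the element type from the callee's declared byval parameter instead of falling back to the pointer's pointee type. For context, a hedged sketch of the surrounding indirect-call promotion flow, assuming a CallBase CB whose target function has been resolved (the wrapper name is invented for illustration):

#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

using namespace llvm;

// Promote CB to a direct call of Target when legal; promoteCall then rewrites
// argument attributes, including the byval element type shown in the hunk.
static bool tryPromote(CallBase &CB, Function *Target) {
  const char *Reason = nullptr;
  if (!isLegalToPromote(CB, Target, &Reason))
    return false;
  promoteCall(CB, Target);
  return true;
}
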
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 6ab061510a60..0ac9a5aaa425 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -44,7 +44,6 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
ClonedCodeInfo *CodeInfo,
DebugInfoFinder *DIFinder) {
- DenseMap<const MDNode *, MDNode *> Cache;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
if (BB->hasName())
NewBB->setName(BB->getName() + NameSuffix);
@@ -72,7 +71,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
}
if (CodeInfo) {
- CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
}
return NewBB;
@@ -83,8 +82,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
- bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
+ CloneFunctionChangeType Changes,
+ SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
@@ -95,6 +94,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif
+ bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
+
// Copy all attributes other than those stored in the AttributeList. We need
// to remap the parameter indices of the AttributeList.
AttributeList NewAttrs = NewFunc->getAttributes();
@@ -123,45 +124,54 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
OldAttrs.getRetAttributes(), NewArgAttrs));
- bool MustCloneSP =
- OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
- DISubprogram *SP = OldFunc->getSubprogram();
- if (SP) {
- assert(!MustCloneSP || ModuleLevelChanges);
- // Add mappings for some DebugInfo nodes that we don't want duplicated
- // even if they're distinct.
- auto &MD = VMap.MD();
- MD[SP->getUnit()].reset(SP->getUnit());
- MD[SP->getType()].reset(SP->getType());
- MD[SP->getFile()].reset(SP->getFile());
- // If we're not cloning into the same module, no need to clone the
- // subprogram
- if (!MustCloneSP)
- MD[SP].reset(SP);
- }
-
// Everything else beyond this point deals with function instructions,
// so if we are dealing with a function declaration, we're done.
if (OldFunc->isDeclaration())
return;
- // When we remap instructions, we want to avoid duplicating inlined
- // DISubprograms, so record all subprograms we find as we duplicate
- // instructions and then freeze them in the MD map.
- // We also record information about dbg.value and dbg.declare to avoid
- // duplicating the types.
- DebugInfoFinder DIFinder;
+ // When we remap instructions within the same module, we want to avoid
+ // duplicating inlined DISubprograms, so record all subprograms we find as we
+ // duplicate instructions and then freeze them in the MD map. We also record
+ // information about dbg.value and dbg.declare to avoid duplicating the
+ // types.
+ Optional<DebugInfoFinder> DIFinder;
+
+ // Track the subprogram attachment that needs to be cloned to fine-tune the
+ // mapping within the same module.
+ DISubprogram *SPClonedWithinModule = nullptr;
+ if (Changes < CloneFunctionChangeType::DifferentModule) {
+ assert((NewFunc->getParent() == nullptr ||
+ NewFunc->getParent() == OldFunc->getParent()) &&
+ "Expected NewFunc to have the same parent, or no parent");
+
+ // Need to find subprograms, types, and compile units.
+ DIFinder.emplace();
+
+ SPClonedWithinModule = OldFunc->getSubprogram();
+ if (SPClonedWithinModule)
+ DIFinder->processSubprogram(SPClonedWithinModule);
+ } else {
+ assert((NewFunc->getParent() == nullptr ||
+ NewFunc->getParent() != OldFunc->getParent()) &&
+ "Expected NewFunc to have different parents, or no parent");
+
+ if (Changes == CloneFunctionChangeType::DifferentModule) {
+ assert(NewFunc->getParent() &&
+ "Need parent of new function to maintain debug info invariants");
+
+ // Need to find all the compile units.
+ DIFinder.emplace();
+ }
+ }
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
- for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
- BI != BE; ++BI) {
- const BasicBlock &BB = *BI;
+ for (const BasicBlock &BB : *OldFunc) {
// Create a new basic block and copy instructions into it!
BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
- ModuleLevelChanges ? &DIFinder : nullptr);
+ DIFinder ? &*DIFinder : nullptr);
// Add basic block mapping.
VMap[&BB] = CBB;
@@ -173,8 +183,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// implementation, which generates an invalid blockaddress when
// cloning a function.)
if (BB.hasAddressTaken()) {
- Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
- const_cast<BasicBlock*>(&BB));
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc),
+ const_cast<BasicBlock *>(&BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
}
@@ -183,54 +193,83 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Returns.push_back(RI);
}
- for (DISubprogram *ISP : DIFinder.subprograms())
- if (ISP != SP)
- VMap.MD()[ISP].reset(ISP);
-
- for (DICompileUnit *CU : DIFinder.compile_units())
- VMap.MD()[CU].reset(CU);
-
- for (DIType *Type : DIFinder.types())
- VMap.MD()[Type].reset(Type);
+ if (Changes < CloneFunctionChangeType::DifferentModule &&
+ DIFinder->subprogram_count() > 0) {
+ // Turn on module-level changes, since we need to clone (some of) the
+ // debug info metadata.
+ //
+ // FIXME: Metadata effectively owned by a function should be made
+ // local, and only that local metadata should be cloned.
+ ModuleLevelChanges = true;
+
+ auto mapToSelfIfNew = [&VMap](MDNode *N) {
+ // Avoid clobbering an existing mapping.
+ (void)VMap.MD().try_emplace(N, N);
+ };
+
+ // Avoid cloning types, compile units, and (other) subprograms.
+ for (DISubprogram *ISP : DIFinder->subprograms())
+ if (ISP != SPClonedWithinModule)
+ mapToSelfIfNew(ISP);
+
+ for (DICompileUnit *CU : DIFinder->compile_units())
+ mapToSelfIfNew(CU);
+
+ for (DIType *Type : DIFinder->types())
+ mapToSelfIfNew(Type);
+ } else {
+ assert(!SPClonedWithinModule &&
+ "Subprogram should be in DIFinder->subprogram_count()...");
+ }
+ const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
// Duplicate the metadata that is attached to the cloned function.
// Subprograms/CUs/types that were already mapped to themselves won't be
// duplicated.
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
for (auto MD : MDs) {
- NewFunc->addMetadata(
- MD.first,
- *MapMetadata(MD.second, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer));
+ NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
+ TypeMapper, Materializer));
}
- // Loop over all of the instructions in the function, fixing up operand
- // references as we go. This uses VMap to do all the hard work.
- for (Function::iterator BB =
- cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
- BE = NewFunc->end();
+ // Loop over all of the instructions in the new function, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (Function::iterator
+ BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
+ BE = NewFunc->end();
BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (Instruction &II : *BB)
- RemapInstruction(&II, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
-
- // Register all DICompileUnits of the old parent module in the new parent module
- auto* OldModule = OldFunc->getParent();
- auto* NewModule = NewFunc->getParent();
- if (OldModule && NewModule && OldModule != NewModule && DIFinder.compile_unit_count()) {
- auto* NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
- // Avoid multiple insertions of the same DICompileUnit to NMD.
- SmallPtrSet<const void*, 8> Visited;
- for (auto* Operand : NMD->operands())
- Visited.insert(Operand);
- for (auto* Unit : DIFinder.compile_units())
- // VMap.MD()[Unit] == Unit
- if (Visited.insert(Unit).second)
- NMD->addOperand(Unit);
+ RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
+
+ // Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
+ // same module, the compile unit will already be listed (or not). When
+ // cloning a module, CloneModule() will handle creating the named metadata.
+ if (Changes != CloneFunctionChangeType::DifferentModule)
+ return;
+
+ // Update !llvm.dbg.cu with compile units added to the new module if this
+ // function is being cloned in isolation.
+ //
+ // FIXME: This is making global / module-level changes, which doesn't seem
+ // like the right encapsulation Consider dropping the requirement to update
+ // !llvm.dbg.cu (either obsoleting the node, or restricting it to
+ // non-discardable compile units) instead of discovering compile units by
+ // visiting the metadata attached to global values, which would allow this
+ // code to be deleted. Alternatively, perhaps give responsibility for this
+ // update to CloneFunctionInto's callers.
+ auto *NewModule = NewFunc->getParent();
+ auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+ // Avoid multiple insertions of the same DICompileUnit to NMD.
+ SmallPtrSet<const void *, 8> Visited;
+ for (auto *Operand : NMD->operands())
+ Visited.insert(Operand);
+ for (auto *Unit : DIFinder->compile_units()) {
+ MDNode *MappedUnit =
+ MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);
+ if (Visited.insert(MappedUnit).second)
+ NMD->addOperand(MappedUnit);
}
}
@@ -243,7 +282,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
///
Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
ClonedCodeInfo *CodeInfo) {
- std::vector<Type*> ArgTypes;
+ std::vector<Type *> ArgTypes;
// The user might be deleting arguments to the function by specifying them in
// the VMap. If so, we need to not add the arguments to the arg ty vector
@@ -253,8 +292,9 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
ArgTypes.push_back(I.getType());
// Create a new function type...
- FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
- ArgTypes, F->getFunctionType()->isVarArg());
+ FunctionType *FTy =
+ FunctionType::get(F->getFunctionType()->getReturnType(), ArgTypes,
+ F->getFunctionType()->isVarArg());
// Create the new function...
Function *NewF = Function::Create(FTy, F->getLinkage(), F->getAddressSpace(),
@@ -262,61 +302,60 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
- for (const Argument & I : F->args())
+ for (const Argument &I : F->args())
if (VMap.count(&I) == 0) { // Is this argument preserved?
DestI->setName(I.getName()); // Copy the name over...
VMap[&I] = &*DestI++; // Add mapping to VMap
}
- SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "",
- CodeInfo);
+ SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
+ CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly,
+ Returns, "", CodeInfo);
return NewF;
}
-
-
namespace {
- /// This is a private class used to implement CloneAndPruneFunctionInto.
- struct PruningFunctionCloner {
- Function *NewFunc;
- const Function *OldFunc;
- ValueToValueMapTy &VMap;
- bool ModuleLevelChanges;
- const char *NameSuffix;
- ClonedCodeInfo *CodeInfo;
-
- public:
- PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
- ValueToValueMapTy &valueMap, bool moduleLevelChanges,
- const char *nameSuffix, ClonedCodeInfo *codeInfo)
- : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
- ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
- CodeInfo(codeInfo) {}
-
- /// The specified block is found to be reachable, clone it and
- /// anything that it can reach.
- void CloneBlock(const BasicBlock *BB,
- BasicBlock::const_iterator StartingInst,
- std::vector<const BasicBlock*> &ToClone);
- };
-}
+/// This is a private class used to implement CloneAndPruneFunctionInto.
+struct PruningFunctionCloner {
+ Function *NewFunc;
+ const Function *OldFunc;
+ ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
+ const char *NameSuffix;
+ ClonedCodeInfo *CodeInfo;
+
+public:
+ PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+ ValueToValueMapTy &valueMap, bool moduleLevelChanges,
+ const char *nameSuffix, ClonedCodeInfo *codeInfo)
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
+ ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
+ CodeInfo(codeInfo) {}
+
+ /// The specified block is found to be reachable, clone it and
+ /// anything that it can reach.
+ void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst,
+ std::vector<const BasicBlock *> &ToClone);
+};
+} // namespace
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
-void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
- BasicBlock::const_iterator StartingInst,
- std::vector<const BasicBlock*> &ToClone){
+void PruningFunctionCloner::CloneBlock(
+ const BasicBlock *BB, BasicBlock::const_iterator StartingInst,
+ std::vector<const BasicBlock *> &ToClone) {
WeakTrackingVH &BBEntry = VMap[BB];
// Have we already cloned this block?
- if (BBEntry) return;
+ if (BBEntry)
+ return;
// Nope, clone it now.
BasicBlock *NewBB;
BBEntry = NewBB = BasicBlock::Create(BB->getContext());
- if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+ if (BB->hasName())
+ NewBB->setName(BB->getName() + NameSuffix);
// It is only legal to clone a function if a block address within that
// function is never referenced outside of the function. Given that, we
@@ -328,8 +367,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Note that we don't need to fix the mapping for unreachable blocks;
// the default mapping there is safe.
if (BB->hasAddressTaken()) {
- Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
- const_cast<BasicBlock*>(BB));
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc),
+ const_cast<BasicBlock *>(BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
}
@@ -337,8 +376,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
- for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
- II != IE; ++II) {
+ for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
+ ++II) {
Instruction *NewInst = II->clone();
@@ -368,15 +407,17 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
if (II->hasName())
- NewInst->setName(II->getName()+NameSuffix);
+ NewInst->setName(II->getName() + NameSuffix);
VMap[&*II] = NewInst; // Add instruction map to value.
NewBB->getInstList().push_back(NewInst);
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
- if (CodeInfo)
+ if (CodeInfo) {
+ CodeInfo->OrigVMap[&*II] = NewInst;
if (auto *CB = dyn_cast<CallBase>(&*II))
if (CB->hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
+ }
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
@@ -414,9 +455,9 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
Value *V = VMap.lookup(SI->getCondition());
Cond = dyn_cast_or_null<ConstantInt>(V);
}
- if (Cond) { // Constant fold to uncond branch!
+ if (Cond) { // Constant fold to uncond branch!
SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond);
- BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
+ BasicBlock *Dest = const_cast<BasicBlock *>(Case.getCaseSuccessor());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
@@ -426,24 +467,26 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (!TerminatorDone) {
Instruction *NewInst = OldTI->clone();
if (OldTI->hasName())
- NewInst->setName(OldTI->getName()+NameSuffix);
+ NewInst->setName(OldTI->getName() + NameSuffix);
NewBB->getInstList().push_back(NewInst);
- VMap[OldTI] = NewInst; // Add instruction map to value.
+ VMap[OldTI] = NewInst; // Add instruction map to value.
- if (CodeInfo)
+ if (CodeInfo) {
+ CodeInfo->OrigVMap[OldTI] = NewInst;
if (auto *CB = dyn_cast<CallBase>(OldTI))
if (CB->hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
+ }
// Recursively clone any reachable successor blocks.
append_range(ToClone, successors(BB->getTerminator()));
}
if (CodeInfo) {
- CodeInfo->ContainsCalls |= hasCalls;
+ CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
- CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
- BB != &BB->getParent()->front();
+ CodeInfo->ContainsDynamicAllocas |=
+ hasStaticAllocas && BB != &BB->getParent()->front();
}
}
@@ -481,7 +524,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
// Clone the entry block, and anything recursively reachable from it.
- std::vector<const BasicBlock*> CloneWorklist;
+ std::vector<const BasicBlock *> CloneWorklist;
PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
@@ -494,11 +537,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// insert it into the new function in the right order. If not, ignore it.
//
// Defer PHI resolution until rest of function is resolved.
- SmallVector<const PHINode*, 16> PHIToResolve;
+ SmallVector<const PHINode *, 16> PHIToResolve;
for (const BasicBlock &BI : *OldFunc) {
Value *V = VMap.lookup(&BI);
BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
- if (!NewBB) continue; // Dead block.
+ if (!NewBB)
+ continue; // Dead block.
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
@@ -523,7 +567,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Defer PHI resolution until rest of function is resolved, PHI resolution
// requires the CFG to be up-to-date.
- for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+ for (unsigned phino = 0, e = PHIToResolve.size(); phino != e;) {
const PHINode *OPN = PHIToResolve[phino];
unsigned NumPreds = OPN->getNumIncomingValues();
const BasicBlock *OldBB = OPN->getParent();
@@ -532,21 +576,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Map operands for blocks that are live and remove operands for blocks
// that are dead.
for (; phino != PHIToResolve.size() &&
- PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+ PHIToResolve[phino]->getParent() == OldBB;
+ ++phino) {
OPN = PHIToResolve[phino];
PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
Value *V = VMap.lookup(PN->getIncomingBlock(pred));
if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
- Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ Value *InVal =
+ MapValue(PN->getIncomingValue(pred), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
} else {
PN->removeIncomingValue(pred, false);
- --pred; // Revisit the next entry.
+ --pred; // Revisit the next entry.
--e;
}
}
@@ -562,10 +607,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
if (NumPreds != PN->getNumIncomingValues()) {
assert(NumPreds < PN->getNumIncomingValues());
// Count how many times each predecessor comes to this block.
- std::map<BasicBlock*, unsigned> PredCount;
- for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
- PI != E; ++PI)
- --PredCount[*PI];
+ std::map<BasicBlock *, unsigned> PredCount;
+ for (BasicBlock *Pred : predecessors(NewBB))
+ --PredCount[Pred];
// Figure out how many entries to remove from each PHI.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -683,11 +727,15 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
- if (!BI || BI->isConditional()) { ++I; continue; }
+ if (!BI || BI->isConditional()) {
+ ++I;
+ continue;
+ }
BasicBlock *Dest = BI->getSuccessor(0);
if (!Dest->getSinglePredecessor()) {
- ++I; continue;
+ ++I;
+ continue;
}
// We shouldn't be able to get single-entry PHI nodes here, as instsimplify
@@ -720,7 +768,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
Returns.push_back(RI);
}
-
/// This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
/// effect of this is to copy significantly less code in cases where (for
@@ -728,13 +775,10 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
/// constant arguments cause a significant amount of code in the callee to be
/// dead. Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
- ValueToValueMapTy &VMap,
- bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix,
- ClonedCodeInfo *CodeInfo,
- Instruction *TheCall) {
+void llvm::CloneAndPruneFunctionInto(
+ Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
}
@@ -885,10 +929,9 @@ BasicBlock *llvm::DuplicateInstructionsInSplitBetween(
return NewBB;
}
-void llvm::cloneNoAliasScopes(
- ArrayRef<MDNode *> NoAliasDeclScopes,
- DenseMap<MDNode *, MDNode *> &ClonedScopes,
- StringRef Ext, LLVMContext &Context) {
+void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
+ DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ StringRef Ext, LLVMContext &Context) {
MDBuilder MDB(Context);
for (auto *ScopeList : NoAliasDeclScopes) {
@@ -911,9 +954,9 @@ void llvm::cloneNoAliasScopes(
}
}
-void llvm::adaptNoAliasScopes(
- Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
- LLVMContext &Context) {
+void llvm::adaptNoAliasScopes(Instruction *I,
+ const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+ LLVMContext &Context) {
auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * {
bool NeedsReplacement = false;
SmallVector<Metadata *, 8> NewScopeList;
@@ -945,9 +988,9 @@ void llvm::adaptNoAliasScopes(
replaceWhenNeeded(LLVMContext::MD_alias_scope);
}
-void llvm::cloneAndAdaptNoAliasScopes(
- ArrayRef<MDNode *> NoAliasDeclScopes,
- ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) {
+void llvm::cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
+ ArrayRef<BasicBlock *> NewBlocks,
+ LLVMContext &Context, StringRef Ext) {
if (NoAliasDeclScopes.empty())
return;
@@ -962,9 +1005,9 @@ void llvm::cloneAndAdaptNoAliasScopes(
adaptNoAliasScopes(&I, ClonedScopes, Context);
}
-void llvm::cloneAndAdaptNoAliasScopes(
- ArrayRef<MDNode *> NoAliasDeclScopes, Instruction *IStart,
- Instruction *IEnd, LLVMContext &Context, StringRef Ext) {
+void llvm::cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
+ Instruction *IStart, Instruction *IEnd,
+ LLVMContext &Context, StringRef Ext) {
if (NoAliasDeclScopes.empty())
return;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
index a6327bbf21bc..eb226b9b246d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -115,28 +115,26 @@ std::unique_ptr<Module> llvm::CloneModule(
// have been created, loop through and copy the global variable referrers
// over... We also set the attributes on the global now.
//
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- GlobalVariable *GV = cast<GlobalVariable>(VMap[&*I]);
+ for (const GlobalVariable &G : M.globals()) {
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[&G]);
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- I->getAllMetadata(MDs);
+ G.getAllMetadata(MDs);
for (auto MD : MDs)
- GV->addMetadata(MD.first,
- *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
+ GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
- if (I->isDeclaration())
+ if (G.isDeclaration())
continue;
- if (!ShouldCloneDefinition(&*I)) {
+ if (!ShouldCloneDefinition(&G)) {
// Skip after setting the correct linkage for an external reference.
GV->setLinkage(GlobalValue::ExternalLinkage);
continue;
}
- if (I->hasInitializer())
- GV->setInitializer(MapValue(I->getInitializer(), VMap));
+ if (G.hasInitializer())
+ GV->setInitializer(MapValue(G.getInitializer(), VMap));
- copyComdat(GV, &*I);
+ copyComdat(GV, &G);
}
// Similarly, copy over function bodies now...
@@ -162,7 +160,8 @@ std::unique_ptr<Module> llvm::CloneModule(
}
SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, &I, VMap, /*ModuleLevelChanges=*/true, Returns);
+ CloneFunctionInto(F, &I, VMap, CloneFunctionChangeType::ClonedModule,
+ Returns);
if (I.hasPersonalityFn())
F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap));
@@ -182,25 +181,13 @@ std::unique_ptr<Module> llvm::CloneModule(
}
// And named metadata....
- const auto* LLVM_DBG_CU = M.getNamedMetadata("llvm.dbg.cu");
for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
E = M.named_metadata_end();
I != E; ++I) {
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
- if (&NMD == LLVM_DBG_CU) {
- // Do not insert duplicate operands.
- SmallPtrSet<const void*, 8> Visited;
- for (const auto* Operand : NewNMD->operands())
- Visited.insert(Operand);
- for (const auto* Operand : NMD.operands()) {
- auto* MappedOperand = MapMetadata(Operand, VMap);
- if (Visited.insert(MappedOperand).second)
- NewNMD->addOperand(MappedOperand);
- }
- } else
- for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap));
}
return New;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 390925a03b73..9edc52b53550 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -333,7 +333,7 @@ void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
MemAddr = LI->getPointerOperand();
}
// Global variable can not be aliased with locals.
- if (dyn_cast<Constant>(MemAddr))
+ if (isa<Constant>(MemAddr))
break;
Value *Base = MemAddr->stripInBoundsConstantOffsets();
if (!isa<AllocaInst>(Base)) {
@@ -426,9 +426,8 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
CommonExitBlock->getFirstNonPHI()->getIterator());
- for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock);
- PI != PE;) {
- BasicBlock *Pred = *PI++;
+ for (BasicBlock *Pred :
+ llvm::make_early_inc_range(predecessors(CommonExitBlock))) {
if (Blocks.count(Pred))
continue;
Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
@@ -903,6 +902,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::Convergent:
case Attribute::Dereferenceable:
case Attribute::DereferenceableOrNull:
+ case Attribute::ElementType:
case Attribute::InAlloca:
case Attribute::InReg:
case Attribute::InaccessibleMemOnly:
@@ -930,6 +930,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::StructRet:
case Attribute::SwiftError:
case Attribute::SwiftSelf:
+ case Attribute::SwiftAsync:
case Attribute::WillReturn:
case Attribute::WriteOnly:
case Attribute::ZExt:
@@ -954,6 +955,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::NonLazyBind:
case Attribute::NoRedZone:
case Attribute::NoUnwind:
+ case Attribute::NoSanitizeCoverage:
case Attribute::NullPointerIsValid:
case Attribute::OptForFuzzing:
case Attribute::OptimizeNone:
@@ -971,6 +973,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::StackProtectStrong:
case Attribute::StrictFP:
case Attribute::UWTable:
+ case Attribute::VScaleRange:
case Attribute::NoCfCheck:
case Attribute::MustProgress:
case Attribute::NoProfile:
@@ -1161,9 +1164,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
AllocaInst *Struct = nullptr;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
std::vector<Type *> ArgTypes;
- for (ValueSet::iterator v = StructValues.begin(),
- ve = StructValues.end(); v != ve; ++v)
- ArgTypes.push_back((*v)->getType());
+ for (Value *V : StructValues)
+ ArgTypes.push_back(V->getType());
// Allocate a struct at the beginning of this function
StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
@@ -1513,20 +1515,19 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
continue;
}
- // If the location isn't a constant or an instruction, delete the
- // intrinsic.
- auto *DVI = cast<DbgVariableIntrinsic>(DII);
- Value *Location = DVI->getVariableLocation();
- if (!Location ||
- (!isa<Constant>(Location) && !isa<Instruction>(Location))) {
- DebugIntrinsicsToDelete.push_back(DVI);
- continue;
- }
+ auto IsInvalidLocation = [&NewFunc](Value *Location) {
+ // Location is invalid if it isn't a constant or an instruction, or is an
+ // instruction but isn't in the new function.
+ if (!Location ||
+ (!isa<Constant>(Location) && !isa<Instruction>(Location)))
+ return true;
+ Instruction *LocationInst = dyn_cast<Instruction>(Location);
+ return LocationInst && LocationInst->getFunction() != &NewFunc;
+ };
- // If the variable location is an instruction but isn't in the new
- // function, delete the intrinsic.
- Instruction *LocationInst = dyn_cast<Instruction>(Location);
- if (LocationInst && LocationInst->getFunction() != &NewFunc) {
+ auto *DVI = cast<DbgVariableIntrinsic>(DII);
+ // If any of the used locations are invalid, delete the intrinsic.
+ if (any_of(DVI->location_ops(), IsInvalidLocation)) {
DebugIntrinsicsToDelete.push_back(DVI);
continue;
}
@@ -1539,7 +1540,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
NewSP, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
OldVar->getAlignInBits());
- DVI->setArgOperand(1, MetadataAsValue::get(Ctx, NewVar));
+ DVI->setVariable(cast<DILocalVariable>(NewVar));
}
for (auto *DII : DebugIntrinsicsToDelete)
DII->eraseFromParent();
@@ -1552,10 +1553,11 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP));
// Loop info metadata may contain line locations. Fix them up.
- auto updateLoopInfoLoc = [&Ctx,
- NewSP](const DILocation &Loc) -> DILocation * {
- return DILocation::get(Ctx, Loc.getLine(), Loc.getColumn(), NewSP,
- nullptr);
+ auto updateLoopInfoLoc = [&Ctx, NewSP](Metadata *MD) -> Metadata * {
+ if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+ return DILocation::get(Ctx, Loc->getLine(), Loc->getColumn(), NewSP,
+ nullptr);
+ return MD;
};
updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
}
@@ -1595,10 +1597,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
Instruction *I = &*It;
++It;
- if (match(I, m_Intrinsic<Intrinsic::assume>())) {
+ if (auto *AI = dyn_cast<AssumeInst>(I)) {
if (AC)
- AC->unregisterAssumption(cast<CallInst>(I));
- I->eraseFromParent();
+ AC->unregisterAssumption(AI);
+ AI->eraseFromParent();
}
}
}
@@ -1612,15 +1614,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
SmallPtrSet<BasicBlock *, 1> ExitBlocks;
for (BasicBlock *Block : Blocks) {
- for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
- ++SI) {
- if (!Blocks.count(*SI)) {
+ for (BasicBlock *Succ : successors(Block)) {
+ if (!Blocks.count(Succ)) {
// Update the branch weight for this successor.
if (BFI) {
- BlockFrequency &BF = ExitWeights[*SI];
- BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
+ BlockFrequency &BF = ExitWeights[Succ];
+ BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ);
}
- ExitBlocks.insert(*SI);
+ ExitBlocks.insert(Succ);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
index 3e4d53c10dc9..30c3fa521d52 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -1,4 +1,4 @@
-//===- Debugify.cpp - Attach synthetic debug info to everything -----------===//
+//===- Debugify.cpp - Check debug info preservation in optimizations ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,10 @@
//
//===----------------------------------------------------------------------===//
///
-/// \file This pass attaches synthetic debug info to everything. It can be used
-/// to create targeted tests for debug info preservation.
+/// \file In the `synthetic` mode, the `-debugify` pass attaches synthetic
+/// debug info to everything; it can be used to create targeted tests for
+/// debug info preservation. In the `original` mode, it instead checks that
+/// existing (original) debug info is preserved. `synthetic` is the default.
///
//===----------------------------------------------------------------------===//
@@ -23,6 +25,10 @@
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+
+#define DEBUG_TYPE "debugify"
using namespace llvm;
@@ -35,6 +41,8 @@ enum class Level {
Locations,
LocationsAndVariables
};
+
+// Used for the synthetic mode only.
cl::opt<Level> DebugifyLevel(
"debugify-level", cl::desc("Kind of debug info to add"),
cl::values(clEnumValN(Level::Locations, "locations", "Locations only"),
@@ -199,16 +207,33 @@ bool llvm::applyDebugifyMetadata(
return true;
}
-static bool applyDebugify(Function &F) {
+static bool
+applyDebugify(Function &F,
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ StringRef NameOfWrappedPass = "") {
Module &M = *F.getParent();
auto FuncIt = F.getIterator();
- return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
- "FunctionDebugify: ", /*ApplyToMF=*/nullptr);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+ "FunctionDebugify: ", /*ApplyToMF*/ nullptr);
+ assert(DIPreservationMap);
+ return collectDebugInfoMetadata(M, M.functions(), *DIPreservationMap,
+ "FunctionDebugify (original debuginfo)",
+ NameOfWrappedPass);
}
-static bool applyDebugify(Module &M) {
- return applyDebugifyMetadata(M, M.functions(),
- "ModuleDebugify: ", /*ApplyToMF=*/nullptr);
+static bool
+applyDebugify(Module &M,
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ StringRef NameOfWrappedPass = "") {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
+ return collectDebugInfoMetadata(M, M.functions(), *DIPreservationMap,
+ "ModuleDebugify (original debuginfo)",
+ NameOfWrappedPass);
}
bool llvm::stripDebugifyMetadata(Module &M) {
@@ -256,6 +281,355 @@ bool llvm::stripDebugifyMetadata(Module &M) {
return Changed;
}
+bool llvm::collectDebugInfoMetadata(Module &M,
+ iterator_range<Module::iterator> Functions,
+ DebugInfoPerPassMap &DIPreservationMap,
+ StringRef Banner,
+ StringRef NameOfWrappedPass) {
+ LLVM_DEBUG(dbgs() << Banner << ": (before) " << NameOfWrappedPass << '\n');
+
+ // Clear the map with the debug info before every single pass.
+ DIPreservationMap.clear();
+
+ if (!M.getNamedMetadata("llvm.dbg.cu")) {
+ dbg() << Banner << ": Skipping module without debug info\n";
+ return false;
+ }
+
+ // Visit each instruction.
+ for (Function &F : Functions) {
+ if (isFunctionSkipped(F))
+ continue;
+
+ // Collect the DISubprogram.
+ auto *SP = F.getSubprogram();
+ DIPreservationMap[NameOfWrappedPass].DIFunctions.insert({F.getName(), SP});
+ if (SP) {
+ LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n');
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ DIPreservationMap[NameOfWrappedPass].DIVariables[DV] = 0;
+ }
+ }
+ }
+
+ for (BasicBlock &BB : F) {
+ // Collect debug locations (!dbg) and debug variable intrinsics.
+ for (Instruction &I : BB) {
+ // Skip PHIs.
+ if (isa<PHINode>(I))
+ continue;
+
+ // Collect dbg.values and dbg.declares.
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ if (!SP)
+ continue;
+ // Skip inlined variables.
+ if (I.getDebugLoc().getInlinedAt())
+ continue;
+ // Skip undef values.
+ if (DVI->isUndef())
+ continue;
+
+ auto *Var = DVI->getVariable();
+ DIPreservationMap[NameOfWrappedPass].DIVariables[Var]++;
+ continue;
+ }
+
+ // Skip debug instructions other than dbg.value and dbg.declare.
+ if (isa<DbgInfoIntrinsic>(&I))
+ continue;
+
+ LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
+ DIPreservationMap[NameOfWrappedPass].InstToDelete.insert({&I, &I});
+
+ const DILocation *Loc = I.getDebugLoc().get();
+ bool HasLoc = Loc != nullptr;
+ DIPreservationMap[NameOfWrappedPass].DILocations.insert({&I, HasLoc});
+ }
+ }
+ }
+
+ return true;
+}
+
+// This checks the preservation of original debug info attached to functions.
+static bool checkFunctions(const DebugFnMap &DIFunctionsBefore,
+ const DebugFnMap &DIFunctionsAfter,
+ StringRef NameOfWrappedPass,
+ StringRef FileNameFromCU, bool ShouldWriteIntoJSON,
+ llvm::json::Array &Bugs) {
+ bool Preserved = true;
+ for (const auto &F : DIFunctionsAfter) {
+ if (F.second)
+ continue;
+ auto SPIt = DIFunctionsBefore.find(F.first);
+ if (SPIt == DIFunctionsBefore.end()) {
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"},
+ {"name", F.first},
+ {"action", "not-generate"}}));
+ else
+ dbg() << "ERROR: " << NameOfWrappedPass
+ << " did not generate DISubprogram for " << F.first << " from "
+ << FileNameFromCU << '\n';
+ Preserved = false;
+ } else {
+ auto SP = SPIt->second;
+ if (!SP)
+ continue;
+ // If the function had the SP attached before the pass, consider it as
+ // a debug info bug.
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"},
+ {"name", F.first},
+ {"action", "drop"}}));
+ else
+ dbg() << "ERROR: " << NameOfWrappedPass << " dropped DISubprogram of "
+ << F.first << " from " << FileNameFromCU << '\n';
+ Preserved = false;
+ }
+ }
+
+ return Preserved;
+}
+
+// This checks the preservation of the original debug info attached to
+// instructions.
+static bool checkInstructions(const DebugInstMap &DILocsBefore,
+ const DebugInstMap &DILocsAfter,
+ const WeakInstValueMap &InstToDelete,
+ StringRef NameOfWrappedPass,
+ StringRef FileNameFromCU,
+ bool ShouldWriteIntoJSON,
+ llvm::json::Array &Bugs) {
+ bool Preserved = true;
+ for (const auto &L : DILocsAfter) {
+ if (L.second)
+ continue;
+ auto Instr = L.first;
+
+ // In order to avoid pointer reuse/recycling, skip the values that might
+ // have been deleted during a pass.
+ auto WeakInstrPtr = InstToDelete.find(Instr);
+ if (WeakInstrPtr != InstToDelete.end() && !WeakInstrPtr->second)
+ continue;
+
+ auto FnName = Instr->getFunction()->getName();
+ auto BB = Instr->getParent();
+ auto BBName = BB->hasName() ? BB->getName() : "no-name";
+ auto InstName = Instruction::getOpcodeName(Instr->getOpcode());
+
+ auto InstrIt = DILocsBefore.find(Instr);
+ if (InstrIt == DILocsBefore.end()) {
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
+ {"fn-name", FnName.str()},
+ {"bb-name", BBName.str()},
+ {"instr", InstName},
+ {"action", "not-generate"}}));
+ else
+ dbg() << "WARNING: " << NameOfWrappedPass
+ << " did not generate DILocation for " << *Instr
+ << " (BB: " << BBName << ", Fn: " << FnName
+ << ", File: " << FileNameFromCU << ")\n";
+ Preserved = false;
+ } else {
+ if (!InstrIt->second)
+ continue;
+ // If the instr had the !dbg attached before the pass, consider it as
+ // a debug info issue.
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
+ {"fn-name", FnName.str()},
+ {"bb-name", BBName.str()},
+ {"instr", InstName},
+ {"action", "drop"}}));
+ else
+ dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of "
+ << *Instr << " (BB: " << BBName << ", Fn: " << FnName
+ << ", File: " << FileNameFromCU << ")\n";
+ Preserved = false;
+ }
+ }
+
+ return Preserved;
+}
+
+// This checks the preservation of original debug variable intrinsics.
+static bool checkVars(const DebugVarMap &DIFunctionsBefore,
+ const DebugVarMap &DIFunctionsAfter,
+ StringRef NameOfWrappedPass, StringRef FileNameFromCU,
+ bool ShouldWriteIntoJSON, llvm::json::Array &Bugs) {
+ bool Preserved = true;
+ for (const auto &V : DIFunctionsBefore) {
+ auto VarIt = DIFunctionsAfter.find(V.first);
+ if (VarIt == DIFunctionsAfter.end())
+ continue;
+
+ unsigned NumOfDbgValsAfter = VarIt->second;
+
+ if (V.second > NumOfDbgValsAfter) {
+ if (ShouldWriteIntoJSON)
+ Bugs.push_back(llvm::json::Object(
+ {{"metadata", "dbg-var-intrinsic"},
+ {"name", V.first->getName()},
+ {"fn-name", V.first->getScope()->getSubprogram()->getName()},
+ {"action", "drop"}}));
+ else
+ dbg() << "WARNING: " << NameOfWrappedPass
+ << " drops dbg.value()/dbg.declare() for " << V.first->getName()
+ << " from "
+ << "function " << V.first->getScope()->getSubprogram()->getName()
+ << " (file " << FileNameFromCU << ")\n";
+ Preserved = false;
+ }
+ }
+
+ return Preserved;
+}
+
+// Write the JSON data into the specified file.
+static void writeJSON(StringRef OrigDIVerifyBugsReportFilePath,
+ StringRef FileNameFromCU, StringRef NameOfWrappedPass,
+ llvm::json::Array &Bugs) {
+ std::error_code EC;
+ raw_fd_ostream OS_FILE{OrigDIVerifyBugsReportFilePath, EC,
+ sys::fs::OF_Append | sys::fs::OF_TextWithCRLF};
+ if (EC) {
+ errs() << "Could not open file: " << EC.message() << ", "
+ << OrigDIVerifyBugsReportFilePath << '\n';
+ return;
+ }
+
+ OS_FILE << "{\"file\":\"" << FileNameFromCU << "\", ";
+
+ StringRef PassName = NameOfWrappedPass != "" ? NameOfWrappedPass : "no-name";
+ OS_FILE << "\"pass\":\"" << PassName << "\", ";
+
+ llvm::json::Value BugsToPrint{std::move(Bugs)};
+ OS_FILE << "\"bugs\": " << BugsToPrint;
+
+ OS_FILE << "}\n";
+}
+
+bool llvm::checkDebugInfoMetadata(Module &M,
+ iterator_range<Module::iterator> Functions,
+ DebugInfoPerPassMap &DIPreservationMap,
+ StringRef Banner, StringRef NameOfWrappedPass,
+ StringRef OrigDIVerifyBugsReportFilePath) {
+ LLVM_DEBUG(dbgs() << Banner << ": (after) " << NameOfWrappedPass << '\n');
+
+ if (!M.getNamedMetadata("llvm.dbg.cu")) {
+ dbg() << Banner << ": Skipping module without debug info\n";
+ return false;
+ }
+
+ // Map the debug info holding DIs after a pass.
+ DebugInfoPerPassMap DIPreservationAfter;
+
+ // Visit each instruction.
+ for (Function &F : Functions) {
+ if (isFunctionSkipped(F))
+ continue;
+
+ // TODO: Collect metadata other than DISubprograms.
+ // Collect the DISubprogram.
+ auto *SP = F.getSubprogram();
+ DIPreservationAfter[NameOfWrappedPass].DIFunctions.insert(
+ {F.getName(), SP});
+
+ if (SP) {
+ LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n');
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ DIPreservationAfter[NameOfWrappedPass].DIVariables[DV] = 0;
+ }
+ }
+ }
+
+ for (BasicBlock &BB : F) {
+ // Collect debug locations (!dbg) and debug variable intrinsics.
+ for (Instruction &I : BB) {
+ // Skip PHIs.
+ if (isa<PHINode>(I))
+ continue;
+
+ // Collect dbg.values and dbg.declares.
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ if (!SP)
+ continue;
+ // Skip inlined variables.
+ if (I.getDebugLoc().getInlinedAt())
+ continue;
+ // Skip undef values.
+ if (DVI->isUndef())
+ continue;
+
+ auto *Var = DVI->getVariable();
+ DIPreservationAfter[NameOfWrappedPass].DIVariables[Var]++;
+ continue;
+ }
+
+ // Skip debug instructions other than dbg.value and dbg.declare.
+ if (isa<DbgInfoIntrinsic>(&I))
+ continue;
+
+ LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
+
+ const DILocation *Loc = I.getDebugLoc().get();
+ bool HasLoc = Loc != nullptr;
+
+ DIPreservationAfter[NameOfWrappedPass].DILocations.insert({&I, HasLoc});
+ }
+ }
+ }
+
+ // TODO: The name of the module could be read better?
+ StringRef FileNameFromCU =
+ (cast<DICompileUnit>(M.getNamedMetadata("llvm.dbg.cu")->getOperand(0)))
+ ->getFilename();
+
+ auto DIFunctionsBefore = DIPreservationMap[NameOfWrappedPass].DIFunctions;
+ auto DIFunctionsAfter = DIPreservationAfter[NameOfWrappedPass].DIFunctions;
+
+ auto DILocsBefore = DIPreservationMap[NameOfWrappedPass].DILocations;
+ auto DILocsAfter = DIPreservationAfter[NameOfWrappedPass].DILocations;
+
+ auto InstToDelete = DIPreservationAfter[NameOfWrappedPass].InstToDelete;
+
+ auto DIVarsBefore = DIPreservationMap[NameOfWrappedPass].DIVariables;
+ auto DIVarsAfter = DIPreservationAfter[NameOfWrappedPass].DIVariables;
+
+ bool ShouldWriteIntoJSON = !OrigDIVerifyBugsReportFilePath.empty();
+ llvm::json::Array Bugs;
+
+ bool ResultForFunc =
+ checkFunctions(DIFunctionsBefore, DIFunctionsAfter, NameOfWrappedPass,
+ FileNameFromCU, ShouldWriteIntoJSON, Bugs);
+ bool ResultForInsts = checkInstructions(
+ DILocsBefore, DILocsAfter, InstToDelete, NameOfWrappedPass,
+ FileNameFromCU, ShouldWriteIntoJSON, Bugs);
+
+ bool ResultForVars = checkVars(DIVarsBefore, DIVarsAfter, NameOfWrappedPass,
+ FileNameFromCU, ShouldWriteIntoJSON, Bugs);
+
+ bool Result = ResultForFunc && ResultForInsts && ResultForVars;
+
+ StringRef ResultBanner = NameOfWrappedPass != "" ? NameOfWrappedPass : Banner;
+ if (ShouldWriteIntoJSON && !Bugs.empty())
+ writeJSON(OrigDIVerifyBugsReportFilePath, FileNameFromCU, NameOfWrappedPass,
+ Bugs);
+
+ if (Result)
+ dbg() << ResultBanner << ": PASS\n";
+ else
+ dbg() << ResultBanner << ": FAIL\n";
+
+ LLVM_DEBUG(dbgs() << "\n\n");
+ return Result;
+}
+
namespace {
/// Return true if a mis-sized diagnostic is issued for \p DVI.
bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
@@ -264,15 +638,16 @@ bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
//
// TODO: This, along with a check for non-null value operands, should be
// promoted to verifier failures.
- Value *V = DVI->getValue();
- if (!V)
- return false;
// For now, don't try to interpret anything more complicated than an empty
// DIExpression. Eventually we should try to handle OP_deref and fragments.
if (DVI->getExpression()->getNumElements())
return false;
+ Value *V = DVI->getVariableLocationOp(0);
+ if (!V)
+ return false;
+
Type *Ty = V->getType();
uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty);
Optional<uint64_t> DbgVarSize = DVI->getFragmentSizeInBits();
@@ -331,7 +706,7 @@ bool checkDebugifyMetadata(Module &M,
// Find missing lines.
for (Instruction &I : instructions(F)) {
- if (isa<DbgValueInst>(&I) || isa<PHINode>(&I))
+ if (isa<DbgValueInst>(&I))
continue;
auto DL = I.getDebugLoc();
@@ -340,7 +715,7 @@ bool checkDebugifyMetadata(Module &M,
continue;
}
- if (!DL) {
+ if (!isa<PHINode>(&I) && !DL) {
dbg() << "WARNING: Instruction with empty DebugLoc in function ";
dbg() << F.getName() << " --";
I.print(dbg());
@@ -394,43 +769,77 @@ bool checkDebugifyMetadata(Module &M,
/// ModulePass for attaching synthetic debug info to everything, used with the
/// legacy module pass manager.
struct DebugifyModulePass : public ModulePass {
- bool runOnModule(Module &M) override { return applyDebugify(M); }
+ bool runOnModule(Module &M) override {
+ return applyDebugify(M, Mode, DIPreservationMap, NameOfWrappedPass);
+ }
- DebugifyModulePass() : ModulePass(ID) {}
+ DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ StringRef NameOfWrappedPass = "",
+ DebugInfoPerPassMap *DIPreservationMap = nullptr)
+ : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass),
+ DIPreservationMap(DIPreservationMap), Mode(Mode) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
static char ID; // Pass identification.
+
+private:
+ StringRef NameOfWrappedPass;
+ DebugInfoPerPassMap *DIPreservationMap;
+ enum DebugifyMode Mode;
};
/// FunctionPass for attaching synthetic debug info to instructions within a
/// single function, used with the legacy module pass manager.
struct DebugifyFunctionPass : public FunctionPass {
- bool runOnFunction(Function &F) override { return applyDebugify(F); }
+ bool runOnFunction(Function &F) override {
+ return applyDebugify(F, Mode, DIPreservationMap, NameOfWrappedPass);
+ }
- DebugifyFunctionPass() : FunctionPass(ID) {}
+ DebugifyFunctionPass(
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ StringRef NameOfWrappedPass = "",
+ DebugInfoPerPassMap *DIPreservationMap = nullptr)
+ : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass),
+ DIPreservationMap(DIPreservationMap), Mode(Mode) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
static char ID; // Pass identification.
+
+private:
+ StringRef NameOfWrappedPass;
+ DebugInfoPerPassMap *DIPreservationMap;
+ enum DebugifyMode Mode;
};
/// ModulePass for checking debug info inserted by -debugify, used with the
/// legacy module pass manager.
struct CheckDebugifyModulePass : public ModulePass {
bool runOnModule(Module &M) override {
- return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
- "CheckModuleDebugify", Strip, StatsMap);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
+ "CheckModuleDebugify", Strip, StatsMap);
+ return checkDebugInfoMetadata(
+ M, M.functions(), *DIPreservationMap,
+ "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
+ OrigDIVerifyBugsReportFilePath);
}
- CheckDebugifyModulePass(bool Strip = false, StringRef NameOfWrappedPass = "",
- DebugifyStatsMap *StatsMap = nullptr)
- : ModulePass(ID), Strip(Strip), NameOfWrappedPass(NameOfWrappedPass),
- StatsMap(StatsMap) {}
+ CheckDebugifyModulePass(
+ bool Strip = false, StringRef NameOfWrappedPass = "",
+ DebugifyStatsMap *StatsMap = nullptr,
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ StringRef OrigDIVerifyBugsReportFilePath = "")
+ : ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass),
+ OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath),
+ StatsMap(StatsMap), DIPreservationMap(DIPreservationMap), Mode(Mode),
+ Strip(Strip) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -439,9 +848,12 @@ struct CheckDebugifyModulePass : public ModulePass {
static char ID; // Pass identification.
private:
- bool Strip;
StringRef NameOfWrappedPass;
+ StringRef OrigDIVerifyBugsReportFilePath;
DebugifyStatsMap *StatsMap;
+ DebugInfoPerPassMap *DIPreservationMap;
+ enum DebugifyMode Mode;
+ bool Strip;
};
/// FunctionPass for checking debug info inserted by -debugify-function, used
@@ -450,16 +862,26 @@ struct CheckDebugifyFunctionPass : public FunctionPass {
bool runOnFunction(Function &F) override {
Module &M = *F.getParent();
auto FuncIt = F.getIterator();
- return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
- NameOfWrappedPass, "CheckFunctionDebugify",
- Strip, StatsMap);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+ NameOfWrappedPass, "CheckFunctionDebugify",
+ Strip, StatsMap);
+ return checkDebugInfoMetadata(
+ M, make_range(FuncIt, std::next(FuncIt)), *DIPreservationMap,
+ "CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass,
+ OrigDIVerifyBugsReportFilePath);
}
- CheckDebugifyFunctionPass(bool Strip = false,
- StringRef NameOfWrappedPass = "",
- DebugifyStatsMap *StatsMap = nullptr)
- : FunctionPass(ID), Strip(Strip), NameOfWrappedPass(NameOfWrappedPass),
- StatsMap(StatsMap) {}
+ CheckDebugifyFunctionPass(
+ bool Strip = false, StringRef NameOfWrappedPass = "",
+ DebugifyStatsMap *StatsMap = nullptr,
+ enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
+ DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ StringRef OrigDIVerifyBugsReportFilePath = "")
+ : FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass),
+ OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath),
+ StatsMap(StatsMap), DIPreservationMap(DIPreservationMap), Mode(Mode),
+ Strip(Strip) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -468,9 +890,12 @@ struct CheckDebugifyFunctionPass : public FunctionPass {
static char ID; // Pass identification.
private:
- bool Strip;
StringRef NameOfWrappedPass;
+ StringRef OrigDIVerifyBugsReportFilePath;
DebugifyStatsMap *StatsMap;
+ DebugInfoPerPassMap *DIPreservationMap;
+ enum DebugifyMode Mode;
+ bool Strip;
};
} // end anonymous namespace
@@ -496,12 +921,23 @@ void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) {
}
}
-ModulePass *llvm::createDebugifyModulePass() {
- return new DebugifyModulePass();
+ModulePass *createDebugifyModulePass(enum DebugifyMode Mode,
+ llvm::StringRef NameOfWrappedPass,
+ DebugInfoPerPassMap *DIPreservationMap) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return new DebugifyModulePass();
+ assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
+ return new DebugifyModulePass(Mode, NameOfWrappedPass, DIPreservationMap);
}
-FunctionPass *llvm::createDebugifyFunctionPass() {
- return new DebugifyFunctionPass();
+FunctionPass *
+createDebugifyFunctionPass(enum DebugifyMode Mode,
+ llvm::StringRef NameOfWrappedPass,
+ DebugInfoPerPassMap *DIPreservationMap) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return new DebugifyFunctionPass();
+ assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
+ return new DebugifyFunctionPass(Mode, NameOfWrappedPass, DIPreservationMap);
}
PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
@@ -510,16 +946,28 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
-ModulePass *llvm::createCheckDebugifyModulePass(bool Strip,
- StringRef NameOfWrappedPass,
- DebugifyStatsMap *StatsMap) {
- return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap);
+ModulePass *createCheckDebugifyModulePass(
+ bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap,
+ enum DebugifyMode Mode, DebugInfoPerPassMap *DIPreservationMap,
+ StringRef OrigDIVerifyBugsReportFilePath) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap);
+ assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
+ return new CheckDebugifyModulePass(false, NameOfWrappedPass, nullptr, Mode,
+ DIPreservationMap,
+ OrigDIVerifyBugsReportFilePath);
}
-FunctionPass *
-llvm::createCheckDebugifyFunctionPass(bool Strip, StringRef NameOfWrappedPass,
- DebugifyStatsMap *StatsMap) {
- return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap);
+FunctionPass *createCheckDebugifyFunctionPass(
+ bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap,
+ enum DebugifyMode Mode, DebugInfoPerPassMap *DIPreservationMap,
+ StringRef OrigDIVerifyBugsReportFilePath) {
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap);
+ assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
+ return new CheckDebugifyFunctionPass(false, NameOfWrappedPass, nullptr, Mode,
+ DIPreservationMap,
+ OrigDIVerifyBugsReportFilePath);
}
PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 26f8e21952cc..31d03e1e86af 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -9,6 +9,7 @@
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
@@ -17,6 +18,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
+
using namespace llvm;
static void insertCall(Function &CurFn, StringRef Func,
@@ -123,6 +125,7 @@ struct EntryExitInstrumenter : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); }
};
@@ -136,20 +139,34 @@ struct PostInlineEntryExitInstrumenter : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); }
};
char PostInlineEntryExitInstrumenter::ID = 0;
}
-INITIALIZE_PASS(
+INITIALIZE_PASS_BEGIN(
+ EntryExitInstrumenter, "ee-instrument",
+ "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(
EntryExitInstrumenter, "ee-instrument",
"Instrument function entry/exit with calls to e.g. mcount() (pre inlining)",
false, false)
-INITIALIZE_PASS(PostInlineEntryExitInstrumenter, "post-inline-ee-instrument",
- "Instrument function entry/exit with calls to e.g. mcount() "
- "(post inlining)",
- false, false)
+
+INITIALIZE_PASS_BEGIN(
+ PostInlineEntryExitInstrumenter, "post-inline-ee-instrument",
+ "Instrument function entry/exit with calls to e.g. mcount() "
+ "(post inlining)",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(
+ PostInlineEntryExitInstrumenter, "post-inline-ee-instrument",
+ "Instrument function entry/exit with calls to e.g. mcount() "
+ "(post inlining)",
+ false, false)
FunctionPass *llvm::createEntryExitInstrumenterPass() {
return new EntryExitInstrumenter();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index accedd5b4ee0..91053338df5f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -90,7 +90,7 @@ IRBuilder<> *EscapeEnumerator::Next() {
SmallVector<Value *, 16> Args;
for (unsigned I = Calls.size(); I != 0;) {
CallInst *CI = cast<CallInst>(Calls[--I]);
- changeToInvokeAndSplitBasicBlock(CI, CleanupBB);
+ changeToInvokeAndSplitBasicBlock(CI, CleanupBB, DTU);
}
Builder.SetInsertPoint(RI);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
index 732b00635e29..463c223d9e8f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -127,7 +127,7 @@ isSimpleEnoughValueToCommit(Constant *C,
/// another pointer type, we punt. We basically just support direct accesses to
/// globals and GEP's of globals. This should be kept up to date with
/// CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C) {
+static bool isSimpleEnoughPointerToCommit(Constant *C, const DataLayout &DL) {
// Conservatively, avoid aggregate types. This is because we don't
// want to worry about them partially overlapping other stores.
if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
@@ -157,13 +157,14 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
if (!CE->isGEPWithNoNotionalOverIndexing())
return false;
- return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
-
- // A constantexpr bitcast from a pointer to another pointer is a no-op,
- // and we know how to evaluate it by moving the bitcast from the pointer
- // operand to the value operand.
+ return ConstantFoldLoadThroughGEPConstantExpr(
+ GV->getInitializer(), CE,
+ cast<GEPOperator>(CE)->getResultElementType(), DL);
} else if (CE->getOpcode() == Instruction::BitCast &&
isa<GlobalVariable>(CE->getOperand(0))) {
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
// Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
// external globals.
return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
@@ -173,16 +174,16 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
}
-/// Apply 'Func' to Ptr. If this returns nullptr, introspect the pointer's
-/// type and walk down through the initial elements to obtain additional
-/// pointers to try. Returns the first non-null return value from Func, or
-/// nullptr if the type can't be introspected further.
+/// Apply \p TryLoad to Ptr. If this returns \p nullptr, introspect the
+/// pointer's type and walk down through the initial elements to obtain
+/// additional pointers to try. Returns the first non-null return value from
+/// \p TryLoad, or \p nullptr if the type can't be introspected further.
static Constant *
evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
const TargetLibraryInfo *TLI,
- std::function<Constant *(Constant *)> Func) {
+ std::function<Constant *(Constant *)> TryLoad) {
Constant *Val;
- while (!(Val = Func(Ptr))) {
+ while (!(Val = TryLoad(Ptr))) {
// If Ty is a non-opaque struct, we can convert the pointer to the struct
// into a pointer to its first member.
// FIXME: This could be extended to support arrays as well.
@@ -207,12 +208,14 @@ static Constant *getInitializer(Constant *C) {
/// Return the value that would be computed by a load from P after the stores
/// reflected by 'memory' have been performed. If we can't decide, return null.
-Constant *Evaluator::ComputeLoadResult(Constant *P) {
+Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
- auto findMemLoc = [this](Constant *Ptr) { return MutatedMemory.lookup(Ptr); };
+ auto TryFindMemLoc = [this](Constant *Ptr) {
+ return MutatedMemory.lookup(Ptr);
+ };
- if (Constant *Val = findMemLoc(P))
+ if (Constant *Val = TryFindMemLoc(P))
return Val;
// Access it.
@@ -227,7 +230,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
// Handle a constantexpr getelementptr.
case Instruction::GetElementPtr:
if (auto *I = getInitializer(CE->getOperand(0)))
- return ConstantFoldLoadThroughGEPConstantExpr(I, CE);
+ return ConstantFoldLoadThroughGEPConstantExpr(I, CE, Ty, DL);
break;
// Handle a constantexpr bitcast.
case Instruction::BitCast:
@@ -236,7 +239,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If it hasn't, we may still be able to find a stored pointer by
// introspecting the type.
Constant *Val =
- evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, findMemLoc);
+ evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryFindMemLoc);
if (!Val)
Val = getInitializer(CE->getOperand(0));
if (Val)
@@ -318,9 +321,10 @@ Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) {
/// Evaluate all instructions in block BB, returning true if successful, false
/// if we can't evaluate it. NewBB returns the next BB that control flows into,
-/// or null upon return.
-bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
- BasicBlock *&NextBB) {
+/// or null upon return. StrippedPointerCastsForAliasAnalysis is set to true if
+/// we looked through pointer casts to evaluate something.
+bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
+ bool &StrippedPointerCastsForAliasAnalysis) {
// This is the main evaluation loop.
while (true) {
Constant *InstResult = nullptr;
@@ -339,7 +343,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Ptr = FoldedPtr;
LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
}
- if (!isSimpleEnoughPointerToCommit(Ptr)) {
+ if (!isSimpleEnoughPointerToCommit(Ptr, DL)) {
// If this is too complex for us to commit, reject it.
LLVM_DEBUG(
dbgs() << "Pointer is too complex for us to evaluate store.");
@@ -367,9 +371,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// legal. If it's not, we can try introspecting the type to find a
// legal conversion.
- auto castValTy = [&](Constant *P) -> Constant * {
- Type *Ty = cast<PointerType>(P->getType())->getElementType();
- if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, Ty, DL)) {
+ auto TryCastValTy = [&](Constant *P) -> Constant * {
+ // The conversion is illegal if the store is wider than the
+ // pointee proposed by `evaluateBitcastFromPtr`, since that would
+ // drop stores to other struct elements when the caller attempts to
+ // look through a struct's 0th element.
+ Type *NewTy = cast<PointerType>(P->getType())->getElementType();
+ Type *STy = Val->getType();
+ if (DL.getTypeSizeInBits(NewTy) < DL.getTypeSizeInBits(STy))
+ return nullptr;
+
+ if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, NewTy, DL)) {
Ptr = P;
return FV;
}
@@ -377,7 +389,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
};
Constant *NewVal =
- evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, castValTy);
+ evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryCastValTy);
if (!NewVal) {
LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
"evaluate.\n");
@@ -428,9 +440,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
Constant *P = getVal(GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
- for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
- i != e; ++i)
- GEPOps.push_back(getVal(*i));
+ for (Use &Op : llvm::drop_begin(GEP->operands()))
+ GEPOps.push_back(getVal(Op));
InstResult =
ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
cast<GEPOperator>(GEP)->isInBounds());
@@ -450,7 +461,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
"folding: "
<< *Ptr << "\n");
}
- InstResult = ComputeLoadResult(Ptr);
+ InstResult = ComputeLoadResult(Ptr, LI->getType());
if (!InstResult) {
LLVM_DEBUG(
dbgs() << "Failed to compute load result. Can not evaluate load."
@@ -496,7 +507,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
Constant *Ptr = getVal(MSI->getDest());
Constant *Val = getVal(MSI->getValue());
- Constant *DestVal = ComputeLoadResult(getVal(Ptr));
+ Constant *DestVal =
+ ComputeLoadResult(getVal(Ptr), MSI->getValue()->getType());
if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
// This memset is a no-op.
LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
@@ -551,56 +563,74 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n");
++CurInst;
continue;
+ } else {
+ Value *Stripped = CurInst->stripPointerCastsForAliasAnalysis();
+ // Only attempt to getVal() if we've actually managed to strip
+ // anything away, or else we'll call getVal() on the current
+ // instruction.
+ if (Stripped != &*CurInst) {
+ InstResult = getVal(Stripped);
+ }
+ if (InstResult) {
+ LLVM_DEBUG(dbgs()
+ << "Stripped pointer casts for alias analysis for "
+ "intrinsic call.\n");
+ StrippedPointerCastsForAliasAnalysis = true;
+ InstResult = ConstantExpr::getBitCast(InstResult, II->getType());
+ } else {
+ LLVM_DEBUG(dbgs() << "Unknown intrinsic. Cannot evaluate.\n");
+ return false;
+ }
}
-
- LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
- return false;
}
- // Resolve function pointers.
- SmallVector<Constant *, 8> Formals;
- Function *Callee = getCalleeWithFormalArgs(CB, Formals);
- if (!Callee || Callee->isInterposable()) {
- LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");
- return false; // Cannot resolve.
- }
+ if (!InstResult) {
+ // Resolve function pointers.
+ SmallVector<Constant *, 8> Formals;
+ Function *Callee = getCalleeWithFormalArgs(CB, Formals);
+ if (!Callee || Callee->isInterposable()) {
+ LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");
+ return false; // Cannot resolve.
+ }
- if (Callee->isDeclaration()) {
- // If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
- InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C);
- if (!InstResult)
+ if (Callee->isDeclaration()) {
+ // If this is a function we can constant fold, do it.
+ if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
+ InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C);
+ if (!InstResult)
+ return false;
+ LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
+ << *InstResult << "\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n");
return false;
- LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
- << *InstResult << "\n");
+ }
} else {
- LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n");
- return false;
- }
- } else {
- if (Callee->getFunctionType()->isVarArg()) {
- LLVM_DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
- return false;
- }
+ if (Callee->getFunctionType()->isVarArg()) {
+ LLVM_DEBUG(dbgs()
+ << "Can not constant fold vararg function call.\n");
+ return false;
+ }
- Constant *RetVal = nullptr;
- // Execute the call, if successful, use the return value.
- ValueStack.emplace_back();
- if (!EvaluateFunction(Callee, RetVal, Formals)) {
- LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n");
- return false;
- }
- ValueStack.pop_back();
- InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal);
- if (RetVal && !InstResult)
- return false;
+ Constant *RetVal = nullptr;
+ // Execute the call, if successful, use the return value.
+ ValueStack.emplace_back();
+ if (!EvaluateFunction(Callee, RetVal, Formals)) {
+ LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n");
+ return false;
+ }
+ ValueStack.pop_back();
+ InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal);
+ if (RetVal && !InstResult)
+ return false;
- if (InstResult) {
- LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: "
- << *InstResult << "\n\n");
- } else {
- LLVM_DEBUG(dbgs()
- << "Successfully evaluated function. Result: 0\n\n");
+ if (InstResult) {
+ LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: "
+ << *InstResult << "\n\n");
+ } else {
+ LLVM_DEBUG(dbgs()
+ << "Successfully evaluated function. Result: 0\n\n");
+ }
}
}
} else if (CurInst->isTerminator()) {
@@ -695,15 +725,27 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
- if (!EvaluateBlock(CurInst, NextBB))
+ bool StrippedPointerCastsForAliasAnalysis = false;
+
+ if (!EvaluateBlock(CurInst, NextBB, StrippedPointerCastsForAliasAnalysis))
return false;
if (!NextBB) {
// Successfully running until there's no next block means that we found
       // the return. Fill in the return value and pop the call stack.
ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
- if (RI->getNumOperands())
+ if (RI->getNumOperands()) {
+ // The Evaluator can look through pointer casts as long as alias
+ // analysis holds because it's just a simple interpreter and doesn't
+ // skip memory accesses due to invariant group metadata, but we can't
+        // let users of Evaluator use a value that was gleaned by looking
+        // through stripped pointer casts.
+ if (StrippedPointerCastsForAliasAnalysis &&
+ !RI->getReturnValue()->getType()->isVoidTy()) {
+ return false;
+ }
RetVal = getVal(RI->getOperand(0));
+ }
CallStack.pop_back();
return true;
}
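
One idiom worth noting from the Evaluator hunks above is the switch to llvm::drop_begin for skipping a GEP's pointer operand. A minimal sketch of that pattern in isolation, assuming only the standard ADT headers; the helper name collectGEPIndices is invented for illustration:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"

// Sketch only: mirrors the drop_begin-based operand walk used above.
static void collectGEPIndices(llvm::GetElementPtrInst *GEP,
                              llvm::SmallVectorImpl<llvm::Value *> &Indices) {
  // Operand 0 is the pointer operand; every remaining operand is an index.
  for (llvm::Use &Op : llvm::drop_begin(GEP->operands()))
    Indices.push_back(Op.get());
}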
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 44af95eef67d..10f48fe827f4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -129,8 +129,7 @@ static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end());
CandidateLoops.erase(FirstChild, CandidateLoops.end());
- for (auto II = ChildLoops.begin(), IE = ChildLoops.end(); II != IE; ++II) {
- auto Child = *II;
+ for (Loop *Child : ChildLoops) {
LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName()
<< "\n");
// TODO: A child loop whose header is also a header in the current
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 0098dcaeb07a..dbcacc20b589 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -360,7 +360,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2,
for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) {
if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
// Check alias with Head2.
- if (!AA || AA->alias(&*iter1, &*BI))
+ if (!AA || !AA->isNoAlias(&*iter1, &*BI))
return false;
}
}
@@ -411,8 +411,10 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2,
/// approach goes for the opposite case.
bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
BasicBlock *IfTrue2, *IfFalse2;
- Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
- Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
+ BranchInst *DomBI2 = GetIfCondition(BB, IfTrue2, IfFalse2);
+ if (!DomBI2)
+ return false;
+ Instruction *CInst2 = dyn_cast<Instruction>(DomBI2->getCondition());
if (!CInst2)
return false;
@@ -421,8 +423,10 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
return false;
BasicBlock *IfTrue1, *IfFalse1;
- Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
- Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
+ BranchInst *DomBI1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
+ if (!DomBI1)
+ return false;
+ Instruction *CInst1 = dyn_cast<Instruction>(DomBI1->getCondition());
if (!CInst1)
return false;
@@ -479,7 +483,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
FirstEntryBlock->getInstList()
.splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
BranchInst *PBI = cast<BranchInst>(FirstEntryBlock->getTerminator());
- assert(PBI->getCondition() == IfCond2);
+ assert(PBI->getCondition() == CInst2);
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
Builder.SetInsertPoint(PBI);
@@ -494,7 +498,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
PBI->swapSuccessors();
}
Value *NC = Builder.CreateBinOp(CombineOp, CInst1, CInst2);
- PBI->replaceUsesOfWith(IfCond2, NC);
+ PBI->replaceUsesOfWith(CInst2, NC);
Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
// Handle PHI node to replace its predecessors to FirstEntryBlock.
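
The FlattenCFG hunks above move to the BranchInst-returning form of GetIfCondition, so callers must null-check the branch before inspecting its condition. A minimal sketch of that calling pattern; the helper name is invented and the header for GetIfCondition is assumed here:

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

static llvm::Instruction *getIfConditionInst(llvm::BasicBlock *BB) {
  llvm::BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
  // GetIfCondition now returns the dominating conditional branch, or nullptr
  // if BB does not terminate a recognizable if-region.
  llvm::BranchInst *DomBI = llvm::GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return nullptr;
  // The condition may still be a constant rather than an Instruction, hence
  // dyn_cast instead of cast.
  return llvm::dyn_cast<llvm::Instruction>(DomBI->getCondition());
}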
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 8df7ae9563d8..2946c0018c31 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -276,10 +276,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
// When ClearDSOLocalOnDeclarations is true, clear dso_local if GV is
// converted to a declaration, to disable direct access. Don't do this if GV
// is implicitly dso_local due to a non-default visibility.
- if (ClearDSOLocalOnDeclarations && GV.isDeclarationForLinker() &&
+ if (ClearDSOLocalOnDeclarations &&
+ (GV.isDeclarationForLinker() ||
+ (isPerformingImport() && !doImportAsDefinition(&GV))) &&
!GV.isImplicitDSOLocal()) {
GV.setDSOLocal(false);
- } else if (VI && VI.isDSOLocal()) {
+ } else if (VI && VI.isDSOLocal(ImportIndex.withDSOLocalPropagation())) {
// If all summaries are dso_local, symbol gets resolved to a known local
// definition.
GV.setDSOLocal(true);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/HelloWorld.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/HelloWorld.cpp
new file mode 100644
index 000000000000..7019e9e4451b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/HelloWorld.cpp
@@ -0,0 +1,17 @@
+//===-- HelloWorld.cpp - Example Transformations --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/HelloWorld.h"
+
+using namespace llvm;
+
+PreservedAnalyses HelloWorldPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ errs() << F.getName() << "\n";
+ return PreservedAnalyses::all();
+}
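
For context, a rough sketch of driving this new example pass programmatically through the new pass manager; the runHelloWorld wrapper is hypothetical, while the PassBuilder plumbing follows the usual new-PM setup:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Utils/HelloWorld.h"

static void runHelloWorld(llvm::Module &M) {
  llvm::PassBuilder PB;
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Run the function pass over every function in the module; each run prints
  // the function's name to errs().
  llvm::ModulePassManager MPM;
  MPM.addPass(llvm::createModuleToFunctionPassAdaptor(llvm::HelloWorldPass()));
  MPM.run(M, MAM);
}

From opt, the same pass is selected by its registered pass name (assumed to be "helloworld") using the new pass manager syntax.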
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index a2b72e4e7f03..a1e160d144dc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -40,7 +40,7 @@ STATISTIC(NumCompUsedAdded,
/// lanes. The TLI assumes that all parameters and the return type of
/// CI (other than void) need to be widened to a VectorType of VF
/// lanes.
-static void addVariantDeclaration(CallInst &CI, const unsigned VF,
+static void addVariantDeclaration(CallInst &CI, const ElementCount &VF,
const StringRef VFName) {
Module *M = CI.getModule();
@@ -89,9 +89,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
Module *M = CI.getModule();
const SetVector<StringRef> OriginalSetOfMappings(Mappings.begin(),
Mappings.end());
- // All VFs in the TLI are powers of 2.
- for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); VF <= WidestVF;
- VF *= 2) {
+
+ auto AddVariantDecl = [&](const ElementCount &VF) {
const std::string TLIName =
std::string(TLI.getVectorizedFunction(ScalarName, VF));
if (!TLIName.empty()) {
@@ -105,7 +104,19 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
if (!VariantF)
addVariantDeclaration(CI, VF, TLIName);
}
- }
+ };
+
+ // All VFs in the TLI are powers of 2.
+ ElementCount WidestFixedVF, WidestScalableVF;
+ TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
+
+ for (ElementCount VF = ElementCount::getFixed(2);
+ ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
+ AddVariantDecl(VF);
+
+ // TODO: Add scalable variants once we're able to test them.
+ assert(WidestScalableVF.isZero() &&
+ "Scalable vector mappings not yet supported");
VFABI::setVectorVariantNames(&CI, Mappings);
}
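
The loop above now walks vectorization factors as ElementCounts rather than plain integers. A small sketch of that iteration pattern in isolation; the forEachFixedVF helper is invented for illustration:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/TypeSize.h"

static void forEachFixedVF(llvm::ElementCount WidestFixedVF,
                           llvm::function_ref<void(llvm::ElementCount)> Fn) {
  // Visit the fixed power-of-two factors 2, 4, 8, ... up to the widest factor
  // reported by the TLI; scalable factors are intentionally not visited.
  for (llvm::ElementCount VF = llvm::ElementCount::getFixed(2);
       llvm::ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
    Fn(VF);
}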
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index fb271a2118ba..792aa8208f27 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -27,8 +27,9 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
@@ -44,6 +45,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -61,6 +63,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -543,9 +546,16 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
- if (!CI || CI->doesNotThrow() || CI->isInlineAsm())
+ if (!CI || CI->doesNotThrow())
continue;
+ if (CI->isInlineAsm()) {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
+ if (!IA->canThrow()) {
+ continue;
+ }
+ }
+
// We do not need to (and in fact, cannot) convert possibly throwing calls
// to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
// invokes. The caller's "segment" of the deoptimization continuation
@@ -929,7 +939,8 @@ void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
- const DataLayout &DL, AAResults *CalleeAAR) {
+ const DataLayout &DL, AAResults *CalleeAAR,
+ ClonedCodeInfo &InlinedFunctionInfo) {
if (!EnableNoAliasConversion)
return;
@@ -999,7 +1010,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
continue;
Instruction *NI = dyn_cast<Instruction>(VMI->second);
- if (!NI)
+ if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
continue;
bool IsArgMemOnlyCall = false, IsFuncCall = false;
@@ -1025,6 +1036,11 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
IsFuncCall = true;
if (CalleeAAR) {
FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(Call);
+
+ // We'll retain this knowledge without additional metadata.
+ if (AAResults::onlyAccessesInaccessibleMem(MRB))
+ continue;
+
if (AAResults::onlyAccessesArgPointees(MRB))
IsArgMemOnlyCall = true;
}
@@ -1280,7 +1296,7 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
CallInst *NewAsmp =
IRBuilder<>(&CB).CreateAlignmentAssumption(DL, ArgVal, Align);
- AC->registerAssumption(NewAsmp);
+ AC->registerAssumption(cast<AssumeInst>(NewAsmp));
}
}
}
@@ -1504,9 +1520,11 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
BI != BE; ++BI) {
// Loop metadata needs to be updated so that the start and end locs
// reference inlined-at locations.
- auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, &IANodes](
- const DILocation &Loc) -> DILocation * {
- return inlineDebugLoc(&Loc, InlinedAtNode, Ctx, IANodes).get();
+ auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
+ &IANodes](Metadata *MD) -> Metadata * {
+ if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+ return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
+ return MD;
};
updateLoopMetadataDebugLocations(*BI, updateLoopInfoLoc);
@@ -1636,6 +1654,99 @@ void llvm::updateProfileCallee(
}
}
+/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
+/// result is implicitly consumed by a call to retainRV or claimRV immediately
+/// after the call. This function inlines the retainRV/claimRV calls.
+///
+/// There are three cases to consider:
+///
+/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
+/// object in the callee return block, the autoreleaseRV call and the
+/// retainRV/claimRV call in the caller cancel out. If the call in the caller
+/// is a claimRV call, a call to objc_release is emitted.
+///
+/// 2. If there is a call in the callee return block that doesn't have operand
+/// bundle "clang.arc.attachedcall", the operand bundle on the original call
+/// is transferred to the call in the callee.
+///
+/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
+/// a retainRV call.
+static void
+inlineRetainOrClaimRVCalls(CallBase &CB,
+ const SmallVectorImpl<ReturnInst *> &Returns) {
+ Module *Mod = CB.getModule();
+ bool IsRetainRV = objcarc::hasAttachedCallOpBundle(&CB, true),
+ IsClaimRV = !IsRetainRV;
+
+ for (auto *RI : Returns) {
+ Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
+ BasicBlock::reverse_iterator I = ++(RI->getIterator().getReverse());
+ BasicBlock::reverse_iterator EI = RI->getParent()->rend();
+ bool InsertRetainCall = IsRetainRV;
+ IRBuilder<> Builder(RI->getContext());
+
+ // Walk backwards through the basic block looking for either a matching
+ // autoreleaseRV call or an unannotated call.
+ for (; I != EI;) {
+ auto CurI = I++;
+
+ // Ignore casts.
+ if (isa<CastInst>(*CurI))
+ continue;
+
+ if (auto *II = dyn_cast<IntrinsicInst>(&*CurI)) {
+ if (II->getIntrinsicID() == Intrinsic::objc_autoreleaseReturnValue &&
+ II->hasNUses(0) &&
+ objcarc::GetRCIdentityRoot(II->getOperand(0)) == RetOpnd) {
+          // If we've found a matching autoreleaseRV call:
+ // - If claimRV is attached to the call, insert a call to objc_release
+ // and erase the autoreleaseRV call.
+ // - If retainRV is attached to the call, just erase the autoreleaseRV
+ // call.
+ if (IsClaimRV) {
+ Builder.SetInsertPoint(II);
+ Function *IFn =
+ Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
+ Value *BC =
+ Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
+ Builder.CreateCall(IFn, BC, "");
+ }
+ II->eraseFromParent();
+ InsertRetainCall = false;
+ }
+ } else if (auto *CI = dyn_cast<CallInst>(&*CurI)) {
+ if (objcarc::GetRCIdentityRoot(CI) == RetOpnd &&
+ !objcarc::hasAttachedCallOpBundle(CI)) {
+ // If we've found an unannotated call that defines RetOpnd, add a
+ // "clang.arc.attachedcall" operand bundle.
+ Value *BundleArgs[] = {ConstantInt::get(
+ Builder.getInt64Ty(),
+ objcarc::getAttachedCallOperandBundleEnum(IsRetainRV))};
+ OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
+ auto *NewCall = CallBase::addOperandBundle(
+ CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI);
+ NewCall->copyMetadata(*CI);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ InsertRetainCall = false;
+ }
+ }
+
+ break;
+ }
+
+ if (InsertRetainCall) {
+ // The retainRV is attached to the call and we've failed to find a
+ // matching autoreleaseRV or an annotated call in the callee. Emit a call
+ // to objc_retain.
+ Builder.SetInsertPoint(RI);
+ Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain);
+ Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
+ Builder.CreateCall(IFn, BC, "");
+ }
+ }
+}
+
/// This function inlines the called function into the basic block of the
/// caller. This returns false if it is not possible to inline this call.
/// The program is still in a well defined state if this occurs though.
@@ -1673,6 +1784,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// ... and "funclet" operand bundles.
if (Tag == LLVMContext::OB_funclet)
continue;
+ if (Tag == LLVMContext::OB_clang_arc_attachedcall)
+ continue;
return InlineResult::failure("unsupported operand bundle");
}
@@ -1835,17 +1948,27 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, &CB);
+ &InlinedFunctionInfo);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
- if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
- // Update the BFI of blocks cloned into the caller.
- updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
- CalledFunc->front());
-
- updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB,
- IFI.PSI, IFI.CallerBFI);
+  // Insert retainRV/claimRV runtime calls.
+ if (objcarc::hasAttachedCallOpBundle(&CB))
+ inlineRetainOrClaimRVCalls(CB, Returns);
+
+  // Update caller/callee profiles only when requested. For sample loader
+  // inlining, the context-sensitive inlinee profile doesn't need to be
+  // subtracted from the callee profile, and the inlined clone also doesn't
+  // need to be scaled based on call site count.
+ if (IFI.UpdateProfile) {
+ if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
+ // Update the BFI of blocks cloned into the caller.
+ updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
+ CalledFunc->front());
+
+ updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB,
+ IFI.PSI, IFI.CallerBFI);
+ }
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
@@ -1915,7 +2038,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
SAMetadataCloner.remap(FirstNewBlock, Caller->end());
// Add noalias metadata if necessary.
- AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
+ AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
// Clone return attributes on the callsite into the calls within the inlined
// function which feed into its return value.
@@ -1929,9 +2052,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
for (BasicBlock &NewBlock :
make_range(FirstNewBlock->getIterator(), Caller->end()))
for (Instruction &I : NewBlock)
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- IFI.GetAssumptionCache(*Caller).registerAssumption(II);
+ if (auto *II = dyn_cast<AssumeInst>(&I))
+ IFI.GetAssumptionCache(*Caller).registerAssumption(II);
}
// If there are any alloca instructions in the block that used to be the entry
@@ -2068,7 +2190,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Leave lifetime markers for the static alloca's, scoping them to the
// function we just inlined.
- if (InsertLifetime && !IFI.StaticAllocas.empty()) {
+ // We need to insert lifetime intrinsics even at O0 to avoid invalid
+ // access caused by multithreaded coroutines. The check
+ // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
+ if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
+ !IFI.StaticAllocas.empty()) {
IRBuilder<> builder(&FirstNewBlock->front());
for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
AllocaInst *AI = IFI.StaticAllocas[ai];
@@ -2201,7 +2327,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// As such, we replace the cleanupret with unreachable.
if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
- changeToUnreachable(CleanupRet, /*UseLLVMTrap=*/false);
+ changeToUnreachable(CleanupRet);
Instruction *I = BB->getFirstNonPHI();
if (!I->isEHPad())
@@ -2255,6 +2381,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
SmallVector<OperandBundleDef, 1> OpBundles;
DeoptCall->getOperandBundlesAsDefs(OpBundles);
+ auto DeoptAttributes = DeoptCall->getAttributes();
DeoptCall->eraseFromParent();
assert(!OpBundles.empty() &&
"Expected at least the deopt operand bundle");
@@ -2263,6 +2390,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
CallInst *NewDeoptCall =
Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
NewDeoptCall->setCallingConv(CallingConv);
+ NewDeoptCall->setAttributes(DeoptAttributes);
if (NewDeoptCall->getType()->isVoidTy())
Builder.CreateRetVoid();
else
@@ -2315,14 +2443,17 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// before we splice the inlined code into the CFG and lose track of which
// blocks were actually inlined, collect the call sites. We only do this if
// call graph updates weren't requested, as those provide value handle based
- // tracking of inlined call sites instead.
+ // tracking of inlined call sites instead. Calls to intrinsics are not
+ // collected because they are not inlineable.
if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
// Otherwise just collect the raw call sites that were inlined.
for (BasicBlock &NewBB :
make_range(FirstNewBlock->getIterator(), Caller->end()))
for (Instruction &I : NewBB)
if (auto *CB = dyn_cast<CallBase>(&I))
- IFI.InlinedCallSites.push_back(CB);
+ if (!(CB->getCalledFunction() &&
+ CB->getCalledFunction()->isIntrinsic()))
+ IFI.InlinedCallSites.push_back(CB);
}
// If we cloned in _exactly one_ basic block, and if that block ends in a
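
One behavioral change in the InlineFunction hunks worth restating: inline asm calls are no longer unconditionally exempt from call-to-invoke conversion; only asm that is not marked as able to unwind is skipped. A hedged sketch of that predicate, with an invented helper name:

#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"

static bool needsInvokeForEH(llvm::CallInst *CI) {
  // Calls that cannot throw never need to become invokes.
  if (!CI || CI->doesNotThrow())
    return false;
  if (CI->isInlineAsm()) {
    auto *IA = llvm::cast<llvm::InlineAsm>(CI->getCalledOperand());
    // Only inline asm explicitly marked as potentially unwinding matters.
    return IA->canThrow();
  }
  return true;
}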
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
index 7437701f5339..277fd903e9aa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -38,6 +38,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -48,7 +49,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -236,7 +236,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
llvm::findDbgValues(DbgValues, I);
// Update pre-existing debug value uses that reside outside the loop.
- auto &Ctx = I->getContext();
for (auto DVI : DbgValues) {
BasicBlock *UserBB = DVI->getParent();
if (InstBB == UserBB || L->contains(UserBB))
@@ -247,7 +246,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
: SSAUpdate.FindValueForBlock(UserBB);
if (V)
- DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
+ DVI->replaceVariableLocationOp(I, V);
}
// SSAUpdater might have inserted phi-nodes inside other loops. We'll need
@@ -504,9 +503,6 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
- PA.preserve<BasicAA>();
- PA.preserve<GlobalsAA>();
- PA.preserve<SCEVAA>();
PA.preserve<ScalarEvolutionAnalysis>();
// BPI maps terminators to probabilities, since we don't modify the CFG, no
// updates are needed to preserve it.
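
The dbg.value rewrite above (and several similar ones later in Local.cpp) uses the location-operand API instead of hand-wrapping values in MetadataAsValue. A minimal sketch of that pattern, with a hypothetical helper name:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IntrinsicInst.h"

static void retargetDbgValue(llvm::DbgValueInst *DVI, llvm::Value *Old,
                             llvm::Value *New) {
  // Only rewrite if the intrinsic actually tracks Old;
  // replaceVariableLocationOp swaps the matching location operand in place.
  if (llvm::is_contained(DVI->location_ops(), Old))
    DVI->replaceVariableLocationOp(Old, New);
}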
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 4c52fac6f7cb..7e5832148bc0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -555,7 +555,6 @@ PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
if (!runImpl(F, TLI, DT))
return PreservedAnalyses::all();
auto PA = PreservedAnalyses();
- PA.preserve<GlobalsAA>();
PA.preserve<DominatorTreeAnalysis>();
return PA;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index ae26058c210c..d03d76f57ca1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
@@ -65,6 +64,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -111,8 +111,8 @@ static cl::opt<unsigned> PHICSENumPHISmallSize(
"perform a (faster!) exhaustive search instead of set-driven one."));
// Max recursion depth for collectBitParts used when detecting bswap and
-// bitreverse idioms
-static const unsigned BitPartRecursionMaxDepth = 64;
+// bitreverse idioms.
+static const unsigned BitPartRecursionMaxDepth = 48;
//===----------------------------------------------------------------------===//
// Local constant propagation.
@@ -148,7 +148,12 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Dest1->removePredecessor(BI->getParent());
// Replace the conditional branch with an unconditional one.
- Builder.CreateBr(Dest1);
+ BranchInst *NewBI = Builder.CreateBr(Dest1);
+
+ // Transfer the metadata to the new branch instruction.
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+
Value *Cond = BI->getCondition();
BI->eraseFromParent();
if (DeleteDeadConditions)
@@ -167,7 +172,12 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
OldDest->removePredecessor(BB);
// Replace the conditional branch with an unconditional one.
- Builder.CreateBr(Destination);
+ BranchInst *NewBI = Builder.CreateBr(Destination);
+
+ // Transfer the metadata to the new branch instruction.
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+
BI->eraseFromParent();
if (DTU)
DTU->applyUpdates({{DominatorTree::Delete, BB, OldDest}});
@@ -257,7 +267,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Builder.CreateBr(TheOnlyDest);
BasicBlock *BB = SI->getParent();
- SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
+ SmallSet<BasicBlock *, 8> RemovedSuccessors;
// Remove entries from PHI nodes which we no longer branch to...
BasicBlock *SuccToKeep = TheOnlyDest;
@@ -329,7 +339,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
if (auto *BA =
dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
BasicBlock *TheOnlyDest = BA->getBasicBlock();
- SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
+ SmallSet<BasicBlock *, 8> RemovedSuccessors;
// Insert the new branch.
Builder.CreateBr(TheOnlyDest);
@@ -410,7 +420,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return true;
}
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
- if (DVI->getValue())
+ if (DVI->hasArgList() || DVI->getValue(0))
return false;
return true;
}
@@ -456,13 +466,18 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
// sophisticated tradeoffs for guards considering potential for check
// widening, but for now we keep things simple.
if ((II->getIntrinsicID() == Intrinsic::assume &&
- isAssumeWithEmptyBundle(*II)) ||
+ isAssumeWithEmptyBundle(cast<AssumeInst>(*II))) ||
II->getIntrinsicID() == Intrinsic::experimental_guard) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
return !Cond->isZero();
return false;
}
+
+ if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+ Optional<fp::ExceptionBehavior> ExBehavior = FPI->getExceptionBehavior();
+ return ExBehavior.getValue() != fp::ebStrict;
+ }
}
if (isAllocLikeFn(I, TLI))
@@ -476,6 +491,16 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (isMathLibCallNoop(Call, TLI))
return true;
+  // To express possible interaction with the floating-point environment,
+  // constrained intrinsics are described as if they access memory. So they
+  // look like they have side effects, but they actually do not unless they
+  // raise a floating-point exception. If FP exceptions are ignored, the
+  // intrinsic may be deleted.
+ if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+ Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+ if (!EB || *EB == fp::ExceptionBehavior::ebIgnore)
+ return true;
+ }
+
return false;
}
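
A compact restatement of the constrained-FP check added above for calls, as a standalone predicate with an invented name, assuming the exception behavior is the only side effect of interest:

#include "llvm/ADT/Optional.h"
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/IntrinsicInst.h"

static bool constrainedFPIsRemovable(const llvm::ConstrainedFPIntrinsic *FPI) {
  // With no recorded behavior, or with exceptions ignored, the intrinsic has
  // no observable effect beyond its result and may be deleted when unused.
  llvm::Optional<llvm::fp::ExceptionBehavior> EB = FPI->getExceptionBehavior();
  return !EB || *EB == llvm::fp::ebIgnore;
}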
@@ -565,8 +590,7 @@ bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
findDbgUsers(DbgUsers, I);
for (auto *DII : DbgUsers) {
Value *Undef = UndefValue::get(I->getType());
- DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
- ValueAsMetadata::get(Undef)));
+ DII->replaceVariableLocationOp(I, Undef);
}
return !DbgUsers.empty();
}
@@ -729,21 +753,22 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
BasicBlock *PredBB = DestBB->getSinglePredecessor();
assert(PredBB && "Block doesn't have a single predecessor!");
- bool ReplaceEntryBB = false;
- if (PredBB == &DestBB->getParent()->getEntryBlock())
- ReplaceEntryBB = true;
+ bool ReplaceEntryBB = PredBB->isEntryBlock();
// DTU updates: Collect all the edges that enter
// PredBB. These dominator edges will be redirected to DestBB.
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
- for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) {
+ SmallPtrSet<BasicBlock *, 2> PredsOfPredBB(pred_begin(PredBB),
+ pred_end(PredBB));
+ Updates.reserve(Updates.size() + 2 * PredsOfPredBB.size() + 1);
+ for (BasicBlock *PredOfPredBB : PredsOfPredBB)
// This predecessor of PredBB may already have DestBB as a successor.
- if (!llvm::is_contained(successors(*I), DestBB))
- Updates.push_back({DominatorTree::Insert, *I, DestBB});
- Updates.push_back({DominatorTree::Delete, *I, PredBB});
- }
+ if (PredOfPredBB != PredBB)
+ Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB});
+ for (BasicBlock *PredOfPredBB : PredsOfPredBB)
+ Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB});
Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
}
@@ -1057,8 +1082,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
// We cannot fold the block if it's a branch to an already present callbr
// successor because that creates duplicate successors.
- for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- if (auto *CBI = dyn_cast<CallBrInst>((*I)->getTerminator())) {
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ if (auto *CBI = dyn_cast<CallBrInst>(PredBB->getTerminator())) {
if (Succ == CBI->getDefaultDest())
return false;
for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i)
@@ -1072,14 +1097,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
// All predecessors of BB will be moved to Succ.
- SmallSetVector<BasicBlock *, 8> Predecessors(pred_begin(BB), pred_end(BB));
- Updates.reserve(Updates.size() + 2 * Predecessors.size());
- for (auto *Predecessor : Predecessors) {
+ SmallPtrSet<BasicBlock *, 8> PredsOfBB(pred_begin(BB), pred_end(BB));
+ SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ));
+ Updates.reserve(Updates.size() + 2 * PredsOfBB.size() + 1);
+ for (auto *PredOfBB : PredsOfBB)
// This predecessor of BB may already have Succ as a successor.
- if (!llvm::is_contained(successors(Predecessor), Succ))
- Updates.push_back({DominatorTree::Insert, Predecessor, Succ});
- Updates.push_back({DominatorTree::Delete, Predecessor, BB});
- }
+ if (!PredsOfSucc.contains(PredOfBB))
+ Updates.push_back({DominatorTree::Insert, PredOfBB, Succ});
+ for (auto *PredOfBB : PredsOfBB)
+ Updates.push_back({DominatorTree::Delete, PredOfBB, BB});
Updates.push_back({DominatorTree::Delete, BB, Succ});
}
@@ -1119,10 +1145,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
Instruction *TI = BB->getTerminator();
if (TI)
if (MDNode *LoopMD = TI->getMetadata(LoopMDKind))
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
+ for (BasicBlock *Pred : predecessors(BB))
Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD);
- }
// Everything that jumped to BB now goes to Succ.
BB->replaceAllUsesWith(Succ);
@@ -1135,12 +1159,11 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
assert(succ_empty(BB) && "The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
- if (DTU) {
+ if (DTU)
DTU->applyUpdates(Updates);
- DTU->deleteBB(BB);
- } else {
- BB->eraseFromParent(); // Delete the old basic block.
- }
+
+ DeleteDeadBlock(BB, DTU);
+
return true;
}
@@ -1356,7 +1379,7 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
SmallVector<DbgValueInst *, 1> DbgValues;
findDbgValues(DbgValues, APN);
for (auto *DVI : DbgValues) {
- assert(DVI->getValue() == APN);
+ assert(is_contained(DVI->getValues(), APN));
if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
return true;
}
@@ -1383,13 +1406,19 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
// We can't always calculate the size of the DI variable (e.g. if it is a
// VLA). Try to use the size of the alloca that the dbg intrinsic describes
   // instead.
- if (DII->isAddressOfVariable())
- if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation()))
+ if (DII->isAddressOfVariable()) {
+ // DII should have exactly 1 location when it is an address.
+ assert(DII->getNumVariableLocationOps() == 1 &&
+ "address of variable must have exactly 1 location operand.");
+ if (auto *AI =
+ dyn_cast_or_null<AllocaInst>(DII->getVariableLocationOp(0))) {
if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) {
assert(ValueSize.isScalable() == FragmentSize->isScalable() &&
"Both sizes should agree on the scalable flag.");
return TypeSize::isKnownGE(ValueSize, *FragmentSize);
}
+ }
+ }
// Could not determine size of variable. Conservatively return false.
return false;
}
@@ -1400,7 +1429,7 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
/// case this DebugLoc leaks into any adjacent instructions.
static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) {
// Original dbg.declare must have a location.
- DebugLoc DeclareLoc = DII->getDebugLoc();
+ const DebugLoc &DeclareLoc = DII->getDebugLoc();
MDNode *Scope = DeclareLoc.getScope();
DILocation *InlinedAt = DeclareLoc.getInlinedAt();
// Produce an unknown location with the correct scope / inlinedAt fields.
@@ -1592,93 +1621,56 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
ValueToValueMapTy DbgValueMap;
for (auto &I : *BB) {
if (auto DbgII = dyn_cast<DbgVariableIntrinsic>(&I)) {
- if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
- DbgValueMap.insert({Loc, DbgII});
+ for (Value *V : DbgII->location_ops())
+ if (auto *Loc = dyn_cast_or_null<PHINode>(V))
+ DbgValueMap.insert({Loc, DbgII});
}
}
if (DbgValueMap.size() == 0)
return;
+ // Map a pair of the destination BB and old dbg.value to the new dbg.value,
+ // so that if a dbg.value is being rewritten to use more than one of the
+ // inserted PHIs in the same destination BB, we can update the same dbg.value
+ // with all the new PHIs instead of creating one copy for each.
+ MapVector<std::pair<BasicBlock *, DbgVariableIntrinsic *>,
+ DbgVariableIntrinsic *>
+ NewDbgValueMap;
// Then iterate through the new PHIs and look to see if they use one of the
- // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will
- // propagate the info through the new PHI.
- LLVMContext &C = BB->getContext();
+ // previously mapped PHIs. If so, create a new dbg.value intrinsic that will
+ // propagate the info through the new PHI. If we use more than one new PHI in
+ // a single destination BB with the same old dbg.value, merge the updates so
+ // that we get a single new dbg.value with all the new PHIs.
for (auto PHI : InsertedPHIs) {
BasicBlock *Parent = PHI->getParent();
// Avoid inserting an intrinsic into an EH block.
if (Parent->getFirstNonPHI()->isEHPad())
continue;
- auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI));
for (auto VI : PHI->operand_values()) {
auto V = DbgValueMap.find(VI);
if (V != DbgValueMap.end()) {
auto *DbgII = cast<DbgVariableIntrinsic>(V->second);
- Instruction *NewDbgII = DbgII->clone();
- NewDbgII->setOperand(0, PhiMAV);
- auto InsertionPt = Parent->getFirstInsertionPt();
- assert(InsertionPt != Parent->end() && "Ill-formed basic block");
- NewDbgII->insertBefore(&*InsertionPt);
+ auto NewDI = NewDbgValueMap.find({Parent, DbgII});
+ if (NewDI == NewDbgValueMap.end()) {
+ auto *NewDbgII = cast<DbgVariableIntrinsic>(DbgII->clone());
+ NewDI = NewDbgValueMap.insert({{Parent, DbgII}, NewDbgII}).first;
+ }
+ DbgVariableIntrinsic *NewDbgII = NewDI->second;
+        // If PHI contains VI as an operand more than once, we may have
+        // already replaced it in NewDbgII; confirm that it is still present.
+ if (is_contained(NewDbgII->location_ops(), VI))
+ NewDbgII->replaceVariableLocationOp(VI, PHI);
}
}
}
-}
-
-/// Finds all intrinsics declaring local variables as living in the memory that
-/// 'V' points to. This may include a mix of dbg.declare and
-/// dbg.addr intrinsics.
-TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
- // This function is hot. Check whether the value has any metadata to avoid a
- // DenseMap lookup.
- if (!V->isUsedByMetadata())
- return {};
- auto *L = LocalAsMetadata::getIfExists(V);
- if (!L)
- return {};
- auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L);
- if (!MDV)
- return {};
-
- TinyPtrVector<DbgVariableIntrinsic *> Declares;
- for (User *U : MDV->users()) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(U))
- if (DII->isAddressOfVariable())
- Declares.push_back(DII);
+  // Insert the new dbg.values into their destination blocks.
+ for (auto DI : NewDbgValueMap) {
+ BasicBlock *Parent = DI.first.first;
+ auto *NewDbgII = DI.second;
+ auto InsertionPt = Parent->getFirstInsertionPt();
+ assert(InsertionPt != Parent->end() && "Ill-formed basic block");
+ NewDbgII->insertBefore(&*InsertionPt);
}
-
- return Declares;
-}
-
-TinyPtrVector<DbgDeclareInst *> llvm::FindDbgDeclareUses(Value *V) {
- TinyPtrVector<DbgDeclareInst *> DDIs;
- for (DbgVariableIntrinsic *DVI : FindDbgAddrUses(V))
- if (auto *DDI = dyn_cast<DbgDeclareInst>(DVI))
- DDIs.push_back(DDI);
- return DDIs;
-}
-
-void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
- // This function is hot. Check whether the value has any metadata to avoid a
- // DenseMap lookup.
- if (!V->isUsedByMetadata())
- return;
- if (auto *L = LocalAsMetadata::getIfExists(V))
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
- for (User *U : MDV->users())
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
- DbgValues.push_back(DVI);
-}
-
-void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
- Value *V) {
- // This function is hot. Check whether the value has any metadata to avoid a
- // DenseMap lookup.
- if (!V->isUsedByMetadata())
- return;
- if (auto *L = LocalAsMetadata::getIfExists(V))
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
- for (User *U : MDV->users())
- if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U))
- DbgUsers.push_back(DII);
}
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
@@ -1686,7 +1678,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
int Offset) {
auto DbgAddrs = FindDbgAddrUses(Address);
for (DbgVariableIntrinsic *DII : DbgAddrs) {
- DebugLoc Loc = DII->getDebugLoc();
+ const DebugLoc &Loc = DII->getDebugLoc();
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
assert(DIVar && "Missing variable");
@@ -1701,7 +1693,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
DIBuilder &Builder, int Offset) {
- DebugLoc Loc = DVI->getDebugLoc();
+ const DebugLoc &Loc = DVI->getDebugLoc();
auto *DIVar = DVI->getVariable();
auto *DIExpr = DVI->getExpression();
assert(DIVar && "Missing variable");
@@ -1726,16 +1718,9 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
DIBuilder &Builder, int Offset) {
if (auto *L = LocalAsMetadata::getIfExists(AI))
if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
- for (auto UI = MDV->use_begin(), UE = MDV->use_end(); UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(MDV->uses()))
if (auto *DVI = dyn_cast<DbgValueInst>(U.getUser()))
replaceOneDbgValueForAlloca(DVI, NewAllocaAddress, Builder, Offset);
- }
-}
-
-/// Wrap \p V in a ValueAsMetadata instance.
-static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) {
- return MetadataAsValue::get(C, ValueAsMetadata::get(V));
}
/// Where possible to salvage debug information for \p I do so
@@ -1748,26 +1733,53 @@ void llvm::salvageDebugInfo(Instruction &I) {
void llvm::salvageDebugInfoForDbgValues(
Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
- auto &Ctx = I.getContext();
+  // This is an arbitrarily chosen limit on the maximum number of values we
+  // can salvage into a DIArgList, used for performance reasons.
+ const unsigned MaxDebugArgs = 16;
bool Salvaged = false;
- auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
for (auto *DII : DbgUsers) {
// Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
// are implicitly pointing out the value as a DWARF memory location
// description.
bool StackValue = isa<DbgValueInst>(DII);
-
- DIExpression *DIExpr =
- salvageDebugInfoImpl(I, DII->getExpression(), StackValue);
-
+ auto DIILocation = DII->location_ops();
+ assert(
+ is_contained(DIILocation, &I) &&
+ "DbgVariableIntrinsic must use salvaged instruction as its location");
+ SmallVector<Value *, 4> AdditionalValues;
+ // `I` may appear more than once in DII's location ops, and each use of `I`
+ // must be updated in the DIExpression and potentially have additional
+ // values added; thus we call salvageDebugInfoImpl for each `I` instance in
+ // DIILocation.
+ DIExpression *SalvagedExpr = DII->getExpression();
+ auto LocItr = find(DIILocation, &I);
+ while (SalvagedExpr && LocItr != DIILocation.end()) {
+ unsigned LocNo = std::distance(DIILocation.begin(), LocItr);
+ SalvagedExpr = salvageDebugInfoImpl(I, SalvagedExpr, StackValue, LocNo,
+ AdditionalValues);
+ LocItr = std::find(++LocItr, DIILocation.end(), &I);
+ }
// salvageDebugInfoImpl should fail on examining the first element of
// DbgUsers, or none of them.
- if (!DIExpr)
+ if (!SalvagedExpr)
break;
- DII->setOperand(0, wrapMD(I.getOperand(0)));
- DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
+ DII->replaceVariableLocationOp(&I, I.getOperand(0));
+ if (AdditionalValues.empty()) {
+ DII->setExpression(SalvagedExpr);
+ } else if (isa<DbgValueInst>(DII) &&
+ DII->getNumVariableLocationOps() + AdditionalValues.size() <=
+ MaxDebugArgs) {
+ DII->addVariableLocationOps(AdditionalValues, SalvagedExpr);
+ } else {
+ // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is
+ // currently only valid for stack value expressions.
+ // Also do not salvage if the resulting DIArgList would contain an
+ // unreasonably large number of values.
+ Value *Undef = UndefValue::get(I.getOperand(0)->getType());
+ DII->replaceVariableLocationOp(I.getOperand(0), Undef);
+ }
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
Salvaged = true;
}
@@ -1777,14 +1789,111 @@ void llvm::salvageDebugInfoForDbgValues(
for (auto *DII : DbgUsers) {
Value *Undef = UndefValue::get(I.getType());
- DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
- ValueAsMetadata::get(Undef)));
+ DII->replaceVariableLocationOp(&I, Undef);
+ }
+}
+
+bool getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
+ uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
+ unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
+ // Rewrite a GEP into a DIExpression.
+ MapVector<Value *, APInt> VariableOffsets;
+ APInt ConstantOffset(BitWidth, 0);
+ if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset))
+ return false;
+ if (!VariableOffsets.empty() && !CurrentLocOps) {
+ Opcodes.insert(Opcodes.begin(), {dwarf::DW_OP_LLVM_arg, 0});
+ CurrentLocOps = 1;
+ }
+ for (auto Offset : VariableOffsets) {
+ AdditionalValues.push_back(Offset.first);
+ assert(Offset.second.isStrictlyPositive() &&
+ "Expected strictly positive multiplier for offset.");
+ Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps++, dwarf::DW_OP_constu,
+ Offset.second.getZExtValue(), dwarf::DW_OP_mul,
+ dwarf::DW_OP_plus});
+ }
+ DIExpression::appendOffset(Opcodes, ConstantOffset.getSExtValue());
+ return true;
+}
+
+uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return dwarf::DW_OP_plus;
+ case Instruction::Sub:
+ return dwarf::DW_OP_minus;
+ case Instruction::Mul:
+ return dwarf::DW_OP_mul;
+ case Instruction::SDiv:
+ return dwarf::DW_OP_div;
+ case Instruction::SRem:
+ return dwarf::DW_OP_mod;
+ case Instruction::Or:
+ return dwarf::DW_OP_or;
+ case Instruction::And:
+ return dwarf::DW_OP_and;
+ case Instruction::Xor:
+ return dwarf::DW_OP_xor;
+ case Instruction::Shl:
+ return dwarf::DW_OP_shl;
+ case Instruction::LShr:
+ return dwarf::DW_OP_shr;
+ case Instruction::AShr:
+ return dwarf::DW_OP_shra;
+ default:
+ // TODO: Salvage from each kind of binop we know about.
+ return 0;
+ }
+}
+
+bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
+ // Handle binary operations with constant integer operands as a special case.
+ auto *ConstInt = dyn_cast<ConstantInt>(BI->getOperand(1));
+ // Values wider than 64 bits cannot be represented within a DIExpression.
+ if (ConstInt && ConstInt->getBitWidth() > 64)
+ return false;
+
+ Instruction::BinaryOps BinOpcode = BI->getOpcode();
+ // Push any Constant Int operand onto the expression stack.
+ if (ConstInt) {
+ uint64_t Val = ConstInt->getSExtValue();
+ // Add or Sub Instructions with a constant operand can potentially be
+ // simplified.
+ if (BinOpcode == Instruction::Add || BinOpcode == Instruction::Sub) {
+ uint64_t Offset = BinOpcode == Instruction::Add ? Val : -int64_t(Val);
+ DIExpression::appendOffset(Opcodes, Offset);
+ return true;
+ }
+ Opcodes.append({dwarf::DW_OP_constu, Val});
+ } else {
+ if (!CurrentLocOps) {
+ Opcodes.append({dwarf::DW_OP_LLVM_arg, 0});
+ CurrentLocOps = 1;
+ }
+ Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps});
+ AdditionalValues.push_back(BI->getOperand(1));
}
+
+ // Add salvaged binary operator to expression stack, if it has a valid
+ // representation in a DIExpression.
+ uint64_t DwarfBinOp = getDwarfOpForBinOp(BinOpcode);
+ if (!DwarfBinOp)
+ return false;
+ Opcodes.push_back(DwarfBinOp);
+
+ return true;
}
-DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
- DIExpression *SrcDIExpr,
- bool WithStackValue) {
+DIExpression *
+llvm::salvageDebugInfoImpl(Instruction &I, DIExpression *SrcDIExpr,
+ bool WithStackValue, unsigned LocNo,
+ SmallVectorImpl<Value *> &AdditionalValues) {
+ uint64_t CurrentLocOps = SrcDIExpr->getNumLocationOperands();
auto &M = *I.getModule();
auto &DL = M.getDataLayout();
@@ -1792,20 +1901,13 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * {
DIExpression *DIExpr = SrcDIExpr;
if (!Ops.empty()) {
- DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, LocNo, WithStackValue);
}
return DIExpr;
};
- // Apply the given offset to the source DIExpression.
- auto applyOffset = [&](uint64_t Offset) -> DIExpression * {
- SmallVector<uint64_t, 8> Ops;
- DIExpression::appendOffset(Ops, Offset);
- return doSalvage(Ops);
- };
-
// initializer-list helper for applying operators to the source DIExpression.
- auto applyOps = [&](ArrayRef<uint64_t> Opcodes) -> DIExpression * {
+ auto applyOps = [&](ArrayRef<uint64_t> Opcodes) {
SmallVector<uint64_t, 8> Ops(Opcodes.begin(), Opcodes.end());
return doSalvage(Ops);
};
@@ -1829,54 +1931,17 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
isa<SExtInst>(&I)));
}
+ SmallVector<uint64_t, 8> Ops;
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- unsigned BitWidth =
- M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace());
- // Rewrite a constant GEP into a DIExpression.
- APInt Offset(BitWidth, 0);
- if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
- return applyOffset(Offset.getSExtValue());
- } else {
- return nullptr;
- }
+ if (getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues))
+ return doSalvage(Ops);
} else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
- // Rewrite binary operations with constant integer operands.
- auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1));
- if (!ConstInt || ConstInt->getBitWidth() > 64)
- return nullptr;
-
- uint64_t Val = ConstInt->getSExtValue();
- switch (BI->getOpcode()) {
- case Instruction::Add:
- return applyOffset(Val);
- case Instruction::Sub:
- return applyOffset(-int64_t(Val));
- case Instruction::Mul:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
- case Instruction::SDiv:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
- case Instruction::SRem:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
- case Instruction::Or:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
- case Instruction::And:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
- case Instruction::Xor:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
- case Instruction::Shl:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
- case Instruction::LShr:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
- case Instruction::AShr:
- return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
- default:
- // TODO: Salvage constants from each kind of binop we know about.
- return nullptr;
- }
- // *Not* to do: we should not attempt to salvage load instructions,
- // because the validity and lifetime of a dbg.value containing
- // DW_OP_deref becomes difficult to analyze. See PR40628 for examples.
+ if (getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues))
+ return doSalvage(Ops);
}
+ // *Not* to do: we should not attempt to salvage load instructions,
+ // because the validity and lifetime of a dbg.value containing
+ // DW_OP_deref becomes difficult to analyze. See PR40628 for examples.
return nullptr;
}
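
To make the salvage mapping above concrete: for an instruction like "%y = shl i32 %x, 3", getSalvageOpsForBinOp pushes the constant and the opcode from getDwarfOpForBinOp, so %y's debug location becomes %x's location with {DW_OP_constu, 3, DW_OP_shl} appended. A tiny sketch of that fragment construction; the helper is hypothetical and the opcodes come from llvm/BinaryFormat/Dwarf.h:

#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/Dwarf.h"

static void appendConstBinOp(llvm::SmallVectorImpl<uint64_t> &Ops,
                             uint64_t DwarfBinOp, uint64_t Constant) {
  // Push "constant, then operator", matching the expression stack order used
  // by the salvage code above.
  Ops.append({llvm::dwarf::DW_OP_constu, Constant, DwarfBinOp});
}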
@@ -1922,13 +1987,12 @@ static bool rewriteDebugUsers(
if (UndefOrSalvage.count(DII))
continue;
- LLVMContext &Ctx = DII->getContext();
DbgValReplacement DVR = RewriteExpr(*DII);
if (!DVR)
continue;
- DII->setOperand(0, wrapValueInMetadata(Ctx, &To));
- DII->setOperand(2, MetadataAsValue::get(Ctx, *DVR));
+ DII->replaceVariableLocationOp(&From, &To);
+ DII->setExpression(*DVR);
LLVM_DEBUG(dbgs() << "REWRITE: " << *DII << '\n');
Changed = true;
}
@@ -2046,15 +2110,15 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
return {NumDeadInst, NumDeadDbgInst};
}
-unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
- bool PreserveLCSSA, DomTreeUpdater *DTU,
+unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA,
+ DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU) {
BasicBlock *BB = I->getParent();
if (MSSAU)
MSSAU->changeToUnreachable(I);
- SmallSetVector<BasicBlock *, 8> UniqueSuccessors;
+ SmallSet<BasicBlock *, 8> UniqueSuccessors;
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
@@ -2063,14 +2127,6 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
if (DTU)
UniqueSuccessors.insert(Successor);
}
- // Insert a call to llvm.trap right before this. This turns the undefined
- // behavior into a hard fail instead of falling through into random code.
- if (UseLLVMTrap) {
- Function *TrapFn =
- Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
- CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
- CallTrap->setDebugLoc(I->getDebugLoc());
- }
auto *UI = new UnreachableInst(I->getContext(), I);
UI->setDebugLoc(I->getDebugLoc());
@@ -2139,15 +2195,16 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
- BasicBlock *UnwindEdge) {
+ BasicBlock *UnwindEdge,
+ DomTreeUpdater *DTU) {
BasicBlock *BB = CI->getParent();
// Convert this function call into an invoke instruction. First, split the
// basic block.
- BasicBlock *Split =
- BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc");
+ BasicBlock *Split = SplitBlock(BB, CI, DTU, /*LI=*/nullptr, /*MSSAU*/ nullptr,
+ CI->getName() + ".noexc");
- // Delete the unconditional branch inserted by splitBasicBlock
+ // Delete the unconditional branch inserted by SplitBlock
BB->getInstList().pop_back();
// Create the new invoke instruction.
@@ -2167,6 +2224,9 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, BB, UnwindEdge}});
+
// Make sure that anything using the call now uses the invoke! This also
// updates the CallGraph if present, because it uses a WeakTrackingVH.
CI->replaceAllUsesWith(II);
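For illustration, a sketch of a caller threading the new DomTreeUpdater parameter through so the edge to the unwind destination is recorded; CI, UnwindEdge and DT are assumed to exist in the caller:
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  // Returns the split-off continuation block that the new invoke branches to.
  BasicBlock *Split = changeToInvokeAndSplitBasicBlock(CI, UnwindEdge, &DTU);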
@@ -2203,7 +2263,7 @@ static bool markAliveBlocks(Function &F,
if (IntrinsicID == Intrinsic::assume) {
if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI, false, false, DTU);
+ changeToUnreachable(CI, false, DTU);
Changed = true;
break;
}
@@ -2219,8 +2279,7 @@ static bool markAliveBlocks(Function &F,
// still be useful for widening.
if (match(CI->getArgOperand(0), m_Zero()))
if (!isa<UnreachableInst>(CI->getNextNode())) {
- changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false,
- false, DTU);
+ changeToUnreachable(CI->getNextNode(), false, DTU);
Changed = true;
break;
}
@@ -2228,7 +2287,7 @@ static bool markAliveBlocks(Function &F,
} else if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(CI->getFunction())) ||
isa<UndefValue>(Callee)) {
- changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DTU);
+ changeToUnreachable(CI, false, DTU);
Changed = true;
break;
}
@@ -2238,7 +2297,7 @@ static bool markAliveBlocks(Function &F,
// though.
if (!isa<UnreachableInst>(CI->getNextNode())) {
// Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(CI->getNextNode(), false, false, DTU);
+ changeToUnreachable(CI->getNextNode(), false, DTU);
Changed = true;
}
break;
@@ -2257,7 +2316,7 @@ static bool markAliveBlocks(Function &F,
(isa<ConstantPointerNull>(Ptr) &&
!NullPointerIsDefined(SI->getFunction(),
SI->getPointerAddressSpace()))) {
- changeToUnreachable(SI, true, false, DTU);
+ changeToUnreachable(SI, false, DTU);
Changed = true;
break;
}
@@ -2271,7 +2330,7 @@ static bool markAliveBlocks(Function &F,
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(BB->getParent())) ||
isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true, false, DTU);
+ changeToUnreachable(II, false, DTU);
Changed = true;
} else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
if (II->use_empty() && II->onlyReadsMemory()) {
@@ -2311,7 +2370,7 @@ static bool markAliveBlocks(Function &F,
}
};
- SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
+ SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
// Set of unique CatchPads.
SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
@@ -2321,22 +2380,25 @@ static bool markAliveBlocks(Function &F,
E = CatchSwitch->handler_end();
I != E; ++I) {
BasicBlock *HandlerBB = *I;
- ++NumPerSuccessorCases[HandlerBB];
+ if (DTU)
+ ++NumPerSuccessorCases[HandlerBB];
auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
if (!HandlerSet.insert({CatchPad, Empty}).second) {
- --NumPerSuccessorCases[HandlerBB];
+ if (DTU)
+ --NumPerSuccessorCases[HandlerBB];
CatchSwitch->removeHandler(I);
--I;
--E;
Changed = true;
}
}
- std::vector<DominatorTree::UpdateType> Updates;
- for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
- if (I.second == 0)
- Updates.push_back({DominatorTree::Delete, BB, I.first});
- if (DTU)
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, BB, I.first});
DTU->applyUpdates(Updates);
+ }
}
Changed |= ConstantFoldTerminator(BB, true, nullptr, DTU);
@@ -2418,44 +2480,7 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
if (MSSAU)
MSSAU->removeBlocks(BlocksToRemove);
- // Loop over all of the basic blocks that are up for removal, dropping all of
- // their internal references. Update DTU if available.
- std::vector<DominatorTree::UpdateType> Updates;
- for (auto *BB : BlocksToRemove) {
- SmallSetVector<BasicBlock *, 8> UniqueSuccessors;
- for (BasicBlock *Successor : successors(BB)) {
- // Only remove references to BB in reachable successors of BB.
- if (Reachable.count(Successor))
- Successor->removePredecessor(BB);
- if (DTU)
- UniqueSuccessors.insert(Successor);
- }
- BB->dropAllReferences();
- if (DTU) {
- Instruction *TI = BB->getTerminator();
- assert(TI && "Basic block should have a terminator");
- // Terminators like invoke can have users. We have to replace their users,
- // before removing them.
- if (!TI->use_empty())
- TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- TI->eraseFromParent();
- new UnreachableInst(BB->getContext(), BB);
- assert(succ_empty(BB) && "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
- Updates.reserve(Updates.size() + UniqueSuccessors.size());
- for (auto *UniqueSuccessor : UniqueSuccessors)
- Updates.push_back({DominatorTree::Delete, BB, UniqueSuccessor});
- }
- }
-
- if (DTU) {
- DTU->applyUpdates(Updates);
- for (auto *BB : BlocksToRemove)
- DTU->deleteBB(BB);
- } else {
- for (auto *BB : BlocksToRemove)
- BB->eraseFromParent();
- }
+ DeleteDeadBlocks(BlocksToRemove.takeVector(), DTU);
return Changed;
}
@@ -2686,11 +2711,10 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
DominatorTree &DT,
const BasicBlock *BB) {
- auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) {
- auto *I = cast<Instruction>(U.getUser())->getParent();
- return DT.properlyDominates(BB, I);
+ auto Dominates = [&DT](const BasicBlock *BB, const Use &U) {
+ return DT.dominates(BB, U);
};
- return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
+ return ::replaceDominatedUsesWith(From, To, BB, Dominates);
}
bool llvm::callsGCLeafFunction(const CallBase *Call,
@@ -2795,13 +2819,14 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
// TODO: Extend llvm.dbg.value to take more than one SSA Value (PR39141) to
// encode predicated DIExpressions that yield different results on different
// code paths.
+
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
Instruction *I = &*II;
- I->dropUnknownNonDebugMetadata();
+ I->dropUndefImplyingAttrsAndUnknownMetadata();
if (I->isUsedByMetadata())
dropDebugUsers(*I);
- if (isa<DbgInfoIntrinsic>(I)) {
- // Remove DbgInfo Intrinsics.
+ if (I->isDebugOrPseudoInst()) {
+ // Remove DbgInfo and pseudo probe Intrinsics.
II = I->eraseFromParent();
continue;
}
@@ -2863,7 +2888,8 @@ struct BitPart {
/// does not invalidate internal references (std::map instead of DenseMap).
static const Optional<BitPart> &
collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
- std::map<Value *, Optional<BitPart>> &BPS, int Depth) {
+ std::map<Value *, Optional<BitPart>> &BPS, int Depth,
+ bool &FoundRoot) {
auto I = BPS.find(V);
if (I != BPS.end())
return I->second;
@@ -2871,6 +2897,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
auto &Result = BPS[V] = None;
auto BitWidth = V->getType()->getScalarSizeInBits();
+ // Can't do integer/elements > 128 bits.
+ if (BitWidth > 128)
+ return Result;
+
// Prevent stack overflow by limiting the recursion depth
if (Depth == BitPartRecursionMaxDepth) {
LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n");
@@ -2883,17 +2913,18 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// If this is an or instruction, it may be an inner node of the bswap.
if (match(V, m_Or(m_Value(X), m_Value(Y)))) {
- const auto &A =
- collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
- const auto &B =
- collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
- if (!A || !B)
+ // Check we have both sources and they are from the same provider.
+ const auto &A = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!A || !A->Provider)
return Result;
- // Try and merge the two together.
- if (!A->Provider || A->Provider != B->Provider)
+ const auto &B = collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!B || A->Provider != B->Provider)
return Result;
+ // Try and merge the two together.
Result = BitPart(A->Provider, BitWidth);
for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx) {
if (A->Provenance[BitIdx] != BitPart::Unset &&
@@ -2918,8 +2949,12 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
if (BitShift.uge(BitWidth))
return Result;
- const auto &Res =
- collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ // For bswap-only, limit shift amounts to whole bytes, for an early exit.
+ if (!MatchBitReversals && (BitShift.getZExtValue() % 8) != 0)
+ return Result;
+
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
if (!Res)
return Result;
Result = Res;
@@ -2948,8 +2983,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
if (!MatchBitReversals && (NumMaskedBits % 8) != 0)
return Result;
- const auto &Res =
- collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
if (!Res)
return Result;
Result = Res;
@@ -2963,8 +2998,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// If this is a zext instruction zero extend the result.
if (match(V, m_ZExt(m_Value(X)))) {
- const auto &Res =
- collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
if (!Res)
return Result;
@@ -2977,11 +3012,24 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
}
+ // If this is a truncate instruction, extract the lower bits.
+ if (match(V, m_Trunc(m_Value(X)))) {
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!Res)
+ return Result;
+
+ Result = BitPart(Res->Provider, BitWidth);
+ for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
+ Result->Provenance[BitIdx] = Res->Provenance[BitIdx];
+ return Result;
+ }
+
  // BITREVERSE - most likely due to us previously matching a partial
// bitreverse.
if (match(V, m_BitReverse(m_Value(X)))) {
- const auto &Res =
- collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
if (!Res)
return Result;
@@ -2993,8 +3041,8 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
  // BSWAP - most likely due to us previously matching a partial bswap.
if (match(V, m_BSwap(m_Value(X)))) {
- const auto &Res =
- collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ const auto &Res = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
if (!Res)
return Result;
@@ -3020,13 +3068,19 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
if (cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fshr)
ModAmt = BitWidth - ModAmt;
- const auto &LHS =
- collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
- const auto &RHS =
- collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS, Depth + 1);
+ // For bswap-only, limit shift amounts to whole bytes, for an early exit.
+ if (!MatchBitReversals && (ModAmt % 8) != 0)
+ return Result;
// Check we have both sources and they are from the same provider.
- if (!LHS || !RHS || !LHS->Provider || LHS->Provider != RHS->Provider)
+ const auto &LHS = collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!LHS || !LHS->Provider)
+ return Result;
+
+ const auto &RHS = collectBitParts(Y, MatchBSwaps, MatchBitReversals, BPS,
+ Depth + 1, FoundRoot);
+ if (!RHS || LHS->Provider != RHS->Provider)
return Result;
unsigned StartBitRHS = BitWidth - ModAmt;
@@ -3039,8 +3093,14 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
}
}
- // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
- // the input value to the bswap/bitreverse.
+ // If we've already found a root input value then we're never going to merge
+ // these back together.
+ if (FoundRoot)
+ return Result;
+
+ // Okay, we got to something that isn't a shift, 'or', 'and', etc. This must
+ // be the root input value to the bswap/bitreverse.
+ FoundRoot = true;
Result = BitPart(V, BitWidth);
for (unsigned BitIdx = 0; BitIdx < BitWidth; ++BitIdx)
Result->Provenance[BitIdx] = BitIdx;
@@ -3066,7 +3126,9 @@ static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
bool llvm::recognizeBSwapOrBitReverseIdiom(
Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
SmallVectorImpl<Instruction *> &InsertedInsts) {
- if (Operator::getOpcode(I) != Instruction::Or)
+ if (!match(I, m_Or(m_Value(), m_Value())) &&
+ !match(I, m_FShl(m_Value(), m_Value(), m_Value())) &&
+ !match(I, m_FShr(m_Value(), m_Value(), m_Value())))
return false;
if (!MatchBSwaps && !MatchBitReversals)
return false;
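Worth noting: the root of the idiom may now itself be a funnel shift rather than an 'or'. For example, a 32-bit byte swap written as a rotate-by-16 (an fshl of a value by itself) applied to a value whose bytes were already swapped within each half-word used to be rejected by this early check, even though collectBitParts could follow the rest of the pattern.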
@@ -3080,8 +3142,10 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
DemandedTy = Trunc->getType();
// Try to find all the pieces corresponding to the bswap.
+ bool FoundRoot = false;
std::map<Value *, Optional<BitPart>> BPS;
- auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0);
+ const auto &Res =
+ collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0, FoundRoot);
if (!Res)
return false;
ArrayRef<int8_t> BitProvenance = Res->Provenance;
@@ -3280,3 +3344,33 @@ Value *llvm::invertCondition(Value *Condition) {
Inverted->insertBefore(&*Parent->getFirstInsertionPt());
return Inverted;
}
+
+bool llvm::inferAttributesFromOthers(Function &F) {
+ // Note: We explicitly check for attributes rather than using cover functions
+ // because some of the cover functions include the logic being implemented.
+
+ bool Changed = false;
+ // readnone + not convergent implies nosync
+ if (!F.hasFnAttribute(Attribute::NoSync) &&
+ F.doesNotAccessMemory() && !F.isConvergent()) {
+ F.setNoSync();
+ Changed = true;
+ }
+
+ // readonly implies nofree
+ if (!F.hasFnAttribute(Attribute::NoFree) && F.onlyReadsMemory()) {
+ F.setDoesNotFreeMemory();
+ Changed = true;
+ }
+
+ // willreturn implies mustprogress
+ if (!F.hasFnAttribute(Attribute::MustProgress) && F.willReturn()) {
+ F.setMustProgress();
+ Changed = true;
+ }
+
+ // TODO: There are a bunch of cases of restrictive memory effects we
+ // can infer by inspecting arguments of argmemonly-ish functions.
+
+ return Changed;
+}
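A minimal usage sketch for the new helper; the Function F and the Changed flag are assumptions of this example, not part of the patch. Because it only adds attributes implied by ones already present, repeated calls are harmless:
  // Derive nosync, nofree and mustprogress from readnone/readonly/willreturn.
  bool Changed = inferAttributesFromOthers(F);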
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
index befacb591762..cd1f6f0c78a5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -211,9 +211,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Do not consider predicates that are known to be true or false
// independently of the loop iteration.
- if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
- SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV,
- RightSCEV))
+ if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
continue;
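For context: evaluatePredicate folds both of the removed queries into one; it yields an Optional<bool> that is populated whenever the predicate is known true or known false, so the if above fires in exactly the cases the two isKnownPredicate calls used to cover.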
// Check if we have a condition with one AddRec and one non AddRec
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index b678efdc8d88..ff7905bed91d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -26,7 +26,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +46,10 @@ using namespace llvm;
STATISTIC(NumNotRotatedDueToHeaderSize,
"Number of loops not rotated due to the header size");
+STATISTIC(NumInstrsHoisted,
+ "Number of instructions hoisted into loop preheader");
+STATISTIC(NumInstrsDuplicated,
+ "Number of instructions cloned into loop preheader");
STATISTIC(NumRotated, "Number of loops rotated");
static cl::opt<bool>
@@ -179,9 +183,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
else
NewVal = UndefValue::get(OrigHeaderVal->getType());
- DbgValue->setOperand(0,
- MetadataAsValue::get(OrigHeaderVal->getContext(),
- ValueAsMetadata::get(NewVal)));
+ DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
}
}
}
@@ -386,11 +388,15 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// possible or create a clone in the OldPreHeader if not.
Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
- // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
+ // Record all debug intrinsics preceding LoopEntryBranch to avoid
+ // duplication.
using DbgIntrinsicHash =
- std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
+ std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>;
auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
- return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
+ auto VarLocOps = D->location_ops();
+ return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()),
+ D->getVariable()},
+ D->getExpression()};
};
SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
@@ -422,11 +428,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
!Inst->mayWriteToMemory() && !Inst->isTerminator() &&
!isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
+ ++NumInstrsHoisted;
continue;
}
// Otherwise, create a duplicate of the instruction.
Instruction *C = Inst->clone();
+ ++NumInstrsDuplicated;
// Eagerly remap the operands of the instruction.
RemapInstruction(C, ValueMap,
@@ -459,9 +467,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
C->setName(Inst->getName());
C->insertBefore(LoopEntryBranch);
- if (auto *II = dyn_cast<IntrinsicInst>(C))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
+ if (auto *II = dyn_cast<AssumeInst>(C))
+ AC->registerAssumption(II);
// MemorySSA cares whether the cloned instruction was inserted or not, and
// not whether it can be remapped to a simplified value.
if (MSSAU)
@@ -630,6 +637,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
}
assert(SplitLatchEdge &&
"Despite splitting all preds, failed to split latch exit?");
+ (void)SplitLatchEdge;
} else {
// We can fold the conditional branch in the preheader, this makes things
// simpler. The first step is to remove the extra edge to the Exit block.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 2e104334ad96..d2fd32c98d73 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -127,9 +127,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
// Compute the set of predecessors of the loop that are not in the loop.
SmallVector<BasicBlock*, 8> OutsideBlocks;
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(Header)) {
if (!L->contains(P)) { // Coming in from outside the loop?
// If the loop is branched to from an indirect terminator, we won't
// be able to fully transform the loop, because it prohibits
@@ -381,9 +379,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
- for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
- BasicBlock *P = *I;
-
+ for (BasicBlock *P : predecessors(Header)) {
// Indirect edges cannot be split, so we must fail if we find one.
if (P->getTerminator()->isIndirectTerminator())
return nullptr;
@@ -505,12 +501,9 @@ ReprocessLoop:
if (*BB == L->getHeader()) continue;
SmallPtrSet<BasicBlock*, 4> BadPreds;
- for (pred_iterator PI = pred_begin(*BB),
- PE = pred_end(*BB); PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(*BB))
if (!L->contains(P))
BadPreds.insert(P);
- }
// Delete each unique out-of-loop (and thus dead) predecessor.
for (BasicBlock *P : BadPreds) {
@@ -520,7 +513,7 @@ ReprocessLoop:
// Zap the dead pred's terminator and replace it with unreachable.
Instruction *TI = P->getTerminator();
- changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA,
+ changeToUnreachable(TI, PreserveLCSSA,
/*DTU=*/nullptr, MSSAU);
Changed = true;
}
@@ -872,9 +865,6 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
- PA.preserve<BasicAA>();
- PA.preserve<GlobalsAA>();
- PA.preserve<SCEVAA>();
PA.preserve<ScalarEvolutionAnalysis>();
PA.preserve<DependenceAnalysis>();
if (MSSAAnalysis)
@@ -904,9 +894,8 @@ static void verifyLoop(Loop *L) {
// Indirectbr can interfere with preheader and unique backedge insertion.
if (!L->getLoopPreheader() || !L->getLoopLatch()) {
bool HasIndBrPred = false;
- for (pred_iterator PI = pred_begin(L->getHeader()),
- PE = pred_end(L->getHeader()); PI != PE; ++PI)
- if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ for (BasicBlock *Pred : predecessors(L->getHeader()))
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
HasIndBrPred = true;
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index d4cd57405239..a91bf7b7af13 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -59,7 +59,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -220,26 +219,24 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
}
}
- // At this point, the code is well formed. We now do a quick sweep over the
- // inserted code, doing constant propagation and dead code elimination as we
- // go.
+ // At this point, the code is well formed. Perform constprop, instsimplify,
+ // and dce.
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
for (BasicBlock *BB : L->getBlocks()) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
-
if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
if (LI->replacementPreservesLCSSAForm(Inst, V))
Inst->replaceAllUsesWith(V);
if (isInstructionTriviallyDead(Inst))
- BB->getInstList().erase(Inst);
+ DeadInsts.emplace_back(Inst);
}
+ // We can't do recursive deletion until we're done iterating, as we might
+ // have a phi which (potentially indirectly) uses instructions later in
+ // the block we're iterating through.
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
}
-
- // TODO: after peeling or unrolling, previously loop variant conditions are
- // likely to fold to constants, eagerly propagating those here will require
- // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
- // appropriate.
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
@@ -247,32 +244,10 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
-/// TripCount is the upper bound of the iteration on which control exits
-/// LatchBlock. Control may exit the loop prior to TripCount iterations either
-/// via an early branch in other loop block or via LatchBlock terminator. This
-/// is relaxed from the general definition of trip count which is the number of
-/// times the loop header executes. Note that UnrollLoop assumes that the loop
-/// counter test is in LatchBlock in order to remove unnecesssary instances of
-/// the test. If control can exit the loop from the LatchBlock's terminator
-/// prior to TripCount iterations, flag PreserveCondBr needs to be set.
-///
-/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
-/// needs to be preserved. It is needed when we use trip count upper bound to
-/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
-/// conditional branch needs to be preserved.
-///
-/// Similarly, TripMultiple divides the number of times that the LatchBlock may
-/// execute without exiting the loop.
-///
-/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
-/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
-/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
-/// iterations before branching into the unrolled loop. UnrollLoop will not
-/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
-/// AllowExpensiveTripCount is false.
-///
-/// If we want to perform PGO-based loop peeling, PeelCount is set to the
-/// number of iterations we want to peel off.
+/// If Runtime is true then UnrollLoop will try to insert a prologue or
+/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop
+/// will not runtime-unroll the loop if computing the run-time trip count will
+/// be expensive and AllowExpensiveTripCount is false.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
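For illustration, a sketch of what a caller now provides, mirroring the updated initializer used for the remainder loop in LoopUnrollRuntime.cpp further below; the concrete values here are invented:
  UnrollLoopOptions ULO = {/*Count*/ 4, /*Force*/ false, /*Runtime*/ true,
                           /*AllowExpensiveTripCount*/ false,
                           /*UnrollRemainder*/ false, /*ForgetAllSCEV*/ false};
  LoopUnrollResult Result =
      UnrollLoop(L, ULO, LI, SE, DT, AC, TTI, ORE, PreserveLCSSA);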
///
@@ -287,6 +262,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
const TargetTransformInfo *TTI,
OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA, Loop **RemainderLoop) {
+ assert(DT && "DomTree is required");
if (!L->getLoopPreheader()) {
LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -311,56 +287,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}
- if (ULO.TripCount != 0)
- LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n");
- if (ULO.TripMultiple != 1)
- LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n");
-
- // Effectively "DCE" unrolled iterations that are beyond the tripcount
- // and will never be executed.
- if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
- ULO.Count = ULO.TripCount;
-
- // Don't enter the unroll code if there is nothing to do.
- if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
- LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
- return LoopUnrollResult::Unmodified;
- }
-
assert(ULO.Count > 0);
- assert(ULO.TripMultiple > 0);
- assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
-
- // Are we eliminating the loop control altogether?
- bool CompletelyUnroll = ULO.Count == ULO.TripCount;
-
- // We assume a run-time trip count if the compiler cannot
- // figure out the loop trip count and the unroll-runtime
- // flag is specified.
- bool RuntimeTripCount =
- (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
-
- assert((!RuntimeTripCount || !ULO.PeelCount) &&
- "Did not expect runtime trip-count unrolling "
- "and peeling for the same loop");
-
- bool Peeled = false;
- if (ULO.PeelCount) {
- Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
-
- // Successful peeling may result in a change in the loop preheader/trip
- // counts. If we later unroll the loop, we want these to be updated.
- if (Peeled) {
- // According to our guards and profitability checks the only
- // meaningful exit should be latch block. Other exits go to deopt,
- // so we do not worry about them.
- BasicBlock *ExitingBlock = L->getLoopLatch();
- assert(ExitingBlock && "Loop without exiting block?");
- assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
- ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
- }
- }
// All these values should be taken only after peeling because they might have
// changed.
@@ -371,6 +298,61 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
L->getExitBlocks(ExitBlocks);
std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
+ const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+ const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+
+ // Effectively "DCE" unrolled iterations that are beyond the max tripcount
+ // and will never be executed.
+ if (MaxTripCount && ULO.Count > MaxTripCount)
+ ULO.Count = MaxTripCount;
+
+ struct ExitInfo {
+ unsigned TripCount;
+ unsigned TripMultiple;
+ unsigned BreakoutTrip;
+ bool ExitOnTrue;
+ SmallVector<BasicBlock *> ExitingBlocks;
+ };
+ DenseMap<BasicBlock *, ExitInfo> ExitInfos;
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (auto *ExitingBlock : ExitingBlocks) {
+ // The folding code is not prepared to deal with non-branch instructions
+ // right now.
+ auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI)
+ continue;
+
+ ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second;
+ Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ if (Info.TripCount != 0) {
+ Info.BreakoutTrip = Info.TripCount % ULO.Count;
+ Info.TripMultiple = 0;
+ } else {
+ Info.BreakoutTrip = Info.TripMultiple =
+ (unsigned)GreatestCommonDivisor64(ULO.Count, Info.TripMultiple);
+ }
+ Info.ExitOnTrue = !L->contains(BI->getSuccessor(0));
+ Info.ExitingBlocks.push_back(ExitingBlock);
+ LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName()
+ << ": TripCount=" << Info.TripCount
+ << ", TripMultiple=" << Info.TripMultiple
+ << ", BreakoutTrip=" << Info.BreakoutTrip << "\n");
+ }
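To make the arithmetic concrete with invented numbers: if this exit has a known trip count of 10 and ULO.Count is 4, then BreakoutTrip = 10 % 4 = 2 and TripMultiple is cleared to 0; if the trip count is unknown but the trip multiple is 6, both fields become gcd(4, 6) = 2. The WillExit logic later in this file uses these values to decide which unrolled copies of an exiting branch can be folded to unconditional branches.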
+
+ // Are we eliminating the loop control altogether? Note that we can know
+ // we're eliminating the backedge without knowing exactly which iteration
+ // of the unrolled body exits.
+ const bool CompletelyUnroll = ULO.Count == MaxTripCount;
+
+ const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
+
+ // There's no point in performing runtime unrolling if this unroll count
+ // results in a full unroll.
+ if (CompletelyUnroll)
+ ULO.Runtime = false;
+
// Go through all exits of L and see if there are any phi-nodes there. We just
// conservatively assume that they're inserted to preserve LCSSA form, which
// means that complete unrolling might break this form. We need to either fix
@@ -392,30 +374,16 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// A conditional branch which exits the loop, which can be optimized to an
// unconditional branch in the unrolled loop in some cases.
- BranchInst *ExitingBI = nullptr;
bool LatchIsExiting = L->isLoopExiting(LatchBlock);
- if (LatchIsExiting)
- ExitingBI = LatchBI;
- else if (BasicBlock *ExitingBlock = L->getExitingBlock())
- ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
- // If the peeling guard is changed this assert may be relaxed or even
- // deleted.
- assert(!Peeled && "Peeling guard changed!");
LLVM_DEBUG(
dbgs() << "Can't unroll; a conditional latch must exit the loop");
return LoopUnrollResult::Unmodified;
}
- LLVM_DEBUG({
- if (ExitingBI)
- dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
- << "\n";
- else
- dbgs() << " No single exiting block\n";
- });
- // Loops containing convergent instructions must have a count that divides
- // their TripMultiple.
+ // Loops containing convergent instructions cannot use runtime unrolling,
+ // as the prologue/epilogue may add additional control-dependencies to
+ // convergent operations.
LLVM_DEBUG(
{
bool HasConvergent = false;
@@ -423,22 +391,21 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (auto &I : *BB)
if (auto *CB = dyn_cast<CallBase>(&I))
HasConvergent |= CB->isConvergent();
- assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
- "Unroll count must divide trip multiple if loop contains a "
- "convergent operation.");
+ assert((!HasConvergent || !ULO.Runtime) &&
+ "Can't runtime unroll if loop contains a convergent operation.");
});
bool EpilogProfitability =
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
: isEpilogProfitable(L);
- if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
+ if (ULO.Runtime &&
!UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
EpilogProfitability, ULO.UnrollRemainder,
ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
PreserveLCSSA, RemainderLoop)) {
if (ULO.Force)
- RuntimeTripCount = false;
+ ULO.Runtime = false;
else {
LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
"generated when assuming runtime trip count\n");
@@ -446,71 +413,34 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
- // If we know the trip count, we know the multiple...
- unsigned BreakoutTrip = 0;
- if (ULO.TripCount != 0) {
- BreakoutTrip = ULO.TripCount % ULO.Count;
- ULO.TripMultiple = 0;
- } else {
- // Figure out what multiple to use.
- BreakoutTrip = ULO.TripMultiple =
- (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
- }
-
using namespace ore;
// Report the unrolling decision.
if (CompletelyUnroll) {
LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << ULO.TripCount << "!\n");
+ << " with trip count " << ULO.Count << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
L->getHeader())
<< "completely unrolled loop with "
- << NV("UnrollCount", ULO.TripCount) << " iterations";
- });
- } else if (ULO.PeelCount) {
- LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
- << " with iteration count " << ULO.PeelCount << "!\n");
- if (ORE)
- ORE->emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
- L->getHeader())
- << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
- << " iterations";
+ << NV("UnrollCount", ULO.Count) << " iterations";
});
} else {
- auto DiagBuilder = [&]() {
- OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
- L->getHeader());
- return Diag << "unrolled loop by a factor of "
- << NV("UnrollCount", ULO.Count);
- };
-
LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
<< ULO.Count);
- if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) {
- LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
- if (ORE)
- ORE->emit([&]() {
- return DiagBuilder() << " with a breakout at trip "
- << NV("BreakoutTrip", BreakoutTrip);
- });
- } else if (ULO.TripMultiple != 1) {
- LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch");
- if (ORE)
- ORE->emit([&]() {
- return DiagBuilder()
- << " with " << NV("TripMultiple", ULO.TripMultiple)
- << " trips per branch";
- });
- } else if (RuntimeTripCount) {
+ if (ULO.Runtime)
LLVM_DEBUG(dbgs() << " with run-time trip count");
- if (ORE)
- ORE->emit(
- [&]() { return DiagBuilder() << " with run-time trip count"; });
- }
LLVM_DEBUG(dbgs() << "!\n");
+
+ if (ORE)
+ ORE->emit([&]() {
+ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
+ L->getHeader());
+ Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count);
+ if (ULO.Runtime)
+ Diag << " with run-time trip count";
+ return Diag;
+ });
}
// We are going to make changes to this loop. SCEV may be keeping cached info
@@ -530,12 +460,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (!LatchIsExiting)
++NumUnrolledNotLatch;
- Optional<bool> ContinueOnTrue = None;
- BasicBlock *LoopExit = nullptr;
- if (ExitingBI) {
- ContinueOnTrue = L->contains(ExitingBI->getSuccessor(0));
- LoopExit = ExitingBI->getSuccessor(*ContinueOnTrue);
- }
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
@@ -546,15 +470,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
std::vector<BasicBlock *> Headers;
- std::vector<BasicBlock *> ExitingBlocks;
- std::vector<BasicBlock *> ExitingSucc;
std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);
- if (ExitingBI) {
- ExitingBlocks.push_back(ExitingBI->getParent());
- ExitingSucc.push_back(ExitingBI->getSuccessor(!(*ContinueOnTrue)));
- }
// The current on-the-fly SSA update requires blocks to be processed in
// reverse postorder so that LastValueMap contains the correct value at each
@@ -576,7 +494,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (Loop *SubLoop : *L)
LoopsToSimplify.insert(SubLoop);
- if (Header->getParent()->isDebugInfoForProfiling())
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
@@ -652,12 +572,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// Keep track of the exiting block and its successor block contained in
// the loop for the current iteration.
- if (ExitingBI) {
- if (*BB == ExitingBlocks[0])
- ExitingBlocks.push_back(New);
- if (*BB == ExitingSucc[0])
- ExitingSucc.push_back(New);
- }
+ auto ExitInfoIt = ExitInfos.find(*BB);
+ if (ExitInfoIt != ExitInfos.end())
+ ExitInfoIt->second.ExitingBlocks.push_back(New);
NewBlocks.push_back(New);
UnrolledLoopBlocks.push_back(New);
@@ -666,28 +583,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// dedicated entry block (copy of the header block), this header's copy
// dominates all copied blocks. That means, dominance relations in the
// copied body are the same as in the original body.
- if (DT) {
- if (*BB == Header)
- DT->addNewBlock(New, Latches[It - 1]);
- else {
- auto BBDomNode = DT->getNode(*BB);
- auto BBIDom = BBDomNode->getIDom();
- BasicBlock *OriginalBBIDom = BBIDom->getBlock();
- DT->addNewBlock(
- New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
- }
+ if (*BB == Header)
+ DT->addNewBlock(New, Latches[It - 1]);
+ else {
+ auto BBDomNode = DT->getNode(*BB);
+ auto BBIDom = BBDomNode->getIDom();
+ BasicBlock *OriginalBBIDom = BBIDom->getBlock();
+ DT->addNewBlock(
+ New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
}
}
// Remap all instructions in the most recent iteration
remapInstructionsInBlocks(NewBlocks, LastValueMap);
- for (BasicBlock *NewBlock : NewBlocks) {
- for (Instruction &I : *NewBlock) {
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
- }
- }
+ for (BasicBlock *NewBlock : NewBlocks)
+ for (Instruction &I : *NewBlock)
+ if (auto *II = dyn_cast<AssumeInst>(&I))
+ AC->registerAssumption(II);
{
// Identify what other metadata depends on the cloned version. After
@@ -717,116 +629,18 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
- auto setDest = [](BasicBlock *Src, BasicBlock *Dest, BasicBlock *BlockInLoop,
- bool NeedConditional, Optional<bool> ContinueOnTrue,
- bool IsDestLoopExit) {
- auto *Term = cast<BranchInst>(Src->getTerminator());
- if (NeedConditional) {
- // Update the conditional branch's successor for the following
- // iteration.
- assert(ContinueOnTrue.hasValue() &&
- "Expecting valid ContinueOnTrue when NeedConditional is true");
- Term->setSuccessor(!(*ContinueOnTrue), Dest);
- } else {
- // Remove phi operands at this loop exit
- if (!IsDestLoopExit) {
- BasicBlock *BB = Src;
- for (BasicBlock *Succ : successors(BB)) {
- // Preserve the incoming value from BB if we are jumping to the block
- // in the current loop.
- if (Succ == BlockInLoop)
- continue;
- for (PHINode &Phi : Succ->phis())
- Phi.removeIncomingValue(BB, false);
- }
- }
- // Replace the conditional branch with an unconditional one.
- BranchInst::Create(Dest, Term);
- Term->eraseFromParent();
- }
- };
-
// Connect latches of the unrolled iterations to the headers of the next
- // iteration. If the latch is also the exiting block, the conditional branch
- // may have to be preserved.
+ // iteration. Currently they point to the header of the same iteration.
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The branch destination.
unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
- bool NeedConditional = LatchIsExiting;
-
- if (LatchIsExiting) {
- if (RuntimeTripCount && j != 0)
- NeedConditional = false;
-
- // For a complete unroll, make the last iteration end with a branch
- // to the exit block.
- if (CompletelyUnroll) {
- if (j == 0)
- Dest = LoopExit;
- // If using trip count upper bound to completely unroll, we need to
- // keep the conditional branch except the last one because the loop
- // may exit after any iteration.
- assert(NeedConditional &&
- "NeedCondition cannot be modified by both complete "
- "unrolling and runtime unrolling");
- NeedConditional =
- (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
- } else if (j != BreakoutTrip &&
- (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
- }
- }
-
- setDest(Latches[i], Dest, Headers[i], NeedConditional, ContinueOnTrue,
- Dest == LoopExit);
- }
-
- if (!LatchIsExiting) {
- // If the latch is not exiting, we may be able to simplify the conditional
- // branches in the unrolled exiting blocks.
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- // The branch destination.
- unsigned j = (i + 1) % e;
- bool NeedConditional = true;
-
- if (RuntimeTripCount && j != 0)
- NeedConditional = false;
-
- if (CompletelyUnroll)
- // We cannot drop the conditional branch for the last condition, as we
- // may have to execute the loop body depending on the condition.
- NeedConditional = j == 0 || ULO.PreserveCondBr;
- else if (j != BreakoutTrip &&
- (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
-
- // Conditional branches from non-latch exiting block have successors
- // either in the same loop iteration or outside the loop. The branches are
- // already correct.
- if (NeedConditional)
- continue;
- setDest(ExitingBlocks[i], ExitingSucc[i], ExitingSucc[i], NeedConditional,
- None, false);
- }
-
- // When completely unrolling, the last latch becomes unreachable.
- if (CompletelyUnroll) {
- BranchInst *Term = cast<BranchInst>(Latches.back()->getTerminator());
- new UnreachableInst(Term->getContext(), Term);
- Term->eraseFromParent();
- }
+ Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]);
}
// Update dominators of blocks we might reach through exits.
// Immediate dominator of such block might change, because we add more
// routes which can lead to the exit: we can now reach it from the copied
// iterations too.
- if (DT && ULO.Count > 1) {
+ if (ULO.Count > 1) {
for (auto *BB : OriginalLoopBlocks) {
auto *BBDomNode = DT->getNode(BB);
SmallVector<BasicBlock *, 16> ChildrenToUpdate;
@@ -835,42 +649,98 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (!L->contains(ChildBB))
ChildrenToUpdate.push_back(ChildBB);
}
- BasicBlock *NewIDom;
- if (ExitingBI && BB == ExitingBlocks[0]) {
- // The latch is special because we emit unconditional branches in
- // some cases where the original loop contained a conditional branch.
- // Since the latch is always at the bottom of the loop, if the latch
- // dominated an exit before unrolling, the new dominator of that exit
- // must also be a latch. Specifically, the dominator is the first
- // latch which ends in a conditional branch, or the last latch if
- // there is no such latch.
- // For loops exiting from non latch exiting block, we limit the
- // branch simplification to single exiting block loops.
- NewIDom = ExitingBlocks.back();
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- Instruction *Term = ExitingBlocks[i]->getTerminator();
- if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
- NewIDom =
- DT->findNearestCommonDominator(ExitingBlocks[i], Latches[i]);
- break;
- }
- }
- } else {
- // The new idom of the block will be the nearest common dominator
- // of all copies of the previous idom. This is equivalent to the
- // nearest common dominator of the previous idom and the first latch,
- // which dominates all copies of the previous idom.
- NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
- }
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
for (auto *ChildBB : ChildrenToUpdate)
DT->changeImmediateDominator(ChildBB, NewIDom);
}
}
- assert(!DT || !UnrollVerifyDomtree ||
+ assert(!UnrollVerifyDomtree ||
DT->verify(DominatorTree::VerificationLevel::Fast));
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) {
+ auto *Term = cast<BranchInst>(Src->getTerminator());
+ const unsigned Idx = ExitOnTrue ^ WillExit;
+ BasicBlock *Dest = Term->getSuccessor(Idx);
+ BasicBlock *DeadSucc = Term->getSuccessor(1-Idx);
+
+ // Remove predecessors from all non-Dest successors.
+ DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true);
+
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
+
+ DTU.applyUpdates({{DominatorTree::Delete, Src, DeadSucc}});
+ };
+
+ auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j,
+ bool IsLatch) -> Optional<bool> {
+ if (CompletelyUnroll) {
+ if (PreserveOnlyFirst) {
+ if (i == 0)
+ return None;
+ return j == 0;
+ }
+ // Complete (but possibly inexact) unrolling
+ if (j == 0)
+ return true;
+ if (Info.TripCount && j != Info.TripCount)
+ return false;
+ return None;
+ }
+
+ if (ULO.Runtime) {
+ // If runtime unrolling inserts a prologue, information about non-latch
+ // exits may be stale.
+ if (IsLatch && j != 0)
+ return false;
+ return None;
+ }
+
+ if (j != Info.BreakoutTrip &&
+ (Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) {
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ return false;
+ }
+ return None;
+ };
+
+ // Fold branches for iterations where we know that they will exit or not
+ // exit.
+ for (const auto &Pair : ExitInfos) {
+ const ExitInfo &Info = Pair.second;
+ for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ bool IsLatch = Pair.first == LatchBlock;
+ Optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch);
+ if (!KnownWillExit)
+ continue;
+
+ // We don't fold known-exiting branches for non-latch exits here,
+ // because this ensures that both all loop blocks and all exit blocks
+ // remain reachable in the CFG.
+ // TODO: We could fold these branches, but it would require much more
+ // sophisticated updates to LoopInfo.
+ if (*KnownWillExit && !IsLatch)
+ continue;
+
+ SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue);
+ }
+ }
+
+ // When completely unrolling, the last latch becomes unreachable.
+ if (!LatchIsExiting && CompletelyUnroll)
+ changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA, &DTU);
+
// Merge adjacent basic blocks, if possible.
for (BasicBlock *Latch : Latches) {
BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
@@ -893,8 +763,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
- SE, DT, AC, TTI);
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
+ TTI);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
@@ -915,39 +785,36 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
- // If we have a pass and a DominatorTree we should re-simplify impacted loops
- // to ensure subsequent analyses can rely on this form. We want to simplify
+ // Make sure that loop-simplify form is preserved. We want to simplify
// at least one layer outside of the loop that was unrolled so that any
// changes to the parent loop exposed by the unrolling are considered.
- if (DT) {
- if (OuterL) {
- // OuterL includes all loops for which we can break loop-simplify, so
- // it's sufficient to simplify only it (it'll recursively simplify inner
- // loops too).
- if (NeedToFixLCSSA) {
- // LCSSA must be performed on the outermost affected loop. The unrolled
- // loop's last loop latch is guaranteed to be in the outermost loop
- // after LoopInfo's been updated by LoopInfo::erase.
- Loop *LatchLoop = LI->getLoopFor(Latches.back());
- Loop *FixLCSSALoop = OuterL;
- if (!FixLCSSALoop->contains(LatchLoop))
- while (FixLCSSALoop->getParentLoop() != LatchLoop)
- FixLCSSALoop = FixLCSSALoop->getParentLoop();
-
- formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
- } else if (PreserveLCSSA) {
- assert(OuterL->isLCSSAForm(*DT) &&
- "Loops should be in LCSSA form after loop-unroll.");
- }
-
- // TODO: That potentially might be compile-time expensive. We should try
- // to fix the loop-simplified form incrementally.
- simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
- } else {
- // Simplify loops for which we might've broken loop-simplify form.
- for (Loop *SubLoop : LoopsToSimplify)
- simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ if (OuterL) {
+ // OuterL includes all loops for which we can break loop-simplify, so
+ // it's sufficient to simplify only it (it'll recursively simplify inner
+ // loops too).
+ if (NeedToFixLCSSA) {
+ // LCSSA must be performed on the outermost affected loop. The unrolled
+ // loop's last loop latch is guaranteed to be in the outermost loop
+ // after LoopInfo's been updated by LoopInfo::erase.
+ Loop *LatchLoop = LI->getLoopFor(Latches.back());
+ Loop *FixLCSSALoop = OuterL;
+ if (!FixLCSSALoop->contains(LatchLoop))
+ while (FixLCSSALoop->getParentLoop() != LatchLoop)
+ FixLCSSALoop = FixLCSSALoop->getParentLoop();
+
+ formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
+ } else if (PreserveLCSSA) {
+ assert(OuterL->isLCSSAForm(*DT) &&
+ "Loops should be in LCSSA form after loop-unroll.");
}
+
+ // TODO: That potentially might be compile-time expensive. We should try
+ // to fix the loop-simplified form incrementally.
+ simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ } else {
+ // Simplify loops for which we might've broken loop-simplify form.
+ for (Loop *SubLoop : LoopsToSimplify)
+ simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
}
return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 6e32a2b865aa..6efaa012aeca 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -141,6 +141,7 @@ template <typename T>
static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
BasicBlockSet &AftBlocks, T Visit) {
SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> VisitedInstr;
for (auto &Phi : Header->phis()) {
Value *V = Phi.getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -151,11 +152,13 @@ static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
Instruction *I = Worklist.pop_back_val();
if (!Visit(I))
return false;
+ VisitedInstr.insert(I);
if (AftBlocks.count(I->getParent()))
for (auto &U : I->operands())
if (Instruction *II = dyn_cast<Instruction>(U))
- Worklist.push_back(II);
+ if (!VisitedInstr.count(II))
+ Worklist.push_back(II);
}
return true;
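The new VisitedInstr set stops the worklist from re-pushing an instruction that is reachable through several operand chains in the aft blocks, which previously could re-walk heavily shared subexpressions many times.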
@@ -245,7 +248,7 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
bool CompletelyUnroll = (Count == TripCount);
// We use the runtime remainder in cases where we don't know trip multiple
- if (TripMultiple == 1 || TripMultiple % Count != 0) {
+ if (TripMultiple % Count != 0) {
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
/*UseEpilogRemainder*/ true,
UnrollRemainder, /*ForgetAllSCEV*/ false,
@@ -346,7 +349,9 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
- if (Header->getParent()->isDebugInfoForProfiling())
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
@@ -432,9 +437,8 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
remapInstructionsInBlocks(NewBlocks, LastValueMap);
for (BasicBlock *NewBlock : NewBlocks) {
for (Instruction &I : *NewBlock) {
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
+ if (auto *II = dyn_cast<AssumeInst>(&I))
+ AC->registerAssumption(II);
}
}
@@ -831,6 +835,23 @@ static bool isEligibleLoopForm(const Loop &Root) {
if (SubLoopsSize != 1)
return false;
+ // Only loops with a single exit block can be unrolled and jammed.
+ // The function getExitBlock() is used for this check, rather than
+  // getUniqueExitBlock() to ensure loops with multiple exit edges are
+ // disallowed.
+ if (!L->getExitBlock()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single exit "
+ "blocks can be unrolled and jammed.\n");
+ return false;
+ }
+
+ // Only loops with a single exiting block can be unrolled and jammed.
+ if (!L->getExitingBlock()) {
+ LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single "
+ "exiting blocks can be unrolled and jammed.\n");
+ return false;
+ }
+
L = L->getSubLoops()[0];
} while (L);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 0abf62be156f..6749d3db743c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -50,6 +50,9 @@ static cl::opt<bool> UnrollRuntimeMultiExit(
"unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
cl::desc("Allow runtime unrolling for loops with multiple exits, when "
"epilog is generated"));
+static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
+ "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
+ cl::desc("Assume the non latch exit block to be predictable"));
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
@@ -493,12 +496,19 @@ static bool canProfitablyUnrollMultiExitLoop(
if (ExitingBlocks.size() > 2)
return false;
+ // Allow unrolling of loops with no non latch exit blocks.
+ if (OtherExits.size() == 0)
+ return true;
+
// The second heuristic is that L has one exit other than the latchexit and
// that exit is a deoptimize block. We know that deoptimize blocks are rarely
// taken, which also implies the branch leading to the deoptimize block is
- // highly predictable.
+ // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
+ // assume the other exit branch is predictable even if it has no deoptimize
+ // call.
return (OtherExits.size() == 1 &&
- OtherExits[0]->getTerminatingDeoptimizeCall());
+ (UnrollRuntimeOtherExitPredictable ||
+ OtherExits[0]->getTerminatingDeoptimizeCall()));
// TODO: These can be fine-tuned further to consider code size or deopt states
// that are captured by the deoptimize exit block.
// Also, we can extend this to support more cases, if we actually
@@ -974,11 +984,9 @@ bool llvm::UnrollRuntimeLoopRemainder(
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
UnrollResult =
UnrollLoop(remainderLoop,
- {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
- /*Force*/ false, /*AllowRuntime*/ false,
- /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
- /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
- /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
+ {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
+ /*AllowExpensiveTripCount*/ false,
+ /*UnrollRemainder*/ false, ForgetAllSCEV},
LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
index f0f423e9812a..e4d78f9ada08 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -54,16 +54,10 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-static cl::opt<bool> ForceReductionIntrinsic(
- "force-reduction-intrinsics", cl::Hidden,
- cl::desc("Force creating reduction intrinsics for testing."),
- cl::init(false));
-
#define DEBUG_TYPE "loop-utils"
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
static const char *LLVMLoopDisableLICM = "llvm.licm.disable";
-static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
@@ -260,50 +254,8 @@ void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,
TheLoop->setLoopID(NewLoopID);
}
-/// Find string metadata for loop
-///
-/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
-/// operand or null otherwise. If the string metadata is not found return
-/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
- StringRef Name) {
- MDNode *MD = findOptionMDForLoop(TheLoop, Name);
- if (!MD)
- return None;
- switch (MD->getNumOperands()) {
- case 1:
- return nullptr;
- case 2:
- return &MD->getOperand(1);
- default:
- llvm_unreachable("loop metadata has 0 or 1 operand");
- }
-}
-
-static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
- StringRef Name) {
- MDNode *MD = findOptionMDForLoop(TheLoop, Name);
- if (!MD)
- return None;
- switch (MD->getNumOperands()) {
- case 1:
- // When the value is absent it is interpreted as 'attribute set'.
- return true;
- case 2:
- if (ConstantInt *IntMD =
- mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
- return IntMD->getZExtValue();
- return true;
- }
- llvm_unreachable("unexpected number of options");
-}
-
-bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
- return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
-}
-
Optional<ElementCount>
-llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
+llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
Optional<int> Width =
getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
@@ -316,20 +268,6 @@ llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
return None;
}
-llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
- StringRef Name) {
- const MDOperand *AttrMD =
- findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
- if (!AttrMD)
- return None;
-
- ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
- if (!IntMD)
- return None;
-
- return IntMD->getSExtValue();
-}
-
Optional<MDNode *> llvm::makeFollowupLoopID(
MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
@@ -419,11 +357,7 @@ bool llvm::hasDisableLICMTransformsHint(const Loop *L) {
return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);
}
-bool llvm::hasMustProgress(const Loop *L) {
- return getBooleanLoopAttribute(L, LLVMLoopMustProgress);
-}
-
-TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
return TM_SuppressedByUser;
@@ -444,7 +378,7 @@ TransformationMode llvm::hasUnrollTransformation(Loop *L) {
return TM_Unspecified;
}
-TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
return TM_SuppressedByUser;
@@ -462,7 +396,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
return TM_Unspecified;
}
-TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
Optional<bool> Enable =
getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
@@ -498,7 +432,7 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
return TM_Unspecified;
}
-TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
return TM_ForcedByUser;
@@ -508,7 +442,7 @@ TransformationMode llvm::hasDistributeTransformation(Loop *L) {
return TM_Unspecified;
}
-TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
return TM_SuppressedByUser;
@@ -789,8 +723,8 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
- (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
- /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+ (void)changeToUnreachable(BackedgeBB->getTerminator(),
+ /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
// Erase (and destroy) this loop instance. Handles relinking sub-loops
// and blocks within the loop as needed.
@@ -944,12 +878,6 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
break;
}
- // We only match FP sequences that are 'fast', so we can unconditionally
- // set it on any generated instructions.
- IRBuilderBase::FastMathFlagGuard FMFG(Builder);
- FastMathFlags FMF;
- FMF.setFast();
- Builder.setFastMathFlags(FMF);
Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
@@ -1031,14 +959,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
const TargetTransformInfo *TTI,
Value *Src, RecurKind RdxKind,
ArrayRef<Value *> RedOps) {
- unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
TargetTransformInfo::ReductionFlags RdxFlags;
RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax ||
RdxKind == RecurKind::FMax;
RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
- if (!ForceReductionIntrinsic &&
- !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
- return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
switch (RdxKind) {
@@ -1076,7 +1000,8 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
Value *llvm::createTargetReduction(IRBuilderBase &B,
const TargetTransformInfo *TTI,
- RecurrenceDescriptor &Desc, Value *Src) {
+ const RecurrenceDescriptor &Desc,
+ Value *Src) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
@@ -1085,6 +1010,17 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
}
+Value *llvm::createOrderedReduction(IRBuilderBase &B,
+ const RecurrenceDescriptor &Desc,
+ Value *Src, Value *Start) {
+ assert(Desc.getRecurrenceKind() == RecurKind::FAdd &&
+ "Unexpected reduction kind");
+ assert(Src->getType()->isVectorTy() && "Expected a vector type");
+ assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
+
+ return B.CreateFAddReduce(Start, Src);
+}
+
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
auto *VecOp = dyn_cast<Instruction>(I);
if (!VecOp)
@@ -1587,55 +1523,31 @@ struct PointerBounds {
/// in \p TheLoop. \return the values for the bounds.
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
Loop *TheLoop, Instruction *Loc,
- SCEVExpander &Exp, ScalarEvolution *SE) {
- // TODO: Add helper to retrieve pointers to CG.
- Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue;
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ SCEVExpander &Exp) {
LLVMContext &Ctx = Loc->getContext();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-
- if (SE->isLoopInvariant(Sc, TheLoop)) {
- LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
- << *Ptr << "\n");
- // Ptr could be in the loop body. If so, expand a new one at the correct
- // location.
- Instruction *Inst = dyn_cast<Instruction>(Ptr);
- Value *NewPtr = (Inst && TheLoop->contains(Inst))
- ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
- : Ptr;
- // We must return a half-open range, which means incrementing Sc.
- const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
- Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
- return {NewPtr, NewPtrPlusOne};
- } else {
- Value *Start = nullptr, *End = nullptr;
- LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
- Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
- End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
- LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
- << "\n");
- return {Start, End};
- }
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, CG->AddressSpace);
+
+ Value *Start = nullptr, *End = nullptr;
+ LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ return {Start, End};
}
/// Turns a collection of checks into a collection of expanded upper and
/// lower bounds for both pointers in the check.
static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
- Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) {
+ Instruction *Loc, SCEVExpander &Exp) {
SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
// Here we're relying on the SCEV Expander's cache to only emit code for the
// same bounds once.
transform(PointerChecks, std::back_inserter(ChecksWithBounds),
[&](const RuntimePointerCheck &Check) {
- PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE),
- Second =
- expandBounds(Check.second, L, Loc, Exp, SE);
+ PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
+ Second = expandBounds(Check.second, L, Loc, Exp);
return std::make_pair(First, Second);
});
@@ -1645,12 +1557,10 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
- ScalarEvolution *SE) {
+ SCEVExpander &Exp) {
// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
- const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
- SCEVExpander Exp(*SE, DL, "induction");
- auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp);
+ auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
LLVMContext &Ctx = Loc->getContext();
Instruction *FirstInst = nullptr;
@@ -1722,3 +1632,177 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
FirstInst = GetFirstInst(FirstInst, Check, Loc);
return std::make_pair(FirstInst, Check);
}
+
+Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L,
+ unsigned MSSAThreshold,
+ MemorySSA &MSSA,
+ AAResults &AA) {
+ auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());
+ if (!TI || !TI->isConditional())
+ return {};
+
+ auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+ // The case with the condition outside the loop should already be handled
+ // earlier.
+ if (!CondI || !L.contains(CondI))
+ return {};
+
+ SmallVector<Instruction *> InstToDuplicate;
+ InstToDuplicate.push_back(CondI);
+
+ SmallVector<Value *, 4> WorkList;
+ WorkList.append(CondI->op_begin(), CondI->op_end());
+
+ SmallVector<MemoryAccess *, 4> AccessesToCheck;
+ SmallVector<MemoryLocation, 4> AccessedLocs;
+ while (!WorkList.empty()) {
+ Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+ if (!I || !L.contains(I))
+ continue;
+
+ // TODO: support additional instructions.
+ if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+ return {};
+
+ // Do not duplicate volatile and atomic loads.
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->isVolatile() || LI->isAtomic())
+ return {};
+
+ InstToDuplicate.push_back(I);
+ if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
+ if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+ // Queue the defining access to check for alias checks.
+ AccessesToCheck.push_back(MemUse->getDefiningAccess());
+ AccessedLocs.push_back(MemoryLocation::get(I));
+ } else {
+ // MemoryDefs may clobber the location or may be atomic memory
+ // operations. Bail out.
+ return {};
+ }
+ }
+ WorkList.append(I->op_begin(), I->op_end());
+ }
+
+ if (InstToDuplicate.empty())
+ return {};
+
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ auto HasNoClobbersOnPath =
+ [&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,
+ MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,
+ SmallVector<MemoryAccess *, 4> AccessesToCheck)
+ -> Optional<IVConditionInfo> {
+ IVConditionInfo Info;
+ // First, collect all blocks in the loop that are on a path from Succ
+ // to the header.
+ SmallVector<BasicBlock *, 4> WorkList;
+ WorkList.push_back(Succ);
+ WorkList.push_back(Header);
+ SmallPtrSet<BasicBlock *, 4> Seen;
+ Seen.insert(Header);
+ Info.PathIsNoop &=
+ all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+
+ while (!WorkList.empty()) {
+ BasicBlock *Current = WorkList.pop_back_val();
+ if (!L.contains(Current))
+ continue;
+ const auto &SeenIns = Seen.insert(Current);
+ if (!SeenIns.second)
+ continue;
+
+ Info.PathIsNoop &= all_of(
+ *Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
+ WorkList.append(succ_begin(Current), succ_end(Current));
+ }
+
+ // Require at least 2 blocks on a path through the loop. This skips
+ // paths that directly exit the loop.
+ if (Seen.size() < 2)
+ return {};
+
+ // Next, check if there are any MemoryDefs that are on the path through
+ // the loop (in the Seen set) and they may-alias any of the locations in
+ // AccessedLocs. If that is the case, they may modify the condition and
+ // partial unswitching is not possible.
+ SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+ while (!AccessesToCheck.empty()) {
+ MemoryAccess *Current = AccessesToCheck.pop_back_val();
+ auto SeenI = SeenAccesses.insert(Current);
+ if (!SeenI.second || !Seen.contains(Current->getBlock()))
+ continue;
+
+ // Bail out if exceeded the threshold.
+ if (SeenAccesses.size() >= MSSAThreshold)
+ return {};
+
+ // MemoryUse are read-only accesses.
+ if (isa<MemoryUse>(Current))
+ continue;
+
+ // For a MemoryDef, check if it aliases any of the locations feeding
+ // the original condition.
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+ if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {
+ return isModSet(
+ AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+ }))
+ return {};
+ }
+
+ for (Use &U : Current->uses())
+ AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+ }
+
+ // We could also allow loops with known trip counts without mustprogress,
+ // but ScalarEvolution may not be available.
+ Info.PathIsNoop &= isMustProgress(&L);
+
+ // If the path is considered a no-op so far, check if it reaches a
+ // single exit block without any phis. This ensures no values from the
+ // loop are used outside of the loop.
+ if (Info.PathIsNoop) {
+ for (auto *Exiting : ExitingBlocks) {
+ if (!Seen.contains(Exiting))
+ continue;
+ for (auto *Succ : successors(Exiting)) {
+ if (L.contains(Succ))
+ continue;
+
+ Info.PathIsNoop &= llvm::empty(Succ->phis()) &&
+ (!Info.ExitForPath || Info.ExitForPath == Succ);
+ if (!Info.PathIsNoop)
+ break;
+ assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&
+ "cannot have multiple exit blocks");
+ Info.ExitForPath = Succ;
+ }
+ }
+ }
+ if (!Info.ExitForPath)
+ Info.PathIsNoop = false;
+
+ Info.InstToDuplicate = InstToDuplicate;
+ return Info;
+ };
+
+ // If we branch to the same successor, partial unswitching will not be
+ // beneficial.
+ if (TI->getSuccessor(0) == TI->getSuccessor(1))
+ return {};
+
+ if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),
+ AccessesToCheck)) {
+ Info->KnownValue = ConstantInt::getTrue(TI->getContext());
+ return Info;
+ }
+ if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),
+ AccessesToCheck)) {
+ Info->KnownValue = ConstantInt::getFalse(TI->getContext());
+ return Info;
+ }
+
+ return {};
+}
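A hedged usage sketch for the createOrderedReduction() helper introduced above; only the signature and its asserts come from the patch, while the caller, insertion point, and value names are hypothetical:

static Value *emitOrderedFAddReduction(IRBuilderBase &B,
                                       const RecurrenceDescriptor &RdxDesc,
                                       Value *VecSum, Value *StartScalar) {
  // RdxDesc must describe an FAdd recurrence, VecSum must be a vector and
  // StartScalar a scalar, per the asserts; the helper then emits
  // B.CreateFAddReduce(StartScalar, VecSum), i.e. llvm.vector.reduce.fadd
  // with a scalar start operand, preserving sequential FP semantics.
  return createOrderedReduction(B, RdxDesc, VecSum, StartScalar);
}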
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 599bd1feb2bc..8a89158788cf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -44,11 +44,11 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI,
AliasChecks(Checks.begin(), Checks.end()),
Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT),
SE(SE) {
- assert(L->getUniqueExitBlock() && "No single exit block");
}
void LoopVersioning::versionLoop(
const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ assert(VersionedLoop->getUniqueExitBlock() && "No single exit block");
assert(VersionedLoop->isLoopSimplifyForm() &&
"Loop is not in loop-simplify form");
@@ -60,9 +60,12 @@ void LoopVersioning::versionLoop(
// Add the memcheck in the original preheader (this is empty initially).
BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
const auto &RtPtrChecking = *LAI.getRuntimePointerChecking();
- std::tie(FirstCheckInst, MemRuntimeCheck) =
- addRuntimeChecks(RuntimeCheckBB->getTerminator(), VersionedLoop,
- AliasChecks, RtPtrChecking.getSE());
+
+ SCEVExpander Exp2(*RtPtrChecking.getSE(),
+ VersionedLoop->getHeader()->getModule()->getDataLayout(),
+ "induction");
+ std::tie(FirstCheckInst, MemRuntimeCheck) = addRuntimeChecks(
+ RuntimeCheckBB->getTerminator(), VersionedLoop, AliasChecks, Exp2);
SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
"scev.check");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp
new file mode 100644
index 000000000000..68d4dd9d576b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp
@@ -0,0 +1,408 @@
+//===-- MemoryOpRemark.cpp - Auto-init remark analysis---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the analysis for the "auto-init" remark.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MemoryOpRemark.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+
+using namespace llvm;
+using namespace llvm::ore;
+
+MemoryOpRemark::~MemoryOpRemark() = default;
+
+bool MemoryOpRemark::canHandle(const Instruction *I, const TargetLibraryInfo &TLI) {
+ if (isa<StoreInst>(I))
+ return true;
+
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::memcpy_inline:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memset_element_unordered_atomic:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ auto *CF = CI->getCalledFunction();
+ if (!CF)
+ return false;
+
+ if (!CF->hasName())
+ return false;
+
+ LibFunc LF;
+ bool KnownLibCall = TLI.getLibFunc(*CF, LF) && TLI.has(LF);
+ if (!KnownLibCall)
+ return false;
+
+ switch (LF) {
+ case LibFunc_memcpy_chk:
+ case LibFunc_mempcpy_chk:
+ case LibFunc_memset_chk:
+ case LibFunc_memmove_chk:
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memset:
+ case LibFunc_memmove:
+ case LibFunc_bzero:
+ case LibFunc_bcopy:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
+void MemoryOpRemark::visit(const Instruction *I) {
+ // For some of them, we can provide more information:
+
+ // For stores:
+ // * size
+ // * volatile / atomic
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ visitStore(*SI);
+ return;
+ }
+
+ // For intrinsics:
+ // * user-friendly name
+ // * size
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ visitIntrinsicCall(*II);
+ return;
+ }
+
+ // For calls:
+ // * known/unknown function (e.g. the compiler knows bzero, but it doesn't
+ // know my_bzero)
+ // * memory operation size
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ visitCall(*CI);
+ return;
+ }
+
+ visitUnknown(*I);
+}
+
+std::string MemoryOpRemark::explainSource(StringRef Type) const {
+ return (Type + ".").str();
+}
+
+StringRef MemoryOpRemark::remarkName(RemarkKind RK) const {
+ switch (RK) {
+ case RK_Store:
+ return "MemoryOpStore";
+ case RK_Unknown:
+ return "MemoryOpUnknown";
+ case RK_IntrinsicCall:
+ return "MemoryOpIntrinsicCall";
+ case RK_Call:
+ return "MemoryOpCall";
+ }
+ llvm_unreachable("missing RemarkKind case");
+}
+
+static void inlineVolatileOrAtomicWithExtraArgs(bool *Inline, bool Volatile,
+ bool Atomic,
+ DiagnosticInfoIROptimization &R) {
+ if (Inline && *Inline)
+ R << " Inlined: " << NV("StoreInlined", true) << ".";
+ if (Volatile)
+ R << " Volatile: " << NV("StoreVolatile", true) << ".";
+ if (Atomic)
+ R << " Atomic: " << NV("StoreAtomic", true) << ".";
+ // Emit the false cases under ExtraArgs. This won't show them in the remark
+ // message but will end up in the serialized remarks.
+ if ((Inline && !*Inline) || !Volatile || !Atomic)
+ R << setExtraArgs();
+ if (Inline && !*Inline)
+ R << " Inlined: " << NV("StoreInlined", false) << ".";
+ if (!Volatile)
+ R << " Volatile: " << NV("StoreVolatile", false) << ".";
+ if (!Atomic)
+ R << " Atomic: " << NV("StoreAtomic", false) << ".";
+}
+
+static Optional<uint64_t> getSizeInBytes(Optional<uint64_t> SizeInBits) {
+ if (!SizeInBits || *SizeInBits % 8 != 0)
+ return None;
+ return *SizeInBits / 8;
+}
+
+template<typename ...Ts>
+std::unique_ptr<DiagnosticInfoIROptimization>
+MemoryOpRemark::makeRemark(Ts... Args) {
+ switch (diagnosticKind()) {
+ case DK_OptimizationRemarkAnalysis:
+ return std::make_unique<OptimizationRemarkAnalysis>(Args...);
+ case DK_OptimizationRemarkMissed:
+ return std::make_unique<OptimizationRemarkMissed>(Args...);
+ default:
+ llvm_unreachable("unexpected DiagnosticKind");
+ }
+}
+
+void MemoryOpRemark::visitStore(const StoreInst &SI) {
+ bool Volatile = SI.isVolatile();
+ bool Atomic = SI.isAtomic();
+ int64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ auto R = makeRemark(RemarkPass.data(), remarkName(RK_Store), &SI);
+ *R << explainSource("Store") << "\nStore size: " << NV("StoreSize", Size)
+ << " bytes.";
+ visitPtr(SI.getOperand(1), /*IsRead=*/false, *R);
+ inlineVolatileOrAtomicWithExtraArgs(nullptr, Volatile, Atomic, *R);
+ ORE.emit(*R);
+}
+
+void MemoryOpRemark::visitUnknown(const Instruction &I) {
+ auto R = makeRemark(RemarkPass.data(), remarkName(RK_Unknown), &I);
+ *R << explainSource("Initialization");
+ ORE.emit(*R);
+}
+
+void MemoryOpRemark::visitIntrinsicCall(const IntrinsicInst &II) {
+ SmallString<32> CallTo;
+ bool Atomic = false;
+ bool Inline = false;
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::memcpy_inline:
+ CallTo = "memcpy";
+ Inline = true;
+ break;
+ case Intrinsic::memcpy:
+ CallTo = "memcpy";
+ break;
+ case Intrinsic::memmove:
+ CallTo = "memmove";
+ break;
+ case Intrinsic::memset:
+ CallTo = "memset";
+ break;
+ case Intrinsic::memcpy_element_unordered_atomic:
+ CallTo = "memcpy";
+ Atomic = true;
+ break;
+ case Intrinsic::memmove_element_unordered_atomic:
+ CallTo = "memmove";
+ Atomic = true;
+ break;
+ case Intrinsic::memset_element_unordered_atomic:
+ CallTo = "memset";
+ Atomic = true;
+ break;
+ default:
+ return visitUnknown(II);
+ }
+
+ auto R = makeRemark(RemarkPass.data(), remarkName(RK_IntrinsicCall), &II);
+ visitCallee(CallTo.str(), /*KnownLibCall=*/true, *R);
+ visitSizeOperand(II.getOperand(2), *R);
+
+ auto *CIVolatile = dyn_cast<ConstantInt>(II.getOperand(3));
+ // No such thing as a memory intrinsic that is both atomic and volatile.
+ bool Volatile = !Atomic && CIVolatile && CIVolatile->getZExtValue();
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::memcpy_inline:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ visitPtr(II.getOperand(1), /*IsRead=*/true, *R);
+ visitPtr(II.getOperand(0), /*IsRead=*/false, *R);
+ break;
+ case Intrinsic::memset:
+ case Intrinsic::memset_element_unordered_atomic:
+ visitPtr(II.getOperand(0), /*IsRead=*/false, *R);
+ break;
+ }
+ inlineVolatileOrAtomicWithExtraArgs(&Inline, Volatile, Atomic, *R);
+ ORE.emit(*R);
+}
+
+void MemoryOpRemark::visitCall(const CallInst &CI) {
+ Function *F = CI.getCalledFunction();
+ if (!F)
+ return visitUnknown(CI);
+
+ LibFunc LF;
+ bool KnownLibCall = TLI.getLibFunc(*F, LF) && TLI.has(LF);
+ auto R = makeRemark(RemarkPass.data(), remarkName(RK_Call), &CI);
+ visitCallee(F, KnownLibCall, *R);
+ visitKnownLibCall(CI, LF, *R);
+ ORE.emit(*R);
+}
+
+template <typename FTy>
+void MemoryOpRemark::visitCallee(FTy F, bool KnownLibCall,
+ DiagnosticInfoIROptimization &R) {
+ R << "Call to ";
+ if (!KnownLibCall)
+ R << NV("UnknownLibCall", "unknown") << " function ";
+ R << NV("Callee", F) << explainSource("");
+}
+
+void MemoryOpRemark::visitKnownLibCall(const CallInst &CI, LibFunc LF,
+ DiagnosticInfoIROptimization &R) {
+ switch (LF) {
+ default:
+ return;
+ case LibFunc_memset_chk:
+ case LibFunc_memset:
+ visitSizeOperand(CI.getOperand(2), R);
+ visitPtr(CI.getOperand(0), /*IsRead=*/false, R);
+ break;
+ case LibFunc_bzero:
+ visitSizeOperand(CI.getOperand(1), R);
+ visitPtr(CI.getOperand(0), /*IsRead=*/false, R);
+ break;
+ case LibFunc_memcpy_chk:
+ case LibFunc_mempcpy_chk:
+ case LibFunc_memmove_chk:
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memmove:
+ case LibFunc_bcopy:
+ visitSizeOperand(CI.getOperand(2), R);
+ visitPtr(CI.getOperand(1), /*IsRead=*/true, R);
+ visitPtr(CI.getOperand(0), /*IsRead=*/false, R);
+ break;
+ }
+}
+
+void MemoryOpRemark::visitSizeOperand(Value *V, DiagnosticInfoIROptimization &R) {
+ if (auto *Len = dyn_cast<ConstantInt>(V)) {
+ uint64_t Size = Len->getZExtValue();
+ R << " Memory operation size: " << NV("StoreSize", Size) << " bytes.";
+ }
+}
+
+static Optional<StringRef> nameOrNone(const Value *V) {
+ if (V->hasName())
+ return V->getName();
+ return None;
+}
+
+void MemoryOpRemark::visitVariable(const Value *V,
+ SmallVectorImpl<VariableInfo> &Result) {
+ if (auto *GV = dyn_cast<GlobalVariable>(V)) {
+ auto *Ty = GV->getValueType();
+ uint64_t Size = DL.getTypeSizeInBits(Ty).getFixedSize();
+ VariableInfo Var{nameOrNone(GV), Size};
+ if (!Var.isEmpty())
+ Result.push_back(std::move(Var));
+ return;
+ }
+
+ // If we find some information in the debug info, take that.
+ bool FoundDI = false;
+ // Try to get an llvm.dbg.declare, which has a DILocalVariable giving us the
+ // real debug info name and size of the variable.
+ for (const DbgVariableIntrinsic *DVI :
+ FindDbgAddrUses(const_cast<Value *>(V))) {
+ if (DILocalVariable *DILV = DVI->getVariable()) {
+ Optional<uint64_t> DISize = getSizeInBytes(DILV->getSizeInBits());
+ VariableInfo Var{DILV->getName(), DISize};
+ if (!Var.isEmpty()) {
+ Result.push_back(std::move(Var));
+ FoundDI = true;
+ }
+ }
+ }
+ if (FoundDI) {
+ assert(!Result.empty());
+ return;
+ }
+
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI)
+ return;
+
+ // If not, get it from the alloca.
+ Optional<TypeSize> TySize = AI->getAllocationSizeInBits(DL);
+ Optional<uint64_t> Size =
+ TySize ? getSizeInBytes(TySize->getFixedSize()) : None;
+ VariableInfo Var{nameOrNone(AI), Size};
+ if (!Var.isEmpty())
+ Result.push_back(std::move(Var));
+}
+
+void MemoryOpRemark::visitPtr(Value *Ptr, bool IsRead, DiagnosticInfoIROptimization &R) {
+ // Find if Ptr is a known variable we can give more information on.
+ SmallVector<Value *, 2> Objects;
+ getUnderlyingObjectsForCodeGen(Ptr, Objects);
+ SmallVector<VariableInfo, 2> VIs;
+ for (const Value *V : Objects)
+ visitVariable(V, VIs);
+
+ if (VIs.empty()) {
+ bool CanBeNull;
+ bool CanBeFreed;
+ uint64_t Size = Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
+ if (!Size)
+ return;
+ VIs.push_back({None, Size});
+ }
+
+ R << (IsRead ? "\n Read Variables: " : "\n Written Variables: ");
+ for (unsigned i = 0; i < VIs.size(); ++i) {
+ const VariableInfo &VI = VIs[i];
+ assert(!VI.isEmpty() && "No extra content to display.");
+ if (i != 0)
+ R << ", ";
+ if (VI.Name)
+ R << NV(IsRead ? "RVarName" : "WVarName", *VI.Name);
+ else
+ R << NV(IsRead ? "RVarName" : "WVarName", "<unknown>");
+ if (VI.Size)
+ R << " (" << NV(IsRead ? "RVarSize" : "WVarSize", *VI.Size) << " bytes)";
+ }
+ R << ".";
+}
+
+bool AutoInitRemark::canHandle(const Instruction *I) {
+ if (!I->hasMetadata(LLVMContext::MD_annotation))
+ return false;
+ return any_of(I->getMetadata(LLVMContext::MD_annotation)->operands(),
+ [](const MDOperand &Op) {
+ return cast<MDString>(Op.get())->getString() == "auto-init";
+ });
+}
+
+std::string AutoInitRemark::explainSource(StringRef Type) const {
+ return (Type + " inserted by -ftrivial-auto-var-init.").str();
+}
+
+StringRef AutoInitRemark::remarkName(RemarkKind RK) const {
+ switch (RK) {
+ case RK_Store:
+ return "AutoInitStore";
+ case RK_Unknown:
+ return "AutoInitUnknownInstruction";
+ case RK_IntrinsicCall:
+ return "AutoInitIntrinsicCall";
+ case RK_Call:
+ return "AutoInitCall";
+ }
+ llvm_unreachable("missing RemarkKind case");
+}
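A hedged sketch of driving the new remark analysis from a pass; the AutoInitRemark constructor arguments and the remark pass name are assumptions inferred from the members used above (ORE, RemarkPass, DL, TLI), not copied from the header:

// Illustrative only; requires llvm/IR/InstIterator.h for instructions(F),
// and the AutoInitRemark constructor signature below is assumed.
static void emitAutoInitRemarks(Function &F, OptimizationRemarkEmitter &ORE,
                                const TargetLibraryInfo &TLI) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  AutoInitRemark Remark(ORE, /*RemarkPass=*/"annotation-remarks", DL, TLI);
  for (Instruction &I : instructions(F))
    if (Remark.canHandle(&I)) // only instructions tagged !annotation "auto-init"
      Remark.visit(&I);
}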
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index e350320e7569..b1965cf2becb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -67,9 +67,9 @@ struct Renamer {
};
void MetaRename(Function &F) {
- for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI)
- if (!AI->getType()->isVoidTy())
- AI->setName("arg");
+ for (Argument &Arg : F.args())
+ if (!Arg.getType()->isVoidTy())
+ Arg.setName("arg");
for (auto &BB : F) {
BB.setName("bb");
@@ -101,12 +101,12 @@ void MetaRename(Module &M,
}
// Rename all global variables
- for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) {
- StringRef Name = GI->getName();
+ for (GlobalVariable &GV : M.globals()) {
+ StringRef Name = GV.getName();
if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
continue;
- GI->setName("global");
+ GV.setName("global");
}
// Rename all struct types
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index ef9f18a2289e..2aef37205c53 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -76,18 +76,20 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *>
SmallPtrSet<Constant *, 16> InitAsSet;
SmallVector<Constant *, 16> Init;
if (GV) {
- auto *CA = cast<ConstantArray>(GV->getInitializer());
- for (auto &Op : CA->operands()) {
- Constant *C = cast_or_null<Constant>(Op);
- if (InitAsSet.insert(C).second)
- Init.push_back(C);
+ if (GV->hasInitializer()) {
+ auto *CA = cast<ConstantArray>(GV->getInitializer());
+ for (auto &Op : CA->operands()) {
+ Constant *C = cast_or_null<Constant>(Op);
+ if (InitAsSet.insert(C).second)
+ Init.push_back(C);
+ }
}
GV->eraseFromParent();
}
Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
for (auto *V : Values) {
- Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy);
+ Constant *C = ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, Int8PtrTy);
if (InitAsSet.insert(C).second)
Init.push_back(C);
}
@@ -120,11 +122,14 @@ llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
}
Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
- Function *Ctor = Function::Create(
+ Function *Ctor = Function::createWithDefaultAttr(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
- GlobalValue::InternalLinkage, CtorName, &M);
+ GlobalValue::InternalLinkage, 0, CtorName, &M);
+ Ctor->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
ReturnInst::Create(M.getContext(), CtorBB);
+ // Ensure Ctor cannot be discarded, even if in a comdat.
+ appendToUsed(M, {Ctor});
return Ctor;
}
@@ -172,28 +177,6 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions(
return std::make_pair(Ctor, InitFunction);
}
-Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
- assert(!Name.empty() && "Expected init function name");
- if (Function *F = M.getFunction(Name)) {
- if (F->arg_size() != 0 ||
- F->getReturnType() != Type::getVoidTy(M.getContext())) {
- std::string Err;
- raw_string_ostream Stream(Err);
- Stream << "Sanitizer interface function defined with wrong type: " << *F;
- report_fatal_error(Err);
- }
- return F;
- }
- Function *F =
- cast<Function>(M.getOrInsertFunction(Name, AttributeList(),
- Type::getVoidTy(M.getContext()))
- .getCallee());
-
- appendToGlobalCtors(M, F, 0);
-
- return F;
-}
-
void llvm::filterDeadComdatFunctions(
Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
// Build a map from the comdat to the number of entries in that comdat we
@@ -287,7 +270,7 @@ std::string llvm::getUniqueModuleId(Module *M) {
SmallString<32> Str;
MD5::stringifyResult(R, Str);
- return ("$" + Str).str();
+ return ("." + Str).str();
}
void VFABI::setVectorVariantNames(
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 3312a6f9459b..91280762aaa7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -24,7 +23,6 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -77,8 +75,7 @@ static Instruction *getBranchTerminator(const PredicateBase *PB) {
// Given a predicate info that is a type of branching terminator, get the
// edge this predicate info represents
-const std::pair<BasicBlock *, BasicBlock *>
-getBlockEdge(const PredicateBase *PB) {
+std::pair<BasicBlock *, BasicBlock *> getBlockEdge(const PredicateBase *PB) {
assert(isa<PredicateWithEdge>(PB) &&
"Not a predicate info type we know how to get an edge from.");
const auto *PEdge = cast<PredicateWithEdge>(PB);
@@ -158,8 +155,7 @@ struct ValueDFS_Compare {
}
// For a phi use, or a non-materialized def, return the edge it represents.
- const std::pair<BasicBlock *, BasicBlock *>
- getBlockEdge(const ValueDFS &VD) const {
+ std::pair<BasicBlock *, BasicBlock *> getBlockEdge(const ValueDFS &VD) const {
if (!VD.Def && VD.U) {
auto *PHI = cast<PHINode>(VD.U->getUser());
return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent());
@@ -541,21 +537,6 @@ void PredicateInfoBuilder::buildPredicateInfo() {
renameUses(OpsToRename);
}
-// Create a ssa_copy declaration with custom mangling, because
-// Intrinsic::getDeclaration does not handle overloaded unnamed types properly:
-// all unnamed types get mangled to the same string. We use the pointer
-// to the type as name here, as it guarantees unique names for different
-// types and we remove the declarations when destroying PredicateInfo.
-// It is a workaround for PR38117, because solving it in a fully general way is
-// tricky (FIXME).
-static Function *getCopyDeclaration(Module *M, Type *Ty) {
- std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty);
- return cast<Function>(
- M->getOrInsertFunction(Name,
- getType(M->getContext(), Intrinsic::ssa_copy, Ty))
- .getCallee());
-}
-
// Given the renaming stack, make all the operands currently on the stack real
// by inserting them into the IR. Return the last operation's value.
Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
@@ -587,9 +568,8 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// order in the case of multiple predicateinfo in the same block.
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
- Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (IF->users().empty())
- PI.CreatedDeclarations.insert(IF);
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
PI.PredicateMap.insert({PIC, ValInfo});
@@ -601,9 +581,8 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// Insert the predicate directly after the assume. While it also holds
// directly before it, assume(i1 true) is not a useful fact.
IRBuilder<> B(PAssume->AssumeInst->getNextNode());
- Function *IF = getCopyDeclaration(F.getParent(), Op->getType());
- if (IF->users().empty())
- PI.CreatedDeclarations.insert(IF);
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
CallInst *PIC = B.CreateCall(IF, Op);
PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
@@ -782,23 +761,6 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
Builder.buildPredicateInfo();
}
-// Remove all declarations we created . The PredicateInfo consumers are
-// responsible for remove the ssa_copy calls created.
-PredicateInfo::~PredicateInfo() {
- // Collect function pointers in set first, as SmallSet uses a SmallVector
- // internally and we have to remove the asserting value handles first.
- SmallPtrSet<Function *, 20> FunctionPtrs;
- for (auto &F : CreatedDeclarations)
- FunctionPtrs.insert(&*F);
- CreatedDeclarations.clear();
-
- for (Function *F : FunctionPtrs) {
- assert(F->user_begin() == F->user_end() &&
- "PredicateInfo consumer did not remove all SSA copies.");
- F->eraseFromParent();
- }
-}
-
Optional<PredicateConstraint> PredicateBase::getConstraint() const {
switch (Type) {
case PT_Assume:
@@ -865,20 +827,6 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
}
-// Replace ssa_copy calls created by PredicateInfo with their operand.
-static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
- for (auto I = inst_begin(F), E = inst_end(F); I != E;) {
- Instruction *Inst = &*I++;
- const auto *PI = PredInfo.getPredicateInfoFor(Inst);
- auto *II = dyn_cast<IntrinsicInst>(Inst);
- if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
- continue;
-
- Inst->replaceAllUsesWith(II->getOperand(0));
- Inst->eraseFromParent();
- }
-}
-
bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -886,8 +834,6 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
PredInfo->print(dbgs());
if (VerifyPredicateInfo)
PredInfo->verifyPredicateInfo();
-
- replaceCreatedSSACopys(*PredInfo, F);
return false;
}
@@ -899,7 +845,6 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(OS);
- replaceCreatedSSACopys(*PredInfo, F);
return PreservedAnalyses::all();
}
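With getCopyDeclaration(), the PredicateInfo destructor cleanup, and replaceCreatedSSACopys() all removed above, deleting the llvm.ssa.copy calls is now entirely the consumer's responsibility. A sketch modeled on the deleted helper (function name hypothetical):

static void stripSSACopies(PredicateInfo &PredInfo, Function &F) {
  for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
    auto *II = dyn_cast<IntrinsicInst>(&Inst);
    if (!II || II->getIntrinsicID() != Intrinsic::ssa_copy ||
        !PredInfo.getPredicateInfoFor(&Inst))
      continue;
    // Replace each ssa_copy with its operand and erase it, as the removed
    // replaceCreatedSSACopys() used to do after printing.
    Inst.replaceAllUsesWith(II->getOperand(0));
    Inst.eraseFromParent();
  }
}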
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 86bbb6a889e6..427028066026 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -306,17 +306,15 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
LoadNotNull->insertAfter(LI);
CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
CI->insertAfter(LoadNotNull);
- AC->registerAssumption(CI);
+ AC->registerAssumption(cast<AssumeInst>(CI));
}
static void removeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
- for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) {
- Instruction *I = cast<Instruction>(UI->getUser());
- Use &U = *UI;
- ++UI;
+ for (Use &U : llvm::make_early_inc_range(AI->uses())) {
+ Instruction *I = cast<Instruction>(U.getUser());
if (isa<LoadInst>(I) || isa<StoreInst>(I))
continue;
@@ -330,10 +328,8 @@ static void removeIntrinsicUsers(AllocaInst *AI) {
// The only users of this bitcast/GEP instruction are lifetime intrinsics.
// Follow the use/def chain to erase them now instead of leaving it for
// dead code elimination later.
- for (auto UUI = I->use_begin(), UUE = I->use_end(); UUI != UUE;) {
- Instruction *Inst = cast<Instruction>(UUI->getUser());
- Use &UU = *UUI;
- ++UUI;
+ for (Use &UU : llvm::make_early_inc_range(I->uses())) {
+ Instruction *Inst = cast<Instruction>(UU.getUser());
// Drop the use of I in droppable instructions.
if (Inst->isDroppable()) {
@@ -403,7 +399,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// If the replacement value is the load, this must occur in unreachable
// code.
if (ReplVal == LI)
- ReplVal = UndefValue::get(LI->getType());
+ ReplVal = PoisonValue::get(LI->getType());
// If the load was marked as nonnull we don't want to lose
// that information when we erase this Load. So we preserve
@@ -512,7 +508,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// If the replacement value is the load, this must occur in unreachable
// code.
if (ReplVal == LI)
- ReplVal = UndefValue::get(LI->getType());
+ ReplVal = PoisonValue::get(LI->getType());
LI->replaceAllUsesWith(ReplVal);
}
@@ -676,7 +672,7 @@ void PromoteMem2Reg::run() {
// unreachable basic blocks that were not processed by walking the dominator
// tree. Just delete the users now.
if (!A->use_empty())
- A->replaceAllUsesWith(UndefValue::get(A->getType()));
+ A->replaceAllUsesWith(PoisonValue::get(A->getType()));
A->eraseFromParent();
}
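The loops above now use llvm::make_early_inc_range, which advances to the next iterator before the body runs so the current user can be erased without invalidating the traversal; the bare idiom as a hedged sketch, assuming each user holds exactly one use of the value V:

// V is a hypothetical value whose instruction users are all being removed.
for (Use &U : llvm::make_early_inc_range(V->uses())) {
  auto *I = cast<Instruction>(U.getUser());
  I->eraseFromParent(); // safe: the range iterator already moved past U
}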
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
new file mode 100644
index 000000000000..85e5adaeaf5e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -0,0 +1,212 @@
+//===- RelLookupTableConverterPass - Rel Table Conv -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements relative lookup table converter that converts
+// lookup tables to relative lookup tables to make them PIC-friendly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
+ // If lookup table has more than one user,
+ // do not generate a relative lookup table.
+ // This is to simplify the analysis that needs to be done for this pass.
+ // TODO: Add support for lookup tables with multiple uses.
+ // For example, this can happen when a function that uses a lookup table gets
+ // inlined into multiple call sites.
+ if (!GV.hasInitializer() ||
+ !GV.isConstant() ||
+ !GV.hasOneUse())
+ return false;
+
+ GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser());
+ if (!GEP || !GEP->hasOneUse())
+ return false;
+
+ LoadInst *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser());
+ if (!Load || !Load->hasOneUse())
+ return false;
+
+ // If the original lookup table does not have local linkage and is
+ // not dso_local, do not generate a relative lookup table.
+ // This optimization creates a relative lookup table that consists of
+ // offsets between the start of the lookup table and its elements.
+ // To be able to generate these offsets, relative lookup table and
+ // its elements should have internal linkage and be dso_local, which means
+ // that they should resolve to symbols within the same linkage unit.
+ if (!GV.hasLocalLinkage() ||
+ !GV.isDSOLocal() ||
+ !GV.isImplicitDSOLocal())
+ return false;
+
+ ConstantArray *Array = dyn_cast<ConstantArray>(GV.getInitializer());
+ // If values are not pointers, do not generate a relative lookup table.
+ if (!Array || !Array->getType()->getElementType()->isPointerTy())
+ return false;
+
+ const DataLayout &DL = M.getDataLayout();
+ for (const Use &Op : Array->operands()) {
+ Constant *ConstOp = cast<Constant>(&Op);
+ GlobalValue *GVOp;
+ APInt Offset;
+
+ // If an operand is not a constant offset from a lookup table,
+ // do not generate a relative lookup table.
+ if (!IsConstantOffsetFromGlobal(ConstOp, GVOp, Offset, DL))
+ return false;
+
+ // If operand is mutable, do not generate a relative lookup table.
+ auto *GlovalVarOp = dyn_cast<GlobalVariable>(GVOp);
+ if (!GlovalVarOp || !GlovalVarOp->isConstant())
+ return false;
+
+ if (!GlovalVarOp->hasLocalLinkage() ||
+ !GlovalVarOp->isDSOLocal() ||
+ !GlovalVarOp->isImplicitDSOLocal())
+ return false;
+ }
+
+ return true;
+}
+
+static GlobalVariable *createRelLookupTable(Function &Func,
+ GlobalVariable &LookupTable) {
+ Module &M = *Func.getParent();
+ ConstantArray *LookupTableArr =
+ cast<ConstantArray>(LookupTable.getInitializer());
+ unsigned NumElts = LookupTableArr->getType()->getNumElements();
+ ArrayType *IntArrayTy =
+ ArrayType::get(Type::getInt32Ty(M.getContext()), NumElts);
+
+ GlobalVariable *RelLookupTable = new GlobalVariable(
+ M, IntArrayTy, LookupTable.isConstant(), LookupTable.getLinkage(),
+ nullptr, "reltable." + Func.getName(), &LookupTable,
+ LookupTable.getThreadLocalMode(), LookupTable.getAddressSpace(),
+ LookupTable.isExternallyInitialized());
+
+ uint64_t Idx = 0;
+ SmallVector<Constant *, 64> RelLookupTableContents(NumElts);
+
+ for (Use &Operand : LookupTableArr->operands()) {
+ Constant *Element = cast<Constant>(Operand);
+ Type *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
+ Constant *Base = llvm::ConstantExpr::getPtrToInt(RelLookupTable, IntPtrTy);
+ Constant *Target = llvm::ConstantExpr::getPtrToInt(Element, IntPtrTy);
+ Constant *Sub = llvm::ConstantExpr::getSub(Target, Base);
+ Constant *RelOffset =
+ llvm::ConstantExpr::getTrunc(Sub, Type::getInt32Ty(M.getContext()));
+ RelLookupTableContents[Idx++] = RelOffset;
+ }
+
+ Constant *Initializer =
+ ConstantArray::get(IntArrayTy, RelLookupTableContents);
+ RelLookupTable->setInitializer(Initializer);
+ RelLookupTable->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ RelLookupTable->setAlignment(llvm::Align(4));
+ return RelLookupTable;
+}
+
+static void convertToRelLookupTable(GlobalVariable &LookupTable) {
+ GetElementPtrInst *GEP =
+ cast<GetElementPtrInst>(LookupTable.use_begin()->getUser());
+ LoadInst *Load = cast<LoadInst>(GEP->use_begin()->getUser());
+
+ Module &M = *LookupTable.getParent();
+ BasicBlock *BB = GEP->getParent();
+ IRBuilder<> Builder(BB);
+ Function &Func = *BB->getParent();
+
+ // Generate an array that consists of relative offsets.
+ GlobalVariable *RelLookupTable = createRelLookupTable(Func, LookupTable);
+
+ // Place new instruction sequence before GEP.
+ Builder.SetInsertPoint(GEP);
+ Value *Index = GEP->getOperand(2);
+ IntegerType *IntTy = cast<IntegerType>(Index->getType());
+ Value *Offset =
+ Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift");
+
+ Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
+ &M, Intrinsic::load_relative, {Index->getType()});
+ Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy());
+
+ // Create a call to load.relative intrinsic that computes the target address
+ // by adding base address (lookup table address) and relative offset.
+ Value *Result = Builder.CreateCall(LoadRelIntrinsic, {Base, Offset},
+ "reltable.intrinsic");
+
+ // Create a bitcast instruction if necessary.
+ if (Load->getType() != Builder.getInt8PtrTy())
+ Result = Builder.CreateBitCast(Result, Load->getType(), "reltable.bitcast");
+
+ // Replace load instruction with the new generated instruction sequence.
+ Load->replaceAllUsesWith(Result);
+ // Remove Load and GEP instructions.
+ Load->eraseFromParent();
+ GEP->eraseFromParent();
+}
+
+// Convert lookup tables to relative lookup tables in the module.
+static bool convertToRelativeLookupTables(
+ Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+ Module::iterator FI = M.begin();
+ if (FI == M.end())
+ return false;
+
+ // Check if we have a target that supports relative lookup tables.
+ if (!GetTTI(*FI).shouldBuildRelLookupTables())
+ return false;
+
+ bool Changed = false;
+
+ for (auto GVI = M.global_begin(), E = M.global_end(); GVI != E;) {
+ GlobalVariable &GV = *GVI++;
+
+ if (!shouldConvertToRelLookupTable(M, GV))
+ continue;
+
+ convertToRelLookupTable(GV);
+
+ // Remove the original lookup table.
+ GV.eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+PreservedAnalyses RelLookupTableConverterPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ if (!convertToRelativeLookupTables(M, GetTTI))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
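A hedged sketch of scheduling the new converter under the new pass manager; only the RelLookupTableConverterPass class name comes from this file, and the surrounding PassBuilder boilerplate is assumed:

// The converter queries TargetIRAnalysis (via GetTTI above), so the analysis
// managers must be registered and cross-wired before running the pipeline.
PassBuilder PB;
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
CGSCCAnalysisManager CGAM;
ModuleAnalysisManager MAM;
PB.registerModuleAnalyses(MAM);
PB.registerCGSCCAnalyses(CGAM);
PB.registerFunctionAnalyses(FAM);
PB.registerLoopAnalyses(LAM);
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

ModulePassManager MPM;
MPM.addPass(RelLookupTableConverterPass());
MPM.run(M, MAM); // M: the Module whose lookup tables get converted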
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp
new file mode 100644
index 000000000000..4cf99abcc10f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -0,0 +1,1713 @@
+//===- SCCPSolver.cpp - SCCP Utility --------------------------- *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements the Sparse Conditional Constant Propagation (SCCP)
+// utility.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SCCPSolver.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sccp"
+
+// The maximum number of range extensions allowed for operations requiring
+// widening.
+static const unsigned MaxNumRangeExtensions = 10;
+
+/// Returns MergeOptions with MaxWidenSteps set to MaxNumRangeExtensions.
+static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() {
+ return ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ MaxNumRangeExtensions);
+}
+
+namespace {
+
+// Helper to check if \p LV is either a constant or a constant
+// range with a single element. This should cover exactly the same cases as the
+// old ValueLatticeElement::isConstant() and is intended to be used in the
+// transition to ValueLatticeElement.
+bool isConstant(const ValueLatticeElement &LV) {
+ return LV.isConstant() ||
+ (LV.isConstantRange() && LV.getConstantRange().isSingleElement());
+}
+
+// Helper to check if \p LV is either overdefined or a constant range with more
+// than a single element. This should cover exactly the same cases as the old
+// ValueLatticeElement::isOverdefined() and is intended to be used in the
+// transition to ValueLatticeElement.
+bool isOverdefined(const ValueLatticeElement &LV) {
+ return !LV.isUnknownOrUndef() && !isConstant(LV);
+}
+
+} // namespace
+
+namespace llvm {
+
+/// Helper class for SCCPSolver. This implements the instruction visitor and
+/// holds all the state.
+class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
+ const DataLayout &DL;
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI;
+ SmallPtrSet<BasicBlock *, 8> BBExecutable; // The BBs that are executable.
+ DenseMap<Value *, ValueLatticeElement>
+ ValueState; // The state each value is in.
+
+ /// StructValueState - This maintains ValueState for values that have
+ /// StructType, for example for formal arguments, calls, insertelement, etc.
+ DenseMap<std::pair<Value *, unsigned>, ValueLatticeElement> StructValueState;
+
+ /// GlobalValue - If we are tracking any values for the contents of a global
+ /// variable, we keep a mapping from the constant accessor to the element of
+ /// the global, to the currently known value. If the value becomes
+ /// overdefined, its entry is simply removed from this map.
+ DenseMap<GlobalVariable *, ValueLatticeElement> TrackedGlobals;
+
+ /// TrackedRetVals - If we are tracking arguments into and the return
+ /// value out of a function, it will have an entry in this map, indicating
+ /// what the known return value for the function is.
+ MapVector<Function *, ValueLatticeElement> TrackedRetVals;
+
+ /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
+ /// that return multiple values.
+ MapVector<std::pair<Function *, unsigned>, ValueLatticeElement>
+ TrackedMultipleRetVals;
+
+ /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
+ /// represented here for efficient lookup.
+ SmallPtrSet<Function *, 16> MRVFunctionsTracked;
+
+ /// A list of functions whose return cannot be modified.
+ SmallPtrSet<Function *, 16> MustPreserveReturnsInFunctions;
+
+  /// TrackingIncomingArguments - This is the set of functions whose arguments
+  /// we make optimistic assumptions about and try to prove to be constants.
+ SmallPtrSet<Function *, 16> TrackingIncomingArguments;
+
+ /// The reason for two worklists is that overdefined is the lowest state
+ /// on the lattice, and moving things to overdefined as fast as possible
+ /// makes SCCP converge much faster.
+ ///
+ /// By having a separate worklist, we accomplish this because everything
+ /// possibly overdefined will become overdefined at the soonest possible
+ /// point.
+ SmallVector<Value *, 64> OverdefinedInstWorkList;
+ SmallVector<Value *, 64> InstWorkList;
+
+ // The BasicBlock work list
+ SmallVector<BasicBlock *, 64> BBWorkList;
+
+ /// KnownFeasibleEdges - Entries in this set are edges which have already had
+ /// PHI nodes retriggered.
+ using Edge = std::pair<BasicBlock *, BasicBlock *>;
+ DenseSet<Edge> KnownFeasibleEdges;
+
+ DenseMap<Function *, AnalysisResultsForFn> AnalysisResults;
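+  // AdditionalUsers - Extra values to revisit when a given value changes,
+  // beyond its direct def-use users (e.g. ssa.copy calls whose predicate
+  // constraint mentions the value as the other compare operand).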
+ DenseMap<Value *, SmallPtrSet<User *, 2>> AdditionalUsers;
+
+ LLVMContext &Ctx;
+
+private:
+ ConstantInt *getConstantInt(const ValueLatticeElement &IV) const {
+ return dyn_cast_or_null<ConstantInt>(getConstant(IV));
+ }
+
+ // pushToWorkList - Helper for markConstant/markOverdefined
+ void pushToWorkList(ValueLatticeElement &IV, Value *V);
+
+ // Helper to push \p V to the worklist, after updating it to \p IV. Also
+ // prints a debug message with the updated value.
+ void pushToWorkListMsg(ValueLatticeElement &IV, Value *V);
+
+  // markConstant - Mark a value as "constant". If the value is not already a
+  // constant, add it to the instruction work list so that the users of the
+  // instruction are updated later.
+ bool markConstant(ValueLatticeElement &IV, Value *V, Constant *C,
+ bool MayIncludeUndef = false);
+
+ bool markConstant(Value *V, Constant *C) {
+ assert(!V->getType()->isStructTy() && "structs should use mergeInValue");
+ return markConstant(ValueState[V], V, C);
+ }
+
+  // markOverdefined - Mark a value as "overdefined". If the value is not
+  // already overdefined, add it to the overdefined instruction work list so
+  // that the users of the instruction are updated later.
+ bool markOverdefined(ValueLatticeElement &IV, Value *V);
+
+ /// Merge \p MergeWithV into \p IV and push \p V to the worklist, if \p IV
+ /// changes.
+ bool mergeInValue(ValueLatticeElement &IV, Value *V,
+ ValueLatticeElement MergeWithV,
+ ValueLatticeElement::MergeOptions Opts = {
+ /*MayIncludeUndef=*/false, /*CheckWiden=*/false});
+
+ bool mergeInValue(Value *V, ValueLatticeElement MergeWithV,
+ ValueLatticeElement::MergeOptions Opts = {
+ /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) {
+ assert(!V->getType()->isStructTy() &&
+ "non-structs should use markConstant");
+ return mergeInValue(ValueState[V], V, MergeWithV, Opts);
+ }
+
+ /// getValueState - Return the ValueLatticeElement object that corresponds to
+ /// the value. This function handles the case when the value hasn't been seen
+ /// yet by properly seeding constants etc.
+ ValueLatticeElement &getValueState(Value *V) {
+ assert(!V->getType()->isStructTy() && "Should use getStructValueState");
+
+ auto I = ValueState.insert(std::make_pair(V, ValueLatticeElement()));
+ ValueLatticeElement &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (auto *C = dyn_cast<Constant>(V))
+ LV.markConstant(C); // Constants are constant
+
+ // All others are unknown by default.
+ return LV;
+ }
+
+ /// getStructValueState - Return the ValueLatticeElement object that
+ /// corresponds to the value/field pair. This function handles the case when
+ /// the value hasn't been seen yet by properly seeding constants etc.
+ ValueLatticeElement &getStructValueState(Value *V, unsigned i) {
+ assert(V->getType()->isStructTy() && "Should use getValueState");
+ assert(i < cast<StructType>(V->getType())->getNumElements() &&
+ "Invalid element #");
+
+ auto I = StructValueState.insert(
+ std::make_pair(std::make_pair(V, i), ValueLatticeElement()));
+ ValueLatticeElement &LV = I.first->second;
+
+ if (!I.second)
+ return LV; // Common case, already in the map.
+
+ if (auto *C = dyn_cast<Constant>(V)) {
+ Constant *Elt = C->getAggregateElement(i);
+
+ if (!Elt)
+ LV.markOverdefined(); // Unknown sort of constant.
+ else if (isa<UndefValue>(Elt))
+ ; // Undef values remain unknown.
+ else
+ LV.markConstant(Elt); // Constants are constant.
+ }
+
+    // All others are unknown by default.
+ return LV;
+ }
+
+  /// markEdgeExecutable - Mark the edge from Source to Dest as executable,
+  /// marking the destination block executable (and adding it to the BB work
+  /// list) if it is not already.
+ bool markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest);
+
+ // getFeasibleSuccessors - Return a vector of booleans to indicate which
+ // successors are reachable from a given terminator instruction.
+ void getFeasibleSuccessors(Instruction &TI, SmallVectorImpl<bool> &Succs);
+
+  // operandChangedState - This method is invoked on all of the users of an
+  // instruction whose state has just changed in some way. Based on this
+  // information, we need to update the specified user of this instruction.
+ void operandChangedState(Instruction *I) {
+ if (BBExecutable.count(I->getParent())) // Inst is executable?
+ visit(*I);
+ }
+
+ // Add U as additional user of V.
+ void addAdditionalUser(Value *V, User *U) {
+ auto Iter = AdditionalUsers.insert({V, {}});
+ Iter.first->second.insert(U);
+ }
+
+ // Mark I's users as changed, including AdditionalUsers.
+ void markUsersAsChanged(Value *I) {
+ // Functions include their arguments in the use-list. Changed function
+ // values mean that the result of the function changed. We only need to
+ // update the call sites with the new function result and do not have to
+ // propagate the call arguments.
+ if (isa<Function>(I)) {
+ for (User *U : I->users()) {
+ if (auto *CB = dyn_cast<CallBase>(U))
+ handleCallResult(*CB);
+ }
+ } else {
+ for (User *U : I->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ operandChangedState(UI);
+ }
+
+ auto Iter = AdditionalUsers.find(I);
+ if (Iter != AdditionalUsers.end()) {
+ // Copy additional users before notifying them of changes, because new
+ // users may be added, potentially invalidating the iterator.
+ SmallVector<Instruction *, 2> ToNotify;
+ for (User *U : Iter->second)
+ if (auto *UI = dyn_cast<Instruction>(U))
+ ToNotify.push_back(UI);
+ for (Instruction *UI : ToNotify)
+ operandChangedState(UI);
+ }
+ }
+ void handleCallOverdefined(CallBase &CB);
+ void handleCallResult(CallBase &CB);
+ void handleCallArguments(CallBase &CB);
+
+private:
+ friend class InstVisitor<SCCPInstVisitor>;
+
+  // visit implementations - Something changed in this instruction. Either an
+  // operand made a transition, or the instruction is newly executable. Change
+  // the lattice state of I to reflect these changes if appropriate.
+ void visitPHINode(PHINode &I);
+
+ // Terminators
+
+ void visitReturnInst(ReturnInst &I);
+ void visitTerminator(Instruction &TI);
+
+ void visitCastInst(CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitUnaryOperator(Instruction &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitCmpInst(CmpInst &I);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+
+ void visitCatchSwitchInst(CatchSwitchInst &CPI) {
+ markOverdefined(&CPI);
+ visitTerminator(CPI);
+ }
+
+ // Instructions that cannot be folded away.
+
+ void visitStoreInst(StoreInst &I);
+ void visitLoadInst(LoadInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+
+ void visitInvokeInst(InvokeInst &II) {
+ visitCallBase(II);
+ visitTerminator(II);
+ }
+
+ void visitCallBrInst(CallBrInst &CBI) {
+ visitCallBase(CBI);
+ visitTerminator(CBI);
+ }
+
+ void visitCallBase(CallBase &CB);
+ void visitResumeInst(ResumeInst &I) { /*returns void*/
+ }
+ void visitUnreachableInst(UnreachableInst &I) { /*returns void*/
+ }
+ void visitFenceInst(FenceInst &I) { /*returns void*/
+ }
+
+ void visitInstruction(Instruction &I);
+
+public:
+ void addAnalysis(Function &F, AnalysisResultsForFn A) {
+ AnalysisResults.insert({&F, std::move(A)});
+ }
+
+ void visitCallInst(CallInst &I) { visitCallBase(I); }
+
+ bool markBlockExecutable(BasicBlock *BB);
+
+ const PredicateBase *getPredicateInfoFor(Instruction *I) {
+ auto A = AnalysisResults.find(I->getParent()->getParent());
+ if (A == AnalysisResults.end())
+ return nullptr;
+ return A->second.PredInfo->getPredicateInfoFor(I);
+ }
+
+ DomTreeUpdater getDTU(Function &F) {
+ auto A = AnalysisResults.find(&F);
+ assert(A != AnalysisResults.end() && "Need analysis results for function.");
+ return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy};
+ }
+
+ SCCPInstVisitor(const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+ LLVMContext &Ctx)
+ : DL(DL), GetTLI(GetTLI), Ctx(Ctx) {}
+
+ void trackValueOfGlobalVariable(GlobalVariable *GV) {
+ // We only track the contents of scalar globals.
+ if (GV->getValueType()->isSingleValueType()) {
+ ValueLatticeElement &IV = TrackedGlobals[GV];
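+      // An undef initializer keeps the lattice value unknown; any other
+      // initializer seeds it as that constant.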
+ if (!isa<UndefValue>(GV->getInitializer()))
+ IV.markConstant(GV->getInitializer());
+ }
+ }
+
+ void addTrackedFunction(Function *F) {
+ // Add an entry, F -> undef.
+ if (auto *STy = dyn_cast<StructType>(F->getReturnType())) {
+ MRVFunctionsTracked.insert(F);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ TrackedMultipleRetVals.insert(
+ std::make_pair(std::make_pair(F, i), ValueLatticeElement()));
+ } else if (!F->getReturnType()->isVoidTy())
+ TrackedRetVals.insert(std::make_pair(F, ValueLatticeElement()));
+ }
+
+ void addToMustPreserveReturnsInFunctions(Function *F) {
+ MustPreserveReturnsInFunctions.insert(F);
+ }
+
+ bool mustPreserveReturn(Function *F) {
+ return MustPreserveReturnsInFunctions.count(F);
+ }
+
+ void addArgumentTrackedFunction(Function *F) {
+ TrackingIncomingArguments.insert(F);
+ }
+
+ bool isArgumentTrackedFunction(Function *F) {
+ return TrackingIncomingArguments.count(F);
+ }
+
+ void solve();
+
+ bool resolvedUndefsIn(Function &F);
+
+ bool isBlockExecutable(BasicBlock *BB) const {
+ return BBExecutable.count(BB);
+ }
+
+ bool isEdgeFeasible(BasicBlock *From, BasicBlock *To) const;
+
+ std::vector<ValueLatticeElement> getStructLatticeValueFor(Value *V) const {
+ std::vector<ValueLatticeElement> StructValues;
+ auto *STy = dyn_cast<StructType>(V->getType());
+ assert(STy && "getStructLatticeValueFor() can be called only on structs");
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ auto I = StructValueState.find(std::make_pair(V, i));
+ assert(I != StructValueState.end() && "Value not in valuemap!");
+ StructValues.push_back(I->second);
+ }
+ return StructValues;
+ }
+
+ void removeLatticeValueFor(Value *V) { ValueState.erase(V); }
+
+ const ValueLatticeElement &getLatticeValueFor(Value *V) const {
+ assert(!V->getType()->isStructTy() &&
+ "Should use getStructLatticeValueFor");
+ DenseMap<Value *, ValueLatticeElement>::const_iterator I =
+ ValueState.find(V);
+ assert(I != ValueState.end() &&
+ "V not found in ValueState nor Paramstate map!");
+ return I->second;
+ }
+
+ const MapVector<Function *, ValueLatticeElement> &getTrackedRetVals() {
+ return TrackedRetVals;
+ }
+
+ const DenseMap<GlobalVariable *, ValueLatticeElement> &getTrackedGlobals() {
+ return TrackedGlobals;
+ }
+
+ const SmallPtrSet<Function *, 16> getMRVFunctionsTracked() {
+ return MRVFunctionsTracked;
+ }
+
+ void markOverdefined(Value *V) {
+ if (auto *STy = dyn_cast<StructType>(V->getType()))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ markOverdefined(getStructValueState(V, i), V);
+ else
+ markOverdefined(ValueState[V], V);
+ }
+
+ bool isStructLatticeConstant(Function *F, StructType *STy);
+
+ Constant *getConstant(const ValueLatticeElement &LV) const;
+
+ SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions() {
+ return TrackingIncomingArguments;
+ }
+
+ void markArgInFuncSpecialization(Function *F, Argument *A, Constant *C);
+
+ void markFunctionUnreachable(Function *F) {
+ for (auto &BB : *F)
+ BBExecutable.erase(&BB);
+ }
+};
+
+} // namespace llvm
+
+bool SCCPInstVisitor::markBlockExecutable(BasicBlock *BB) {
+ if (!BBExecutable.insert(BB).second)
+ return false;
+ LLVM_DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << '\n');
+ BBWorkList.push_back(BB); // Add the block to the work list!
+ return true;
+}
+
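+// Push V onto the worklist that matches its new lattice state. Values that
+// just became overdefined go on the separate overdefined worklist, which
+// solve() drains first so overdefined-ness propagates as fast as possible.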
+void SCCPInstVisitor::pushToWorkList(ValueLatticeElement &IV, Value *V) {
+ if (IV.isOverdefined())
+ return OverdefinedInstWorkList.push_back(V);
+ InstWorkList.push_back(V);
+}
+
+void SCCPInstVisitor::pushToWorkListMsg(ValueLatticeElement &IV, Value *V) {
+ LLVM_DEBUG(dbgs() << "updated " << IV << ": " << *V << '\n');
+ pushToWorkList(IV, V);
+}
+
+bool SCCPInstVisitor::markConstant(ValueLatticeElement &IV, Value *V,
+ Constant *C, bool MayIncludeUndef) {
+ if (!IV.markConstant(C, MayIncludeUndef))
+ return false;
+ LLVM_DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
+ pushToWorkList(IV, V);
+ return true;
+}
+
+bool SCCPInstVisitor::markOverdefined(ValueLatticeElement &IV, Value *V) {
+ if (!IV.markOverdefined())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "markOverdefined: ";
+ if (auto *F = dyn_cast<Function>(V)) dbgs()
+ << "Function '" << F->getName() << "'\n";
+ else dbgs() << *V << '\n');
+ // Only instructions go on the work list
+ pushToWorkList(IV, V);
+ return true;
+}
+
+bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i));
+ assert(It != TrackedMultipleRetVals.end());
+ ValueLatticeElement LV = It->second;
+ if (!isConstant(LV))
+ return false;
+ }
+ return true;
+}
+
+Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const {
+ if (LV.isConstant())
+ return LV.getConstant();
+
+ if (LV.isConstantRange()) {
+ const auto &CR = LV.getConstantRange();
+ if (CR.getSingleElement())
+ return ConstantInt::get(Ctx, *CR.getSingleElement());
+ }
+ return nullptr;
+}
+
+void SCCPInstVisitor::markArgInFuncSpecialization(Function *F, Argument *A,
+ Constant *C) {
+ assert(F->arg_size() == A->getParent()->arg_size() &&
+ "Functions should have the same number of arguments");
+
+ // Mark the argument constant in the new function.
+ markConstant(A, C);
+
+ // For the remaining arguments in the new function, copy the lattice state
+ // over from the old function.
+ for (auto I = F->arg_begin(), J = A->getParent()->arg_begin(),
+ E = F->arg_end();
+ I != E; ++I, ++J)
+ if (J != A && ValueState.count(I)) {
+ ValueState[J] = ValueState[I];
+ pushToWorkList(ValueState[J], J);
+ }
+}
+
+void SCCPInstVisitor::visitInstruction(Instruction &I) {
+ // All the instructions we don't do any special handling for just
+ // go to overdefined.
+ LLVM_DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
+ markOverdefined(&I);
+}
+
+bool SCCPInstVisitor::mergeInValue(ValueLatticeElement &IV, Value *V,
+ ValueLatticeElement MergeWithV,
+ ValueLatticeElement::MergeOptions Opts) {
+ if (IV.mergeIn(MergeWithV, Opts)) {
+ pushToWorkList(IV, V);
+ LLVM_DEBUG(dbgs() << "Merged " << MergeWithV << " into " << *V << " : "
+ << IV << "\n");
+ return true;
+ }
+ return false;
+}
+
+bool SCCPInstVisitor::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+ if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+ return false; // This edge is already known to be executable!
+
+ if (!markBlockExecutable(Dest)) {
+ // If the destination is already executable, we just made an *edge*
+ // feasible that wasn't before. Revisit the PHI nodes in the block
+ // because they have potentially new operands.
+ LLVM_DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+ << " -> " << Dest->getName() << '\n');
+
+ for (PHINode &PN : Dest->phis())
+ visitPHINode(PN);
+ }
+ return true;
+}
+
+// getFeasibleSuccessors - Return a vector of booleans to indicate which
+// successors are reachable from a given terminator instruction.
+void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
+ SmallVectorImpl<bool> &Succs) {
+ Succs.resize(TI.getNumSuccessors());
+ if (auto *BI = dyn_cast<BranchInst>(&TI)) {
+ if (BI->isUnconditional()) {
+ Succs[0] = true;
+ return;
+ }
+
+ ValueLatticeElement BCValue = getValueState(BI->getCondition());
+ ConstantInt *CI = getConstantInt(BCValue);
+ if (!CI) {
+ // Overdefined condition variables, and branches on unfoldable constant
+ // conditions, mean the branch could go either way.
+ if (!BCValue.isUnknownOrUndef())
+ Succs[0] = Succs[1] = true;
+ return;
+ }
+
+ // Constant condition variables mean the branch can only go a single way.
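+    // Successor 0 is the true edge and successor 1 is the false edge, so a
+    // zero (false) condition selects index 1.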
+ Succs[CI->isZero()] = true;
+ return;
+ }
+
+  // Unwinding instructions' successors are always executable.
+ if (TI.isExceptionalTerminator()) {
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ if (auto *SI = dyn_cast<SwitchInst>(&TI)) {
+ if (!SI->getNumCases()) {
+ Succs[0] = true;
+ return;
+ }
+ const ValueLatticeElement &SCValue = getValueState(SI->getCondition());
+ if (ConstantInt *CI = getConstantInt(SCValue)) {
+ Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
+ return;
+ }
+
+ // TODO: Switch on undef is UB. Stop passing false once the rest of LLVM
+ // is ready.
+ if (SCValue.isConstantRange(/*UndefAllowed=*/false)) {
+ const ConstantRange &Range = SCValue.getConstantRange();
+ for (const auto &Case : SI->cases()) {
+ const APInt &CaseValue = Case.getCaseValue()->getValue();
+ if (Range.contains(CaseValue))
+ Succs[Case.getSuccessorIndex()] = true;
+ }
+
+ // TODO: Determine whether default case is reachable.
+ Succs[SI->case_default()->getSuccessorIndex()] = true;
+ return;
+ }
+
+ // Overdefined or unknown condition? All destinations are executable!
+ if (!SCValue.isUnknownOrUndef())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+  // For an indirect branch whose address is a blockaddress, we mark only
+  // the target block as executable.
+ if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
+ // Casts are folded by visitCastInst.
+ ValueLatticeElement IBRValue = getValueState(IBR->getAddress());
+ BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(getConstant(IBRValue));
+ if (!Addr) { // Overdefined or unknown condition?
+ // All destinations are executable!
+ if (!IBRValue.isUnknownOrUndef())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ BasicBlock *T = Addr->getBasicBlock();
+ assert(Addr->getFunction() == T->getParent() &&
+ "Block address of a different function ?");
+ for (unsigned i = 0; i < IBR->getNumSuccessors(); ++i) {
+ // This is the target.
+ if (IBR->getDestination(i) == T) {
+ Succs[i] = true;
+ return;
+ }
+ }
+
+ // If we didn't find our destination in the IBR successor list, then we
+    // have undefined behavior. It's ok to assume no successor is executable.
+ return;
+ }
+
+ // In case of callbr, we pessimistically assume that all successors are
+ // feasible.
+ if (isa<CallBrInst>(&TI)) {
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
+}
+
+// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
+// block to the 'To' basic block is currently feasible.
+bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
+ // Check if we've called markEdgeExecutable on the edge yet. (We could
+ // be more aggressive and try to consider edges which haven't been marked
+ // yet, but there isn't any need.)
+ return KnownFeasibleEdges.count(Edge(From, To));
+}
+
+// visit Implementations - Something changed in this instruction, either an
+// operand made a transition, or the instruction is newly executable. Change
+// the lattice state of I to reflect these changes if appropriate. This method
+// makes sure to do the following actions:
+//
+// 1. If a phi node merges two constants in, and they have conflicting values
+//    coming from different branches, or if the PHI node merges in an
+//    overdefined value, then the PHI node becomes overdefined.
+// 2. If a phi node merges only constants in, and they all agree on value, the
+// PHI node becomes a constant value equal to that.
+// 3. If V <- x (op) y && isConstant(x) && isConstant(y) V = Constant
+// 4. If V <- x (op) y && (isOverdefined(x) || isOverdefined(y)) V = Overdefined
+// 5. If V <- MEM or V <- CALL or V <- (unknown) then V = Overdefined
+// 6. If a conditional branch has a value that is constant, make the selected
+// destination executable
+// 7. If a conditional branch has a value that is overdefined, make all
+// successors executable.
+void SCCPInstVisitor::visitPHINode(PHINode &PN) {
+ // If this PN returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (PN.getType()->isStructTy())
+ return (void)markOverdefined(&PN);
+
+ if (getValueState(&PN).isOverdefined())
+ return; // Quick exit
+
+ // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
+ // and slow us down a lot. Just mark them overdefined.
+ if (PN.getNumIncomingValues() > 64)
+ return (void)markOverdefined(&PN);
+
+ unsigned NumActiveIncoming = 0;
+
+ // Look at all of the executable operands of the PHI node. If any of them
+ // are overdefined, the PHI becomes overdefined as well. If they are all
+ // constant, and they agree with each other, the PHI becomes the identical
+ // constant. If they are constant and don't agree, the PHI is a constant
+ // range. If there are no executable operands, the PHI remains unknown.
+ ValueLatticeElement PhiState = getValueState(&PN);
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+
+ ValueLatticeElement IV = getValueState(PN.getIncomingValue(i));
+ PhiState.mergeIn(IV);
+ NumActiveIncoming++;
+ if (PhiState.isOverdefined())
+ break;
+ }
+
+ // We allow up to 1 range extension per active incoming value and one
+ // additional extension. Note that we manually adjust the number of range
+ // extensions to match the number of active incoming values. This helps to
+ // limit multiple extensions caused by the same incoming value, if other
+ // incoming values are equal.
+ mergeInValue(&PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ NumActiveIncoming + 1));
+ ValueLatticeElement &PhiStateRef = getValueState(&PN);
+ PhiStateRef.setNumRangeExtensions(
+ std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions()));
+}
+
+void SCCPInstVisitor::visitReturnInst(ReturnInst &I) {
+ if (I.getNumOperands() == 0)
+ return; // ret void
+
+ Function *F = I.getParent()->getParent();
+ Value *ResultOp = I.getOperand(0);
+
+ // If we are tracking the return value of this function, merge it in.
+ if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
+ auto TFRVI = TrackedRetVals.find(F);
+ if (TFRVI != TrackedRetVals.end()) {
+ mergeInValue(TFRVI->second, F, getValueState(ResultOp));
+ return;
+ }
+ }
+
+ // Handle functions that return multiple values.
+ if (!TrackedMultipleRetVals.empty()) {
+ if (auto *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (MRVFunctionsTracked.count(F))
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
+ getStructValueState(ResultOp, i));
+ }
+}
+
+void SCCPInstVisitor::visitTerminator(Instruction &TI) {
+ SmallVector<bool, 16> SuccFeasible;
+ getFeasibleSuccessors(TI, SuccFeasible);
+
+ BasicBlock *BB = TI.getParent();
+
+ // Mark all feasible successors executable.
+ for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+ if (SuccFeasible[i])
+ markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SCCPInstVisitor::visitCastInst(CastInst &I) {
+  // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (ValueState[&I].isOverdefined())
+ return;
+
+ ValueLatticeElement OpSt = getValueState(I.getOperand(0));
+ if (Constant *OpC = getConstant(OpSt)) {
+ // Fold the constant as we build.
+ Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
+ if (isa<UndefValue>(C))
+ return;
+ // Propagate constant value
+ markConstant(&I, C);
+ } else if (OpSt.isConstantRange() && I.getDestTy()->isIntegerTy()) {
+ auto &LV = getValueState(&I);
+ ConstantRange OpRange = OpSt.getConstantRange();
+ Type *DestTy = I.getDestTy();
+ // Vectors where all elements have the same known constant range are treated
+ // as a single constant range in the lattice. When bitcasting such vectors,
+ // there is a mis-match between the width of the lattice value (single
+ // constant range) and the original operands (vector). Go to overdefined in
+ // that case.
+ if (I.getOpcode() == Instruction::BitCast &&
+ I.getOperand(0)->getType()->isVectorTy() &&
+ OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy))
+ return (void)markOverdefined(&I);
+
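+    // Otherwise map the operand's range through the cast operation to the
+    // destination bit width.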
+ ConstantRange Res =
+ OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
+ mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
+ } else if (!OpSt.isUnknownOrUndef())
+ markOverdefined(&I);
+}
+
+void SCCPInstVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
+  // If this returns a struct, mark all elements overdefined; we don't track
+  // structs in structs.
+ if (EVI.getType()->isStructTy())
+ return (void)markOverdefined(&EVI);
+
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (ValueState[&EVI].isOverdefined())
+ return (void)markOverdefined(&EVI);
+
+ // If this is extracting from more than one level of struct, we don't know.
+ if (EVI.getNumIndices() != 1)
+ return (void)markOverdefined(&EVI);
+
+ Value *AggVal = EVI.getAggregateOperand();
+ if (AggVal->getType()->isStructTy()) {
+ unsigned i = *EVI.idx_begin();
+ ValueLatticeElement EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return (void)markOverdefined(&EVI);
+ }
+}
+
+void SCCPInstVisitor::visitInsertValueInst(InsertValueInst &IVI) {
+ auto *STy = dyn_cast<StructType>(IVI.getType());
+ if (!STy)
+ return (void)markOverdefined(&IVI);
+
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (isOverdefined(ValueState[&IVI]))
+ return (void)markOverdefined(&IVI);
+
+  // If this has more than one index, we can't handle it; drive the result to
+  // overdefined.
+ if (IVI.getNumIndices() != 1)
+ return (void)markOverdefined(&IVI);
+
+ Value *Aggr = IVI.getAggregateOperand();
+ unsigned Idx = *IVI.idx_begin();
+
+ // Compute the result based on what we're inserting.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ // This passes through all values that aren't the inserted element.
+ if (i != Idx) {
+ ValueLatticeElement EltVal = getStructValueState(Aggr, i);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
+ continue;
+ }
+
+ Value *Val = IVI.getInsertedValueOperand();
+ if (Val->getType()->isStructTy())
+ // We don't track structs in structs.
+ markOverdefined(getStructValueState(&IVI, i), &IVI);
+ else {
+ ValueLatticeElement InVal = getValueState(Val);
+ mergeInValue(getStructValueState(&IVI, i), &IVI, InVal);
+ }
+ }
+}
+
+void SCCPInstVisitor::visitSelectInst(SelectInst &I) {
+ // If this select returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this if code actually uses this.
+ if (I.getType()->isStructTy())
+ return (void)markOverdefined(&I);
+
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (ValueState[&I].isOverdefined())
+ return (void)markOverdefined(&I);
+
+ ValueLatticeElement CondValue = getValueState(I.getCondition());
+ if (CondValue.isUnknownOrUndef())
+ return;
+
+ if (ConstantInt *CondCB = getConstantInt(CondValue)) {
+ Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
+ mergeInValue(&I, getValueState(OpVal));
+ return;
+ }
+
+ // Otherwise, the condition is overdefined or a constant we can't evaluate.
+ // See if we can produce something better than overdefined based on the T/F
+ // value.
+ ValueLatticeElement TVal = getValueState(I.getTrueValue());
+ ValueLatticeElement FVal = getValueState(I.getFalseValue());
+
+ bool Changed = ValueState[&I].mergeIn(TVal);
+ Changed |= ValueState[&I].mergeIn(FVal);
+ if (Changed)
+ pushToWorkListMsg(ValueState[&I], &I);
+}
+
+// Handle Unary Operators.
+void SCCPInstVisitor::visitUnaryOperator(Instruction &I) {
+ ValueLatticeElement V0State = getValueState(I.getOperand(0));
+
+ ValueLatticeElement &IV = ValueState[&I];
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (isOverdefined(IV))
+ return (void)markOverdefined(&I);
+
+ if (isConstant(V0State)) {
+ Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V0State));
+
+ // op Y -> undef.
+ if (isa<UndefValue>(C))
+ return;
+ return (void)markConstant(IV, &I, C);
+ }
+
+ // If something is undef, wait for it to resolve.
+ if (!isOverdefined(V0State))
+ return;
+
+ markOverdefined(&I);
+}
+
+// Handle Binary Operators.
+void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
+ ValueLatticeElement V1State = getValueState(I.getOperand(0));
+ ValueLatticeElement V2State = getValueState(I.getOperand(1));
+
+ ValueLatticeElement &IV = ValueState[&I];
+ if (IV.isOverdefined())
+ return;
+
+ // If something is undef, wait for it to resolve.
+ if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef())
+ return;
+
+ if (V1State.isOverdefined() && V2State.isOverdefined())
+ return (void)markOverdefined(&I);
+
+ // If either of the operands is a constant, try to fold it to a constant.
+ // TODO: Use information from notconstant better.
+ if ((V1State.isConstant() || V2State.isConstant())) {
+ Value *V1 = isConstant(V1State) ? getConstant(V1State) : I.getOperand(0);
+ Value *V2 = isConstant(V2State) ? getConstant(V2State) : I.getOperand(1);
+ Value *R = SimplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL));
+ auto *C = dyn_cast_or_null<Constant>(R);
+ if (C) {
+ // X op Y -> undef.
+ if (isa<UndefValue>(C))
+ return;
+      // Conservatively assume that the result may be based on operands that
+      // may be undef. Note that we use mergeInValue to combine the constant
+      // with the existing lattice value for I, as different constants might
+      // be found after one of the operands goes to overdefined, e.g. due to
+      // one operand being a special floating-point value.
+ ValueLatticeElement NewV;
+ NewV.markConstant(C, /*MayIncludeUndef=*/true);
+ return (void)mergeInValue(&I, NewV);
+ }
+ }
+
+ // Only use ranges for binary operators on integers.
+ if (!I.getType()->isIntegerTy())
+ return markOverdefined(&I);
+
+ // Try to simplify to a constant range.
+ ConstantRange A = ConstantRange::getFull(I.getType()->getScalarSizeInBits());
+ ConstantRange B = ConstantRange::getFull(I.getType()->getScalarSizeInBits());
+ if (V1State.isConstantRange())
+ A = V1State.getConstantRange();
+ if (V2State.isConstantRange())
+ B = V2State.getConstantRange();
+
+ ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B);
+ mergeInValue(&I, ValueLatticeElement::getRange(R));
+
+ // TODO: Currently we do not exploit special values that produce something
+ // better than overdefined with an overdefined operand for vector or floating
+ // point types, like and <4 x i32> overdefined, zeroinitializer.
+}
+
+// Handle ICmpInst instruction.
+void SCCPInstVisitor::visitCmpInst(CmpInst &I) {
+ // Do not cache this lookup, getValueState calls later in the function might
+ // invalidate the reference.
+ if (isOverdefined(ValueState[&I]))
+ return (void)markOverdefined(&I);
+
+ Value *Op1 = I.getOperand(0);
+ Value *Op2 = I.getOperand(1);
+
+  // The lattice values for both operands may include constant range
+  // information, which getCompare can make use of.
+ auto V1State = getValueState(Op1);
+ auto V2State = getValueState(Op2);
+
+ Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State);
+ if (C) {
+ if (isa<UndefValue>(C))
+ return;
+ ValueLatticeElement CV;
+ CV.markConstant(C);
+ mergeInValue(&I, CV);
+ return;
+ }
+
+  // If operands are still unknown, wait for them to resolve.
+ if ((V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) &&
+ !isConstant(ValueState[&I]))
+ return;
+
+ markOverdefined(&I);
+}
+
+// Handle getelementptr instructions. If all operands are constants then we
+// can turn this into a getelementptr ConstantExpr.
+void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
+ if (isOverdefined(ValueState[&I]))
+ return (void)markOverdefined(&I);
+
+ SmallVector<Constant *, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ ValueLatticeElement State = getValueState(I.getOperand(i));
+ if (State.isUnknownOrUndef())
+ return; // Operands are not resolved yet.
+
+ if (isOverdefined(State))
+ return (void)markOverdefined(&I);
+
+ if (Constant *C = getConstant(State)) {
+ Operands.push_back(C);
+ continue;
+ }
+
+ return (void)markOverdefined(&I);
+ }
+
+ Constant *Ptr = Operands[0];
+ auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
+ Constant *C =
+ ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
+ if (isa<UndefValue>(C))
+ return;
+ markConstant(&I, C);
+}
+
+void SCCPInstVisitor::visitStoreInst(StoreInst &SI) {
+ // If this store is of a struct, ignore it.
+ if (SI.getOperand(0)->getType()->isStructTy())
+ return;
+
+ if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
+ return;
+
+ GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
+ auto I = TrackedGlobals.find(GV);
+ if (I == TrackedGlobals.end())
+ return;
+
+ // Get the value we are storing into the global, then merge it.
+ mergeInValue(I->second, GV, getValueState(SI.getOperand(0)),
+ ValueLatticeElement::MergeOptions().setCheckWiden(false));
+ if (I->second.isOverdefined())
+ TrackedGlobals.erase(I); // No need to keep tracking this!
+}
+
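+// Derive a lattice value for \p I from its !range or !nonnull metadata, if
+// present; otherwise fall back to overdefined.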
+static ValueLatticeElement getValueFromMetadata(const Instruction *I) {
+ if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
+ if (I->getType()->isIntegerTy())
+ return ValueLatticeElement::getRange(
+ getConstantRangeFromMetadata(*Ranges));
+ if (I->hasMetadata(LLVMContext::MD_nonnull))
+ return ValueLatticeElement::getNot(
+ ConstantPointerNull::get(cast<PointerType>(I->getType())));
+ return ValueLatticeElement::getOverdefined();
+}
+
+// Handle load instructions. If the operand is a constant pointer to a constant
+// global, we can replace the load with the loaded constant value!
+void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
+ // If this load is of a struct or the load is volatile, just mark the result
+ // as overdefined.
+ if (I.getType()->isStructTy() || I.isVolatile())
+ return (void)markOverdefined(&I);
+
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (ValueState[&I].isOverdefined())
+ return (void)markOverdefined(&I);
+
+ ValueLatticeElement PtrVal = getValueState(I.getOperand(0));
+ if (PtrVal.isUnknownOrUndef())
+ return; // The pointer is not resolved yet!
+
+ ValueLatticeElement &IV = ValueState[&I];
+
+ if (isConstant(PtrVal)) {
+ Constant *Ptr = getConstant(PtrVal);
+
+ // load null is undefined.
+ if (isa<ConstantPointerNull>(Ptr)) {
+ if (NullPointerIsDefined(I.getFunction(), I.getPointerAddressSpace()))
+ return (void)markOverdefined(IV, &I);
+ else
+ return;
+ }
+
+ // Transform load (constant global) into the value loaded.
+ if (auto *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (!TrackedGlobals.empty()) {
+ // If we are tracking this global, merge in the known value for it.
+ auto It = TrackedGlobals.find(GV);
+ if (It != TrackedGlobals.end()) {
+ mergeInValue(IV, &I, It->second, getMaxWidenStepsOpts());
+ return;
+ }
+ }
+ }
+
+ // Transform load from a constant into a constant if possible.
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) {
+ if (isa<UndefValue>(C))
+ return;
+ return (void)markConstant(IV, &I, C);
+ }
+ }
+
+ // Fall back to metadata.
+ mergeInValue(&I, getValueFromMetadata(&I));
+}
+
+void SCCPInstVisitor::visitCallBase(CallBase &CB) {
+ handleCallResult(CB);
+ handleCallArguments(CB);
+}
+
+void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
+ Function *F = CB.getCalledFunction();
+
+ // Void return and not tracking callee, just bail.
+ if (CB.getType()->isVoidTy())
+ return;
+
+ // Always mark struct return as overdefined.
+ if (CB.getType()->isStructTy())
+ return (void)markOverdefined(&CB);
+
+ // Otherwise, if we have a single return value case, and if the function is
+ // a declaration, maybe we can constant fold it.
+ if (F && F->isDeclaration() && canConstantFoldCallTo(&CB, F)) {
+ SmallVector<Constant *, 8> Operands;
+ for (auto AI = CB.arg_begin(), E = CB.arg_end(); AI != E; ++AI) {
+ if (AI->get()->getType()->isStructTy())
+ return markOverdefined(&CB); // Can't handle struct args.
+ ValueLatticeElement State = getValueState(*AI);
+
+ if (State.isUnknownOrUndef())
+ return; // Operands are not resolved yet.
+ if (isOverdefined(State))
+ return (void)markOverdefined(&CB);
+ assert(isConstant(State) && "Unknown state!");
+ Operands.push_back(getConstant(State));
+ }
+
+ if (isOverdefined(getValueState(&CB)))
+ return (void)markOverdefined(&CB);
+
+ // If we can constant fold this, mark the result of the call as a
+ // constant.
+ if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F))) {
+ // call -> undef.
+ if (isa<UndefValue>(C))
+ return;
+ return (void)markConstant(&CB, C);
+ }
+ }
+
+ // Fall back to metadata.
+ mergeInValue(&CB, getValueFromMetadata(&CB));
+}
+
+void SCCPInstVisitor::handleCallArguments(CallBase &CB) {
+ Function *F = CB.getCalledFunction();
+ // If this is a local function that doesn't have its address taken, mark its
+ // entry block executable and merge in the actual arguments to the call into
+ // the formal arguments of the function.
+ if (!TrackingIncomingArguments.empty() &&
+ TrackingIncomingArguments.count(F)) {
+ markBlockExecutable(&F->front());
+
+ // Propagate information from this call site into the callee.
+ auto CAI = CB.arg_begin();
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++CAI) {
+ // If this argument is byval, and if the function is not readonly, there
+ // will be an implicit copy formed of the input aggregate.
+ if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
+ markOverdefined(&*AI);
+ continue;
+ }
+
+ if (auto *STy = dyn_cast<StructType>(AI->getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement CallArg = getStructValueState(*CAI, i);
+ mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg,
+ getMaxWidenStepsOpts());
+ }
+ } else
+ mergeInValue(&*AI, getValueState(*CAI), getMaxWidenStepsOpts());
+ }
+ }
+}
+
+void SCCPInstVisitor::handleCallResult(CallBase &CB) {
+ Function *F = CB.getCalledFunction();
+
+ if (auto *II = dyn_cast<IntrinsicInst>(&CB)) {
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
+ if (ValueState[&CB].isOverdefined())
+ return;
+
+ Value *CopyOf = CB.getOperand(0);
+ ValueLatticeElement CopyOfVal = getValueState(CopyOf);
+ const auto *PI = getPredicateInfoFor(&CB);
+ assert(PI && "Missing predicate info for ssa.copy");
+
+ const Optional<PredicateConstraint> &Constraint = PI->getConstraint();
+ if (!Constraint) {
+ mergeInValue(ValueState[&CB], &CB, CopyOfVal);
+ return;
+ }
+
+ CmpInst::Predicate Pred = Constraint->Predicate;
+ Value *OtherOp = Constraint->OtherOp;
+
+ // Wait until OtherOp is resolved.
+ if (getValueState(OtherOp).isUnknown()) {
+ addAdditionalUser(OtherOp, &CB);
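+        // Revisit this ssa.copy once OtherOp gets a lattice value.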
+ return;
+ }
+
+      // TODO: Actually flip MayIncludeUndef for the created range to false,
+      // once most places in the optimizer respect the rule that branches on
+      // undef/poison are UB. The reason why the new range cannot be undef is
+      // as follows:
+      // The new range is based on a branch condition. That guarantees that
+      // neither of the compare operands can be undef in the branch targets,
+      // unless we have conditions that are always true/false (e.g. icmp ule
+      // i32, %a, i32_max). For the latter, an overdefined/empty range will
+      // be inferred, but the branch will get folded accordingly anyway.
+ bool MayIncludeUndef = !isa<PredicateAssume>(PI);
+
+ ValueLatticeElement CondVal = getValueState(OtherOp);
+ ValueLatticeElement &IV = ValueState[&CB];
+ if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) {
+ auto ImposedCR =
+ ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType()));
+
+ // Get the range imposed by the condition.
+ if (CondVal.isConstantRange())
+ ImposedCR = ConstantRange::makeAllowedICmpRegion(
+ Pred, CondVal.getConstantRange());
+
+ // Combine range info for the original value with the new range from the
+ // condition.
+ auto CopyOfCR = CopyOfVal.isConstantRange()
+ ? CopyOfVal.getConstantRange()
+ : ConstantRange::getFull(
+ DL.getTypeSizeInBits(CopyOf->getType()));
+ auto NewCR = ImposedCR.intersectWith(CopyOfCR);
+ // If the existing information is != x, do not use the information from
+ // a chained predicate, as the != x information is more likely to be
+ // helpful in practice.
+ if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement())
+ NewCR = CopyOfCR;
+
+ addAdditionalUser(OtherOp, &CB);
+ mergeInValue(IV, &CB,
+ ValueLatticeElement::getRange(NewCR, MayIncludeUndef));
+ return;
+ } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) {
+ // For non-integer values or integer constant expressions, only
+ // propagate equal constants.
+ addAdditionalUser(OtherOp, &CB);
+ mergeInValue(IV, &CB, CondVal);
+ return;
+ } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() &&
+ !MayIncludeUndef) {
+ // Propagate inequalities.
+ addAdditionalUser(OtherOp, &CB);
+ mergeInValue(IV, &CB,
+ ValueLatticeElement::getNot(CondVal.getConstant()));
+ return;
+ }
+
+ return (void)mergeInValue(IV, &CB, CopyOfVal);
+ }
+
+ if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
+ // Compute result range for intrinsics supported by ConstantRange.
+ // Do this even if we don't know a range for all operands, as we may
+ // still know something about the result range, e.g. of abs(x).
+ SmallVector<ConstantRange, 2> OpRanges;
+ for (Value *Op : II->args()) {
+ const ValueLatticeElement &State = getValueState(Op);
+ if (State.isConstantRange())
+ OpRanges.push_back(State.getConstantRange());
+ else
+ OpRanges.push_back(
+ ConstantRange::getFull(Op->getType()->getScalarSizeInBits()));
+ }
+
+ ConstantRange Result =
+ ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges);
+ return (void)mergeInValue(II, ValueLatticeElement::getRange(Result));
+ }
+ }
+
+  // The common case is that we aren't tracking the callee, either because we
+  // are not doing interprocedural analysis or because the callee is indirect
+  // or external. Handle these cases first.
+ if (!F || F->isDeclaration())
+ return handleCallOverdefined(CB);
+
+ // If this is a single/zero retval case, see if we're tracking the function.
+ if (auto *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (!MRVFunctionsTracked.count(F))
+ return handleCallOverdefined(CB); // Not tracking this callee.
+
+ // If we are tracking this callee, propagate the result of the function
+ // into this call site.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ mergeInValue(getStructValueState(&CB, i), &CB,
+ TrackedMultipleRetVals[std::make_pair(F, i)],
+ getMaxWidenStepsOpts());
+ } else {
+ auto TFRVI = TrackedRetVals.find(F);
+ if (TFRVI == TrackedRetVals.end())
+ return handleCallOverdefined(CB); // Not tracking this callee.
+
+ // If so, propagate the return value of the callee into this call result.
+ mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts());
+ }
+}
+
+void SCCPInstVisitor::solve() {
+ // Process the work lists until they are empty!
+ while (!BBWorkList.empty() || !InstWorkList.empty() ||
+ !OverdefinedInstWorkList.empty()) {
+    // Process the overdefined instruction work list first, which drives other
+    // things to overdefined more quickly.
+ while (!OverdefinedInstWorkList.empty()) {
+ Value *I = OverdefinedInstWorkList.pop_back_val();
+
+ LLVM_DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
+
+ // "I" got into the work list because it either made the transition from
+ // bottom to constant, or to overdefined.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+      // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value.
+ //
+ markUsersAsChanged(I);
+ }
+
+ // Process the instruction work list.
+ while (!InstWorkList.empty()) {
+ Value *I = InstWorkList.pop_back_val();
+
+ LLVM_DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
+
+ // "I" got into the work list because it made the transition from undef to
+ // constant.
+ //
+ // Anything on this worklist that is overdefined need not be visited
+ // since all of its users will have already been marked as overdefined.
+ // Update all of the users of this instruction's value.
+ //
+ if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
+ markUsersAsChanged(I);
+ }
+
+ // Process the basic block work list.
+ while (!BBWorkList.empty()) {
+ BasicBlock *BB = BBWorkList.pop_back_val();
+
+ LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n');
+
+ // Notify all instructions in this basic block that they are newly
+ // executable.
+ visit(BB);
+ }
+ }
+}
+
+/// resolvedUndefsIn - While solving the dataflow for a function, we assume
+/// that branches on undef values cannot reach any of their successors.
+/// However, this is not a safe assumption. After we solve dataflow, this
+/// method should be used to handle this. If this returns true, the solver
+/// should be rerun.
+///
+/// This method handles this by finding an unresolved branch and marking one
+/// of the edges from the block as feasible, even though the condition
+/// doesn't say it would otherwise be. This allows SCCP to find the rest of the
+/// CFG and only slightly pessimizes the analysis results (by marking one,
+/// potentially infeasible, edge feasible). This cannot usefully modify the
+/// constraints on the condition of the branch, as that would impact other users
+/// of the value.
+///
+/// This scan also checks for values that use undefs. It conservatively marks
+/// them as overdefined.
+bool SCCPInstVisitor::resolvedUndefsIn(Function &F) {
+ bool MadeChange = false;
+ for (BasicBlock &BB : F) {
+ if (!BBExecutable.count(&BB))
+ continue;
+
+ for (Instruction &I : BB) {
+ // Look for instructions which produce undef values.
+ if (I.getType()->isVoidTy())
+ continue;
+
+ if (auto *STy = dyn_cast<StructType>(I.getType())) {
+ // Only a few things that can be structs matter for undef.
+
+ // Tracked calls must never be marked overdefined in resolvedUndefsIn.
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *F = CB->getCalledFunction())
+ if (MRVFunctionsTracked.count(F))
+ continue;
+
+ // extractvalue and insertvalue don't need to be marked; they are
+ // tracked as precisely as their operands.
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
+ continue;
+ // Send the results of everything else to overdefined. We could be
+ // more precise than this but it isn't worth bothering.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement &LV = getStructValueState(&I, i);
+ if (LV.isUnknownOrUndef()) {
+ markOverdefined(LV, &I);
+ MadeChange = true;
+ }
+ }
+ continue;
+ }
+
+ ValueLatticeElement &LV = getValueState(&I);
+ if (!LV.isUnknownOrUndef())
+ continue;
+
+      // There are two reasons a call can have an undef result:
+ // 1. It could be tracked.
+ // 2. It could be constant-foldable.
+ // Because of the way we solve return values, tracked calls must
+ // never be marked overdefined in resolvedUndefsIn.
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *F = CB->getCalledFunction())
+ if (TrackedRetVals.count(F))
+ continue;
+
+ if (isa<LoadInst>(I)) {
+        // A load here means one of two things: a load of undef from a global,
+        // or a load from an unknown pointer. Either way, having it return
+        // undef is okay.
+ continue;
+ }
+
+ markOverdefined(&I);
+ MadeChange = true;
+ }
+
+ // Check to see if we have a branch or switch on an undefined value. If so
+ // we force the branch to go one way or the other to make the successor
+ // values live. It doesn't really matter which way we force it.
+ Instruction *TI = BB.getTerminator();
+ if (auto *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional())
+ continue;
+ if (!getValueState(BI->getCondition()).isUnknownOrUndef())
+ continue;
+
+      // If the input to SCCP is actually a branch on undef, fix the undef to
+      // false.
+ if (isa<UndefValue>(BI->getCondition())) {
+ BI->setCondition(ConstantInt::getFalse(BI->getContext()));
+ markEdgeExecutable(&BB, TI->getSuccessor(1));
+ MadeChange = true;
+ continue;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Make sure some edge is executable, so a
+ // branch on "undef" always flows somewhere.
+ // FIXME: Distinguish between dead code and an LLVM "undef" value.
+ BasicBlock *DefaultSuccessor = TI->getSuccessor(1);
+ if (markEdgeExecutable(&BB, DefaultSuccessor))
+ MadeChange = true;
+
+ continue;
+ }
+
+ if (auto *IBR = dyn_cast<IndirectBrInst>(TI)) {
+      // Indirect branch with no successors? It's ok to assume it branches
+      // to no target.
+ if (IBR->getNumSuccessors() < 1)
+ continue;
+
+ if (!getValueState(IBR->getAddress()).isUnknownOrUndef())
+ continue;
+
+      // If the input to SCCP is actually a branch on undef, fix the undef to
+      // the first successor of the indirect branch.
+ if (isa<UndefValue>(IBR->getAddress())) {
+ IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
+ markEdgeExecutable(&BB, IBR->getSuccessor(0));
+ MadeChange = true;
+ continue;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Make sure some edge is executable, so a
+ // branch on "undef" always flows somewhere.
+ // FIXME: IndirectBr on "undef" doesn't actually need to go anywhere:
+ // we can assume the branch has undefined behavior instead.
+ BasicBlock *DefaultSuccessor = IBR->getSuccessor(0);
+ if (markEdgeExecutable(&BB, DefaultSuccessor))
+ MadeChange = true;
+
+ continue;
+ }
+
+ if (auto *SI = dyn_cast<SwitchInst>(TI)) {
+ if (!SI->getNumCases() ||
+ !getValueState(SI->getCondition()).isUnknownOrUndef())
+ continue;
+
+      // If the input to SCCP is actually a switch on undef, fix the undef to
+      // the first constant.
+ if (isa<UndefValue>(SI->getCondition())) {
+ SI->setCondition(SI->case_begin()->getCaseValue());
+ markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor());
+ MadeChange = true;
+ continue;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Make sure some edge is executable, so a
+ // branch on "undef" always flows somewhere.
+ // FIXME: Distinguish between dead code and an LLVM "undef" value.
+ BasicBlock *DefaultSuccessor = SI->case_begin()->getCaseSuccessor();
+ if (markEdgeExecutable(&BB, DefaultSuccessor))
+ MadeChange = true;
+
+ continue;
+ }
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// SCCPSolver implementations
+//
+SCCPSolver::SCCPSolver(
+ const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+ LLVMContext &Ctx)
+ : Visitor(new SCCPInstVisitor(DL, std::move(GetTLI), Ctx)) {}
+
+SCCPSolver::~SCCPSolver() {}
+
+void SCCPSolver::addAnalysis(Function &F, AnalysisResultsForFn A) {
+ return Visitor->addAnalysis(F, std::move(A));
+}
+
+bool SCCPSolver::markBlockExecutable(BasicBlock *BB) {
+ return Visitor->markBlockExecutable(BB);
+}
+
+const PredicateBase *SCCPSolver::getPredicateInfoFor(Instruction *I) {
+ return Visitor->getPredicateInfoFor(I);
+}
+
+DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); }
+
+void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) {
+ Visitor->trackValueOfGlobalVariable(GV);
+}
+
+void SCCPSolver::addTrackedFunction(Function *F) {
+ Visitor->addTrackedFunction(F);
+}
+
+void SCCPSolver::addToMustPreserveReturnsInFunctions(Function *F) {
+ Visitor->addToMustPreserveReturnsInFunctions(F);
+}
+
+bool SCCPSolver::mustPreserveReturn(Function *F) {
+ return Visitor->mustPreserveReturn(F);
+}
+
+void SCCPSolver::addArgumentTrackedFunction(Function *F) {
+ Visitor->addArgumentTrackedFunction(F);
+}
+
+bool SCCPSolver::isArgumentTrackedFunction(Function *F) {
+ return Visitor->isArgumentTrackedFunction(F);
+}
+
+void SCCPSolver::solve() { Visitor->solve(); }
+
+bool SCCPSolver::resolvedUndefsIn(Function &F) {
+ return Visitor->resolvedUndefsIn(F);
+}
+
+bool SCCPSolver::isBlockExecutable(BasicBlock *BB) const {
+ return Visitor->isBlockExecutable(BB);
+}
+
+bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
+ return Visitor->isEdgeFeasible(From, To);
+}
+
+std::vector<ValueLatticeElement>
+SCCPSolver::getStructLatticeValueFor(Value *V) const {
+ return Visitor->getStructLatticeValueFor(V);
+}
+
+void SCCPSolver::removeLatticeValueFor(Value *V) {
+ return Visitor->removeLatticeValueFor(V);
+}
+
+const ValueLatticeElement &SCCPSolver::getLatticeValueFor(Value *V) const {
+ return Visitor->getLatticeValueFor(V);
+}
+
+const MapVector<Function *, ValueLatticeElement> &
+SCCPSolver::getTrackedRetVals() {
+ return Visitor->getTrackedRetVals();
+}
+
+const DenseMap<GlobalVariable *, ValueLatticeElement> &
+SCCPSolver::getTrackedGlobals() {
+ return Visitor->getTrackedGlobals();
+}
+
+const SmallPtrSet<Function *, 16> SCCPSolver::getMRVFunctionsTracked() {
+ return Visitor->getMRVFunctionsTracked();
+}
+
+void SCCPSolver::markOverdefined(Value *V) { Visitor->markOverdefined(V); }
+
+bool SCCPSolver::isStructLatticeConstant(Function *F, StructType *STy) {
+ return Visitor->isStructLatticeConstant(F, STy);
+}
+
+Constant *SCCPSolver::getConstant(const ValueLatticeElement &LV) const {
+ return Visitor->getConstant(LV);
+}
+
+SmallPtrSetImpl<Function *> &SCCPSolver::getArgumentTrackedFunctions() {
+ return Visitor->getArgumentTrackedFunctions();
+}
+
+void SCCPSolver::markArgInFuncSpecialization(Function *F, Argument *A,
+ Constant *C) {
+ Visitor->markArgInFuncSpecialization(F, A, C);
+}
+
+void SCCPSolver::markFunctionUnreachable(Function *F) {
+ Visitor->markFunctionUnreachable(F);
+}
+
+void SCCPSolver::visit(Instruction *I) { Visitor->visit(I); }
+
+void SCCPSolver::visitCall(CallInst &I) { Visitor->visitCall(I); }
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index c210d1c46077..5893ce15b129 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -123,8 +123,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
}
} else {
bool isFirstPred = true;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *PredBB = *PI;
+ for (BasicBlock *PredBB : predecessors(BB)) {
Value *PredVal = GetValueAtEndOfBlock(PredBB);
PredValues.push_back(std::make_pair(PredBB, PredVal));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
new file mode 100644
index 000000000000..6d995cf4c048
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -0,0 +1,177 @@
+//===- SampleProfileLoaderBaseUtil.cpp - Profile loader Util func ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileLoader base utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+
+namespace llvm {
+
+cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+ "sample-profile-max-propagate-iterations", cl::init(100),
+ cl::desc("Maximum number of iterations to go through when propagating "
+ "sample block/edge weights through the CFG."));
+
+cl::opt<unsigned> SampleProfileRecordCoverage(
+ "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of records in the input profile "
+ "are matched to the IR."));
+
+cl::opt<unsigned> SampleProfileSampleCoverage(
+ "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
+ cl::desc("Emit a warning if less than N% of samples in the input profile "
+ "are matched to the IR."));
+
+cl::opt<bool> NoWarnSampleUnused(
+ "no-warn-sample-unused", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn off/on warnings about function with "
+ "samples but without debug information to use those samples. "));
+
+namespace sampleprofutil {
+
+/// Return true if the given callsite is hot with respect to the hot cutoff
+/// threshold.
+///
+/// Functions that were inlined in the original binary will be represented
+/// in the inline stack in the sample profile. If the profile shows that
+/// the original inline decision was "good" (i.e., the callsite is executed
+/// frequently), then we will recreate the inline decision and apply the
+/// profile from the inlined callsite.
+///
+/// To decide whether an inlined callsite is hot, we compare the callsite
+/// sample count with the hot cutoff computed by ProfileSummaryInfo; the
+/// callsite is regarded as hot if the count is above the cutoff value.
+///
+/// When ProfileAccurateForSymsInList is enabled and a profile symbol list is
+/// present, functions that appear in the list but have no profile are
+/// regarded as cold, and much less inlining happens in the CGSCC inlining
+/// pass. We therefore lower the hotness criterion here to allow more early
+/// inlining of warm callsites, which helps performance.
+bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI,
+ bool ProfAccForSymsInList) {
+ if (!CallsiteFS)
+ return false; // The callsite was not inlined in the original binary.
+
+ assert(PSI && "PSI is expected to be non null");
+ uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
+ if (ProfAccForSymsInList)
+ return !PSI->isColdCount(CallsiteTotalSamples);
+ else
+ return PSI->isHotCount(CallsiteTotalSamples);
+}
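As a hedged usage sketch (not part of this patch): the sample-profile loader is the intended caller, and the names below are only illustrative.

  // CallsiteFS and PSI come from the surrounding loader; the branch body is a
  // placeholder for "redo the original inline decision".
  if (sampleprofutil::callsiteIsHot(CallsiteFS, PSI,
                                    /*ProfAccForSymsInList=*/false)) {
    // The callsite was executed frequently in the profiled binary; recreate
    // the inline decision and apply the callsite's own FunctionSamples.
  }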
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+ uint32_t LineOffset,
+ uint32_t Discriminator,
+ uint64_t Samples) {
+ LineLocation Loc(LineOffset, Discriminator);
+ unsigned &Count = SampleCoverage[FS][Loc];
+ bool FirstTime = (++Count == 1);
+ if (FirstTime)
+ TotalUsedSamples += Samples;
+ return FirstTime;
+}
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
+ ProfileSummaryInfo *PSI) const {
+ auto I = SampleCoverage.find(FS);
+
+ // The size of the coverage map for FS represents the number of records
+ // that were marked used at least once.
+ unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+ // If there are inlined callsites in this function, count the samples found
+ // in the respective bodies. However, do not bother counting callees with 0
+ // total samples; these are callees that were never invoked at runtime.
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
+ Count += countUsedRecords(CalleeSamples, PSI);
+ }
+
+ return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
+ ProfileSummaryInfo *PSI) const {
+ unsigned Count = FS->getBodySamples().size();
+
+ // Only count records in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
+ Count += countBodyRecords(CalleeSamples, PSI);
+ }
+
+ return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
+ ProfileSummaryInfo *PSI) const {
+ uint64_t Total = 0;
+ for (const auto &I : FS->getBodySamples())
+ Total += I.second.getSamples();
+
+ // Only count samples in hot callsites.
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(CalleeSamples, PSI, ProfAccForSymsInList))
+ Total += countBodySamples(CalleeSamples, PSI);
+ }
+
+ return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+ unsigned Total) const {
+ assert(Used <= Total &&
+ "number of used records cannot exceed the total number of records");
+ return Total > 0 ? Used * 100 / Total : 100;
+}
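A rough sketch of how these counters compose in the loader (hedged: CoverageTracker, FS and PSI are assumed to exist in the caller, and the warning itself is only gestured at):

  unsigned Used = CoverageTracker.countUsedRecords(FS, PSI);
  unsigned Total = CoverageTracker.countBodyRecords(FS, PSI);
  if (CoverageTracker.computeCoverage(Used, Total) < SampleProfileRecordCoverage) {
    // Fewer than N% of the profile's records matched the IR; this is the
    // situation the -sample-profile-check-record-coverage option reports.
  }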
+
+/// Create a global variable to flag that FSDiscriminators are used.
+void createFSDiscriminatorVariable(Module *M) {
+ const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
+ if (M->getGlobalVariable(FSDiscriminatorVar))
+ return;
+
+ auto &Context = M->getContext();
+ // Add this variable to llvm.used so it won't be GC'ed.
+ appendToUsed(*M, {new GlobalVariable(*M, Type::getInt1Ty(Context), true,
+ GlobalValue::WeakODRLinkage,
+ ConstantInt::getTrue(Context),
+ FSDiscriminatorVar)});
+}
+
+} // end of namespace sampleprofutil
+} // end of namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 6dbfb0b61fea..3978e1e29825 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -29,6 +29,12 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
+#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
+#else
+#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
+#endif
+
using namespace llvm;
cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
@@ -55,7 +61,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// not allowed to move it.
BasicBlock::iterator BIP = Builder.GetInsertPoint();
- Instruction *Ret = nullptr;
+ Value *Ret = nullptr;
// Check to see if there is already a cast!
for (User *U : V->users()) {
@@ -76,20 +82,23 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// Create a new cast.
if (!Ret) {
- Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
- rememberInstruction(Ret);
+ SCEVInsertPointGuard Guard(Builder, this);
+ Builder.SetInsertPoint(&*IP);
+ Ret = Builder.CreateCast(Op, V, Ty, V->getName());
}
// We assert at the end of the function since IP might point to an
// instruction with different dominance properties than a cast
// (an invoke for example) and not dominate BIP (but the cast does).
- assert(SE.DT.dominates(Ret, &*BIP));
+ assert(!isa<Instruction>(Ret) ||
+ SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
return Ret;
}
BasicBlock::iterator
-SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) {
+SCEVExpander::findInsertPointAfter(Instruction *I,
+ Instruction *MustDominate) const {
BasicBlock::iterator IP = ++I->getIterator();
if (auto *II = dyn_cast<InvokeInst>(I))
IP = II->getNormalDest()->begin();
@@ -114,6 +123,34 @@ SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) {
return IP;
}
+BasicBlock::iterator
+SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ return IP;
+ }
+
+ // Cast the instruction immediately after the instruction.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return findInsertPointAfter(I, &*Builder.GetInsertPoint());
+
+ // Otherwise, this must be some kind of a constant,
+ // so let's plop this cast into the function's entry block.
+ assert(isa<Constant>(V) &&
+ "Expected the cast argument to be a global/constant");
+ return Builder.GetInsertBlock()
+ ->getParent()
+ ->getEntryBlock()
+ .getFirstInsertionPt();
+}
+
/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
@@ -172,22 +209,8 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
if (Constant *C = dyn_cast<Constant>(V))
return ConstantExpr::getCast(Op, C, Ty);
- // Cast the argument at the beginning of the entry block, after
- // any bitcasts of other arguments.
- if (Argument *A = dyn_cast<Argument>(V)) {
- BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
- while ((isa<BitCastInst>(IP) &&
- isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
- cast<BitCastInst>(IP)->getOperand(0) != A) ||
- isa<DbgInfoIntrinsic>(IP))
- ++IP;
- return ReuseOrCreateCast(A, Ty, Op, IP);
- }
-
- // Cast the instruction immediately after the instruction.
- Instruction *I = cast<Instruction>(V);
- BasicBlock::iterator IP = findInsertPointAfter(I, &*Builder.GetInsertPoint());
- return ReuseOrCreateCast(I, Ty, Op, IP);
+ // Try to reuse existing cast, or insert one.
+ return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
}
/// InsertBinop - Insert the specified binary operator, doing a small amount
@@ -430,8 +453,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
PointerType *PTy,
Type *Ty,
Value *V) {
- Type *OriginalElTy = PTy->getElementType();
- Type *ElTy = OriginalElTy;
SmallVector<Value *, 4> GepIndices;
SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
@@ -442,93 +463,97 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Type *IntIdxTy = DL.getIndexType(PTy);
- // Descend down the pointer's type and attempt to convert the other
- // operands into GEP indices, at each level. The first index in a GEP
- // indexes into the array implied by the pointer operand; the rest of
- // the indices index into the element or field type selected by the
- // preceding index.
- for (;;) {
- // If the scale size is not 0, attempt to factor out a scale for
- // array indexing.
- SmallVector<const SCEV *, 8> ScaledOps;
- if (ElTy->isSized()) {
- const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
- if (!ElSize->isZero()) {
- SmallVector<const SCEV *, 8> NewOps;
- for (const SCEV *Op : Ops) {
- const SCEV *Remainder = SE.getConstant(Ty, 0);
- if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
- // Op now has ElSize factored out.
- ScaledOps.push_back(Op);
- if (!Remainder->isZero())
- NewOps.push_back(Remainder);
- AnyNonZeroIndices = true;
- } else {
- // The operand was not divisible, so add it to the list of operands
- // we'll scan next iteration.
- NewOps.push_back(Op);
+ // For opaque pointers, always generate i8 GEP.
+ if (!PTy->isOpaque()) {
+ // Descend down the pointer's type and attempt to convert the other
+ // operands into GEP indices, at each level. The first index in a GEP
+ // indexes into the array implied by the pointer operand; the rest of
+ // the indices index into the element or field type selected by the
+ // preceding index.
+ Type *ElTy = PTy->getElementType();
+ for (;;) {
+ // If the scale size is not 0, attempt to factor out a scale for
+ // array indexing.
+ SmallVector<const SCEV *, 8> ScaledOps;
+ if (ElTy->isSized()) {
+ const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
+ if (!ElSize->isZero()) {
+ SmallVector<const SCEV *, 8> NewOps;
+ for (const SCEV *Op : Ops) {
+ const SCEV *Remainder = SE.getConstant(Ty, 0);
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
+ // Op now has ElSize factored out.
+ ScaledOps.push_back(Op);
+ if (!Remainder->isZero())
+ NewOps.push_back(Remainder);
+ AnyNonZeroIndices = true;
+ } else {
+ // The operand was not divisible, so add it to the list of
+ // operands we'll scan next iteration.
+ NewOps.push_back(Op);
+ }
+ }
+ // If we made any changes, update Ops.
+ if (!ScaledOps.empty()) {
+ Ops = NewOps;
+ SimplifyAddOperands(Ops, Ty, SE);
}
- }
- // If we made any changes, update Ops.
- if (!ScaledOps.empty()) {
- Ops = NewOps;
- SimplifyAddOperands(Ops, Ty, SE);
}
}
- }
- // Record the scaled array index for this level of the type. If
- // we didn't find any operands that could be factored, tentatively
- // assume that element zero was selected (since the zero offset
- // would obviously be folded away).
- Value *Scaled =
- ScaledOps.empty()
- ? Constant::getNullValue(Ty)
- : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false);
- GepIndices.push_back(Scaled);
-
- // Collect struct field index operands.
- while (StructType *STy = dyn_cast<StructType>(ElTy)) {
- bool FoundFieldNo = false;
- // An empty struct has no fields.
- if (STy->getNumElements() == 0) break;
- // Field offsets are known. See if a constant offset falls within any of
- // the struct fields.
- if (Ops.empty())
- break;
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
- if (SE.getTypeSizeInBits(C->getType()) <= 64) {
- const StructLayout &SL = *DL.getStructLayout(STy);
- uint64_t FullOffset = C->getValue()->getZExtValue();
- if (FullOffset < SL.getSizeInBytes()) {
- unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
- GepIndices.push_back(
- ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
- ElTy = STy->getTypeAtIndex(ElIdx);
- Ops[0] =
- SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
- AnyNonZeroIndices = true;
- FoundFieldNo = true;
+ // Record the scaled array index for this level of the type. If
+ // we didn't find any operands that could be factored, tentatively
+ // assume that element zero was selected (since the zero offset
+ // would obviously be folded away).
+ Value *Scaled =
+ ScaledOps.empty()
+ ? Constant::getNullValue(Ty)
+ : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false);
+ GepIndices.push_back(Scaled);
+
+ // Collect struct field index operands.
+ while (StructType *STy = dyn_cast<StructType>(ElTy)) {
+ bool FoundFieldNo = false;
+ // An empty struct has no fields.
+ if (STy->getNumElements() == 0) break;
+ // Field offsets are known. See if a constant offset falls within any of
+ // the struct fields.
+ if (Ops.empty())
+ break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *DL.getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
+ }
}
+ // If no struct field offsets were found, tentatively assume that
+ // field zero was selected (since the zero offset would obviously
+ // be folded away).
+ if (!FoundFieldNo) {
+ ElTy = STy->getTypeAtIndex(0u);
+ GepIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
}
- // If no struct field offsets were found, tentatively assume that
- // field zero was selected (since the zero offset would obviously
- // be folded away).
- if (!FoundFieldNo) {
- ElTy = STy->getTypeAtIndex(0u);
- GepIndices.push_back(
- Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
}
- }
- if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
- ElTy = ATy->getElementType();
- else
- // FIXME: Handle VectorType.
- // E.g., If ElTy is scalable vector, then ElSize is not a compile-time
- // constant, therefore can not be factored out. The generated IR is less
- // ideal with base 'V' cast to i8* and do ugly getelementptr over that.
- break;
+ if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+ ElTy = ATy->getElementType();
+ else
+ // FIXME: Handle VectorType.
+ // E.g., if ElTy is a scalable vector, then ElSize is not a compile-time
+ // constant and therefore cannot be factored out. The generated IR is less
+ // ideal, with base 'V' cast to i8* and an ugly getelementptr over that.
+ break;
+ }
}
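For the opaque-pointer case skipped above there is no pointee type to descend into, so the offsets are left as they are and ultimately emitted as a byte-wise GEP. A minimal sketch of that shape (hedged: Builder, V and Idx are hypothetical stand-ins, not names from this hunk):

  // With an opaque pointee the address arithmetic degenerates to an i8 GEP
  // over the byte offset instead of structured array/field indices.
  Value *ByteGEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "scevgep");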
// If none of the operands were convertible to proper GEP indices, cast
@@ -536,8 +561,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// better than ptrtoint+arithmetic+inttoptr at least.
if (!AnyNonZeroIndices) {
// Cast the base to i8*.
- V = InsertNoopCastOfTo(V,
- Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+ if (!PTy->isOpaque())
+ V = InsertNoopCastOfTo(V,
+ Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
assert(!isa<Instruction>(V) ||
SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
@@ -613,7 +639,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *Casted = V;
if (V->getType() != PTy)
Casted = InsertNoopCastOfTo(Casted, PTy);
- Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
+ Value *GEP = Builder.CreateGEP(PTy->getElementType(), Casted, GepIndices,
+ "scevgep");
Ops.push_back(SE.getUnknown(GEP));
}
@@ -929,9 +956,8 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
// Addrec operands are always loop-invariant, so this can only happen
// if there are instructions which haven't been hoisted.
if (L == IVIncInsertLoop) {
- for (User::op_iterator OI = IncV->op_begin()+1,
- OE = IncV->op_end(); OI != OE; ++OI)
- if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ for (Use &Op : llvm::drop_begin(IncV->operands()))
+ if (Instruction *OInst = dyn_cast<Instruction>(Op))
if (!SE.DT.dominates(OInst, IVIncInsertPos))
return false;
}
@@ -978,10 +1004,10 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
case Instruction::BitCast:
return dyn_cast<Instruction>(IncV->getOperand(0));
case Instruction::GetElementPtr:
- for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {
- if (isa<Constant>(*I))
+ for (Use &U : llvm::drop_begin(IncV->operands())) {
+ if (isa<Constant>(U))
continue;
- if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
+ if (Instruction *OInst = dyn_cast<Instruction>(U)) {
if (!SE.DT.dominates(OInst, InsertPos))
return nullptr;
}
@@ -1121,6 +1147,10 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE,
const SCEVAddRecExpr *Phi,
const SCEVAddRecExpr *Requested,
bool &InvertStep) {
+ // We can't transform to match a pointer PHI.
+ if (Phi->getType()->isPointerTy())
+ return false;
+
Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
@@ -1139,8 +1169,7 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE,
}
// Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
- if (SE.getAddExpr(Requested->getStart(),
- SE.getNegativeSCEV(Requested)) == Phi) {
+ if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
InvertStep = true;
return true;
}
@@ -1209,7 +1238,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// We should not look for an incomplete PHI. Getting SCEV for an incomplete
// PHI has no meaning at all.
if (!PN.isComplete()) {
- DEBUG_WITH_TYPE(
+ SCEV_DEBUG_WITH_TYPE(
DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
continue;
}
@@ -1364,9 +1393,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// can ensure that IVIncrement dominates the current uses.
PostIncLoops = SavedPostIncLoops;
- // Remember this PHI, even in post-inc mode.
+ // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
+ // effective when we are able to use an IV inserted here, so record it.
InsertedValues.insert(PN);
-
+ InsertedIVs.push_back(PN);
return PN;
}
@@ -1551,8 +1581,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// Rewrite an AddRec in terms of the canonical induction variable, if
// its type is more narrow.
if (CanonicalIV &&
- SE.getTypeSizeInBits(CanonicalIV->getType()) >
- SE.getTypeSizeInBits(Ty)) {
+ SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
+ !S->getType()->isPointerTy()) {
SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
@@ -1677,7 +1707,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
Value *V =
expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false);
- return Builder.CreatePtrToInt(V, S->getType());
+ return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
+ GetOptimalInsertionPointForCastOf(V));
}
Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
@@ -1716,8 +1747,14 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
LHS = InsertNoopCastOfTo(LHS, Ty);
}
Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
- Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
- Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+ Value *Sel;
+ if (Ty->isIntegerTy())
+ Sel = Builder.CreateIntrinsic(Intrinsic::smax, {Ty}, {LHS, RHS},
+ /*FMFSource=*/nullptr, "smax");
+ else {
+ Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
+ Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+ }
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1739,8 +1776,14 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
LHS = InsertNoopCastOfTo(LHS, Ty);
}
Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
- Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
- Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+ Value *Sel;
+ if (Ty->isIntegerTy())
+ Sel = Builder.CreateIntrinsic(Intrinsic::umax, {Ty}, {LHS, RHS},
+ /*FMFSource=*/nullptr, "umax");
+ else {
+ Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
+ Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+ }
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1762,8 +1805,14 @@ Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
LHS = InsertNoopCastOfTo(LHS, Ty);
}
Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
- Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
- Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
+ Value *Sel;
+ if (Ty->isIntegerTy())
+ Sel = Builder.CreateIntrinsic(Intrinsic::smin, {Ty}, {LHS, RHS},
+ /*FMFSource=*/nullptr, "smin");
+ else {
+ Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
+ Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
+ }
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1785,8 +1834,14 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
LHS = InsertNoopCastOfTo(LHS, Ty);
}
Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
- Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
- Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
+ Value *Sel;
+ if (Ty->isIntegerTy())
+ Sel = Builder.CreateIntrinsic(Intrinsic::umin, {Ty}, {LHS, RHS},
+ /*FMFSource=*/nullptr, "umin");
+ else {
+ Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
+ Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
+ }
LHS = Sel;
}
// In the case of mixed integer and pointer types, cast the
@@ -1822,8 +1877,8 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user"));
else {
assert(Inst->getType()->isPointerTy());
- Tmp = cast<Instruction>(
- Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user"));
+ Tmp = cast<Instruction>(Builder.CreatePtrToInt(
+ Inst, Type::getInt32Ty(Inst->getContext()), "tmp.lcssa.user"));
}
V = fixupLCSSAFormFor(Tmp, 0);
@@ -1846,7 +1901,7 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
ScalarEvolution::ValueOffsetPair
SCEVExpander::FindValueInExprValueMap(const SCEV *S,
const Instruction *InsertPt) {
- SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
+ auto *Set = SE.getSCEVValues(S);
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
if (CanonicalMode || !SE.containsAddRecurrence(S)) {
@@ -2045,8 +2100,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Phi->replaceAllUsesWith(V);
DeadInsts.emplace_back(Phi);
++NumElim;
- DEBUG_WITH_TYPE(DebugType, dbgs()
- << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
+ SCEV_DEBUG_WITH_TYPE(DebugType,
+ dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
+ << '\n');
continue;
}
@@ -2103,9 +2159,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
TruncExpr == SE.getSCEV(IsomorphicInc) &&
SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
hoistIVInc(OrigInc, IsomorphicInc)) {
- DEBUG_WITH_TYPE(DebugType,
- dbgs() << "INDVARS: Eliminated congruent iv.inc: "
- << *IsomorphicInc << '\n');
+ SCEV_DEBUG_WITH_TYPE(
+ DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
Value *NewInc = OrigInc;
if (OrigInc->getType() != IsomorphicInc->getType()) {
Instruction *IP = nullptr;
@@ -2124,10 +2180,11 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
}
}
}
- DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: "
- << *Phi << '\n');
- DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: "
- << *OrigPhiRef << '\n');
+ SCEV_DEBUG_WITH_TYPE(DebugType,
+ dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
+ << '\n');
+ SCEV_DEBUG_WITH_TYPE(
+ DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
++NumElim;
Value *NewIV = OrigPhiRef;
if (OrigPhiRef->getType() != Phi->getType()) {
@@ -2179,13 +2236,13 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
return None;
}
-template<typename T> static int costAndCollectOperands(
+template<typename T> static InstructionCost costAndCollectOperands(
const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
TargetTransformInfo::TargetCostKind CostKind,
SmallVectorImpl<SCEVOperand> &Worklist) {
const T *S = cast<T>(WorkItem.S);
- int Cost = 0;
+ InstructionCost Cost = 0;
// Object to help map SCEV operands to expanded IR instructions.
struct OperationIndices {
OperationIndices(unsigned Opc, size_t min, size_t max) :
@@ -2200,7 +2257,7 @@ template<typename T> static int costAndCollectOperands(
// we know what the generated user(s) will be.
SmallVector<OperationIndices, 2> Operations;
- auto CastCost = [&](unsigned Opcode) {
+ auto CastCost = [&](unsigned Opcode) -> InstructionCost {
Operations.emplace_back(Opcode, 0, 0);
return TTI.getCastInstrCost(Opcode, S->getType(),
S->getOperand(0)->getType(),
@@ -2208,14 +2265,15 @@ template<typename T> static int costAndCollectOperands(
};
auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
- unsigned MinIdx = 0, unsigned MaxIdx = 1) {
+ unsigned MinIdx = 0,
+ unsigned MaxIdx = 1) -> InstructionCost {
Operations.emplace_back(Opcode, MinIdx, MaxIdx);
return NumRequired *
TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
};
- auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired,
- unsigned MinIdx, unsigned MaxIdx) {
+ auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
+ unsigned MaxIdx) -> InstructionCost {
Operations.emplace_back(Opcode, MinIdx, MaxIdx);
Type *OpType = S->getOperand(0)->getType();
return NumRequired * TTI.getCmpSelInstrCost(
@@ -2262,6 +2320,7 @@ template<typename T> static int costAndCollectOperands(
case scUMaxExpr:
case scSMinExpr:
case scUMinExpr: {
+ // FIXME: should this ask the cost for the intrinsics?
Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
break;
@@ -2286,10 +2345,11 @@ template<typename T> static int costAndCollectOperands(
// Much like with a normal add expr, the polynomial will require
// one less addition than the number of its terms.
- int AddCost = ArithCost(Instruction::Add, NumTerms - 1,
- /*MinIdx*/1, /*MaxIdx*/1);
+ InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
+ /*MinIdx*/ 1, /*MaxIdx*/ 1);
// Here, *each* one of those will require a multiplication.
- int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
+ InstructionCost MulCost =
+ ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
Cost = AddCost + MulCost;
// What is the degree of this polynomial?
@@ -2320,10 +2380,10 @@ template<typename T> static int costAndCollectOperands(
bool SCEVExpander::isHighCostExpansionHelper(
const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
- int &BudgetRemaining, const TargetTransformInfo &TTI,
+ InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
SmallPtrSetImpl<const SCEV *> &Processed,
SmallVectorImpl<SCEVOperand> &Worklist) {
- if (BudgetRemaining < 0)
+ if (Cost > Budget)
return true; // Already run out of budget, give up.
const SCEV *S = WorkItem.S;
@@ -2353,17 +2413,16 @@ bool SCEVExpander::isHighCostExpansionHelper(
return 0;
const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
Type *Ty = S->getType();
- BudgetRemaining -= TTI.getIntImmCostInst(
+ Cost += TTI.getIntImmCostInst(
WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
- return BudgetRemaining < 0;
+ return Cost > Budget;
}
case scTruncate:
case scPtrToInt:
case scZeroExtend:
case scSignExtend: {
- int Cost =
+ Cost +=
costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
- BudgetRemaining -= Cost;
return false; // Will answer upon next entry into this function.
}
case scUDivExpr: {
@@ -2379,10 +2438,8 @@ bool SCEVExpander::isHighCostExpansionHelper(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
return false; // Consider it to be free.
- int Cost =
+ Cost +=
costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
- // Need to count the cost of this UDiv.
- BudgetRemaining -= Cost;
return false; // Will answer upon next entry into this function.
}
case scAddExpr:
@@ -2395,17 +2452,16 @@ bool SCEVExpander::isHighCostExpansionHelper(
"Nary expr should have more than 1 operand.");
// The simple nary expr will require one less op (or pair of ops)
// than the number of its terms.
- int Cost =
+ Cost +=
costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
- BudgetRemaining -= Cost;
- return BudgetRemaining < 0;
+ return Cost > Budget;
}
case scAddRecExpr: {
assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
"Polynomial should be at least linear");
- BudgetRemaining -= costAndCollectOperands<SCEVAddRecExpr>(
+ Cost += costAndCollectOperands<SCEVAddRecExpr>(
WorkItem, TTI, CostKind, Worklist);
- return BudgetRemaining < 0;
+ return Cost > Budget;
}
}
llvm_unreachable("Unknown SCEV kind!");
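The recurring change in this hunk, reduced to a sketch: costs are accumulated into an InstructionCost and compared against a fixed unsigned budget, instead of decrementing a signed BudgetRemaining. The helper below is illustrative only (Cost and Budget mirror the hunk's parameters; SCEVCheapExpansionBudget is the cl::opt declared earlier in this file):

  InstructionCost Cost = 0;
  unsigned Budget = SCEVCheapExpansionBudget;
  auto OverBudget = [&](InstructionCost Delta) {
    Cost += Delta;                            // accumulate rather than subtract
    return !Cost.isValid() || Cost > Budget;  // an invalid cost is "too expensive"
  };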
@@ -2473,7 +2529,10 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false);
Value *NegStepValue =
expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false);
- Value *StartValue = expandCodeForImpl(Start, ARExpandTy, Loc, false);
+ Value *StartValue = expandCodeForImpl(
+ isa<PointerType>(ARExpandTy) ? Start
+ : SE.getPtrToIntExpr(Start, ARExpandTy),
+ ARExpandTy, Loc, false);
ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2675,14 +2734,13 @@ bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
return true;
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
- for (const Value *V : InsertionPoint->operand_values())
- if (V == U->getValue())
- return true;
+ if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
+ return true;
}
return false;
}
-SCEVExpanderCleaner::~SCEVExpanderCleaner() {
+void SCEVExpanderCleaner::cleanup() {
// Result is used, nothing to remove.
if (ResultUsed)
return;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index de9560df9785..583bb379488e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -57,11 +57,13 @@
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -111,10 +113,6 @@ static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
"to speculatively execute to fold a 2-entry PHI node into a "
"select (default = 4)"));
-static cl::opt<bool> DupRet(
- "simplifycfg-dup-ret", cl::Hidden, cl::init(false),
- cl::desc("Duplicate return instructions into unconditional branches"));
-
static cl::opt<bool>
HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
cl::desc("Hoist common instructions up to the parent block"));
@@ -149,9 +147,10 @@ static cl::opt<unsigned> MaxSpeculationDepth(
"speculatively executed instructions"));
static cl::opt<int>
-MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
- cl::desc("Max size of a block which is still considered "
- "small enough to thread through"));
+ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
+ cl::init(10),
+ cl::desc("Max size of a block which is still considered "
+ "small enough to thread through"));
// Two is chosen to allow one negation and a logical combine.
static cl::opt<unsigned>
@@ -235,7 +234,6 @@ class SimplifyCFGOpt {
bool FoldValueComparisonIntoPredecessors(Instruction *TI,
IRBuilder<> &Builder);
- bool simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
bool simplifySingleResume(ResumeInst *RI);
bool simplifyCommonResume(ResumeInst *RI);
@@ -246,12 +244,12 @@ class SimplifyCFGOpt {
bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
- bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder);
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
IRBuilder<> &Builder);
- bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI);
+ bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI,
+ bool EqTermsOnly);
bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
const TargetTransformInfo &TTI);
bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
@@ -335,8 +333,8 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
/// which is assumed to be safe to speculate. TCC_Free means cheap,
/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
/// expensive.
-static unsigned ComputeSpeculationCost(const User *I,
- const TargetTransformInfo &TTI) {
+static InstructionCost computeSpeculationCost(const User *I,
+ const TargetTransformInfo &TTI) {
assert(isSafeToSpeculativelyExecute(I) &&
"Instruction is not safe to speculatively execute!");
return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
@@ -349,19 +347,20 @@ static unsigned ComputeSpeculationCost(const User *I,
///
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
/// see if V (which must be an instruction) and its recursive operands
-/// that do not dominate BB have a combined cost lower than CostRemaining and
+/// that do not dominate BB have a combined cost lower than Budget and
/// are non-trapping. If both are true, the instruction is inserted into the
/// set and true is returned.
///
/// The cost for most non-trapping instructions is defined as 1 except for
/// Select whose cost is 2.
///
-/// After this function returns, CostRemaining is decreased by the cost of
+/// After this function returns, Cost is increased by the cost of
/// V plus its non-dominating operands. If that cost is greater than
-/// CostRemaining, false is returned and CostRemaining is undefined.
-static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+/// Budget, false is returned and Cost is undefined.
+static bool dominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction *> &AggressiveInsts,
- int &BudgetRemaining,
+ InstructionCost &Cost,
+ InstructionCost Budget,
const TargetTransformInfo &TTI,
unsigned Depth = 0) {
// It is possible to hit a zero-cost cycle (phi/gep instructions for example),
@@ -404,7 +403,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
if (!isSafeToSpeculativelyExecute(I))
return false;
- BudgetRemaining -= ComputeSpeculationCost(I, TTI);
+ Cost += computeSpeculationCost(I, TTI);
// Allow exactly one instruction to be speculated regardless of its cost
// (as long as it is safe to do so).
@@ -412,14 +411,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// or other expensive operation. The speculation of an expensive instruction
// is expected to be undone in CodeGenPrepare if the speculation has not
// enabled further IR optimizations.
- if (BudgetRemaining < 0 &&
- (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
+ if (Cost > Budget &&
+ (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
+ !Cost.isValid()))
return false;
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
- for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI,
+ for (Use &Op : I->operands())
+ if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
Depth + 1))
return false;
// Okay, it's safe to do this! Remember this instruction.
@@ -615,8 +615,8 @@ private:
}
// If we have "x ult 3", for example, then we can add 0,1,2 to the set.
- ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
- ICI->getPredicate(), C->getValue());
+ ConstantRange Span =
+ ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue());
// Shift the range if the compare is fed by an add. This is the range
// compare idiom as emitted by instcombine.
@@ -906,24 +906,27 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
- SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
+ SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
auto *Successor = i->getCaseSuccessor();
- ++NumPerSuccessorCases[Successor];
+ if (DTU)
+ ++NumPerSuccessorCases[Successor];
if (DeadCases.count(i->getCaseValue())) {
Successor->removePredecessor(PredDef);
SI.removeCase(i);
- --NumPerSuccessorCases[Successor];
+ if (DTU)
+ --NumPerSuccessorCases[Successor];
}
}
- std::vector<DominatorTree::UpdateType> Updates;
- for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
- if (I.second == 0)
- Updates.push_back({DominatorTree::Delete, PredDef, I.first});
- if (DTU)
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, PredDef, I.first});
DTU->applyUpdates(Updates);
+ }
LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
@@ -954,7 +957,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
if (!TheRealDest)
TheRealDest = ThisDef;
- SmallSetVector<BasicBlock *, 2> RemovedSuccs;
+ SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
// Remove PHI node entries for dead edges.
BasicBlock *CheckEdge = TheRealDest;
@@ -1080,7 +1083,10 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// For an analogous reason, we must also drop all the metadata whose
// semantics we don't understand. We *can* preserve !annotation, because
// it is tied to the instruction itself, not the value or position.
- NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation);
+ // Similarly, strip attributes on call parameters that may cause UB in the
+ // location the call is moved to.
+ NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata(
+ LLVMContext::MD_annotation);
PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst);
NewBonusInst->takeName(&BonusInst);
@@ -1093,8 +1099,13 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
(NewBonusInst->getName() + ".merge").str());
SSAUpdate.AddAvailableValue(BB, &BonusInst);
SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst);
- for (Use &U : make_early_inc_range(BonusInst.uses()))
- SSAUpdate.RewriteUseAfterInsertions(U);
+ for (Use &U : make_early_inc_range(BonusInst.uses())) {
+ auto *UI = cast<Instruction>(U.getUser());
+ if (UI->getParent() != PredBlock)
+ SSAUpdate.RewriteUseAfterInsertions(U);
+ else // Use is in the same block as, and comes before, NewBonusInst.
+ SSAUpdate.RewriteUse(U);
+ }
}
}
@@ -1103,7 +1114,7 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
BasicBlock *BB = TI->getParent();
BasicBlock *Pred = PTI->getParent();
- std::vector<DominatorTree::UpdateType> Updates;
+ SmallVector<DominatorTree::UpdateType, 32> Updates;
// Figure out which 'cases' to copy from SI to PSI.
std::vector<ValueEqualityComparisonCase> BBCases;
@@ -1168,7 +1179,7 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
// Reconstruct the new switch statement we will be building.
if (PredDefault != BBDefault) {
PredDefault->removePredecessor(Pred);
- if (PredDefault != BB)
+ if (DTU && PredDefault != BB)
Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
PredDefault = BBDefault;
++NewSuccessors[BBDefault];
@@ -1244,13 +1255,18 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
// Okay, at this point, we know which new successor Pred will get. Make
// sure we update the number of entries in the PHI nodes for these
// successors.
+ SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
+ if (DTU) {
+ SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
+ Updates.reserve(Updates.size() + NewSuccessors.size());
+ }
for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
NewSuccessors) {
for (auto I : seq(0, NewSuccessor.second)) {
(void)I;
AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
}
- if (!is_contained(successors(Pred), NewSuccessor.first))
+ if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
}
@@ -1290,18 +1306,21 @@ bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
InfLoopBlock =
BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
- Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
+ if (DTU)
+ Updates.push_back(
+ {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
}
NewSI->setSuccessor(i, InfLoopBlock);
}
- if (InfLoopBlock)
- Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
+ if (DTU) {
+ if (InfLoopBlock)
+ Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
- Updates.push_back({DominatorTree::Delete, Pred, BB});
+ Updates.push_back({DominatorTree::Delete, Pred, BB});
- if (DTU)
DTU->applyUpdates(Updates);
+ }
++NumFoldValueComparisonIntoPredecessors;
return true;
@@ -1368,9 +1387,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
/// in the two blocks up into the branch block. The caller of this function
-/// guarantees that BI's block dominates BB1 and BB2.
+/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
+/// hoisting is only performed if both blocks contain only a terminator. In
+/// that case, only the original BI is replaced and selects are added for PHIs.
bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ bool EqTermsOnly) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
@@ -1379,6 +1401,12 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
+ // If either of the blocks has its address taken, then we can't do this fold,
+ // because the code we'd hoist would no longer run when we jump into the block
+ // by its address.
+ if (BB1->hasAddressTaken() || BB2->hasAddressTaken())
+ return false;
+
BasicBlock::iterator BB1_Itr = BB1->begin();
BasicBlock::iterator BB2_Itr = BB2->begin();
@@ -1407,6 +1435,20 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
++NumHoistCommonCode;
});
+ // Check if only hoisting terminators is allowed. This does not add new
+ // instructions to the hoist location.
+ if (EqTermsOnly) {
+ // Skip any debug intrinsics, as they are free to hoist.
+ auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
+ auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
+ if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
+ return false;
+ if (!I1NonDbg->isTerminator())
+ return false;
+ // Now we know that we only need to hoist debug intrinsics and the
+ // terminator. Let the loop below handle those two cases.
+ }
+
do {
// If we are hoisting the terminator instruction, don't move one (making a
// broken BB), instead clone it, and remove BI.
@@ -1578,10 +1620,13 @@ HoistTerminator:
// Update any PHI nodes in our new successors.
for (BasicBlock *Succ : successors(BB1)) {
AddPredecessorToBlock(Succ, BIParent, BB1);
- Updates.push_back({DominatorTree::Insert, BIParent, Succ});
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, BIParent, Succ});
}
- for (BasicBlock *Succ : successors(BI))
- Updates.push_back({DominatorTree::Delete, BIParent, Succ});
+
+ if (DTU)
+ for (BasicBlock *Succ : successors(BI))
+ Updates.push_back({DominatorTree::Delete, BIParent, Succ});
EraseTerminatorAndDCECond(BI);
if (DTU)
@@ -1692,6 +1737,32 @@ static bool canSinkInstructions(
}))
return false;
+ // For calls to be sinkable, they must all be indirect, or all have the same
+ // callee. I.e. if we have two direct calls to different callees, we don't
+ // want to turn that into an indirect call. Likewise, if we have both an
+ // indirect call and a direct call, we don't want a single indirect call.
+ if (isa<CallBase>(I0)) {
+ auto IsIndirectCall = [](const Instruction *I) {
+ return cast<CallBase>(I)->isIndirectCall();
+ };
+ bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
+ bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
+ if (HaveIndirectCalls) {
+ if (!AllCallsAreIndirect)
+ return false;
+ } else {
+ // All callees must be identical.
+ Value *Callee = nullptr;
+ for (const Instruction *I : Insts) {
+ Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
+ if (!Callee)
+ Callee = CurrCallee;
+ else if (Callee != CurrCallee)
+ return false;
+ }
+ }
+ }
+
for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
Value *Op = I0->getOperand(OI);
if (Op->getType()->isTokenTy())
@@ -1707,11 +1778,6 @@ static bool canSinkInstructions(
!canReplaceOperandWithVariable(I0, OI))
// We can't create a PHI from this GEP.
return false;
- // Don't create indirect calls! The called value is the final operand.
- if (isa<CallBase>(I0) && OI == OE - 1) {
- // FIXME: if the call was *already* indirect, we should do this.
- return false;
- }
for (auto *I : Insts)
PHIOperands[I].push_back(I->getOperand(OI));
}
@@ -1871,6 +1937,20 @@ namespace {
}
}
+ void operator++() {
+ if (Fail)
+ return;
+ for (auto *&Inst : Insts) {
+ for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+ Inst = Inst->getNextNode();
+ // Already at end of block.
+ if (!Inst) {
+ Fail = true;
+ return;
+ }
+ }
+ }
+
ArrayRef<Instruction*> operator * () const {
return Insts;
}
@@ -1880,13 +1960,11 @@ namespace {
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
-/// We also allow one predecessor to end with conditional branch (but no more
-/// than one).
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
DomTreeUpdater *DTU) {
// We support two situations:
// (1) all incoming arcs are unconditional
- // (2) one incoming arc is conditional
+ // (2) there are non-unconditional incoming arcs
//
// (2) is very common in switch defaults and
// else-if patterns;
@@ -1926,15 +2004,13 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
// [ end ]
//
SmallVector<BasicBlock*,4> UnconditionalPreds;
- Instruction *Cond = nullptr;
- for (auto *B : predecessors(BB)) {
- auto *T = B->getTerminator();
- if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
- UnconditionalPreds.push_back(B);
- else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond)
- Cond = T;
+ bool HaveNonUnconditionalPredecessors = false;
+ for (auto *PredBB : predecessors(BB)) {
+ auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (PredBr && PredBr->isUnconditional())
+ UnconditionalPreds.push_back(PredBB);
else
- return false;
+ HaveNonUnconditionalPredecessors = true;
}
if (UnconditionalPreds.size() < 2)
return false;
@@ -1945,7 +2021,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
// carry on. If we can sink an instruction but need to PHI-merge some operands
// (because they're not identical in each instruction) we add these to
// PHIOperands.
- unsigned ScanIdx = 0;
+ int ScanIdx = 0;
SmallPtrSet<Value*,4> InstructionsToSink;
DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
LockstepReverseIterator LRI(UnconditionalPreds);
@@ -1962,14 +2038,18 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
if (ScanIdx == 0)
return false;
- bool Changed = false;
-
+ // Okay, we *could* sink the last ScanIdx instructions. But how many can we
+ // actually sink before encountering an instruction that is unprofitable to
+ // sink?
auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
unsigned NumPHIdValues = 0;
for (auto *I : *LRI)
- for (auto *V : PHIOperands[I])
+ for (auto *V : PHIOperands[I]) {
if (InstructionsToSink.count(V) == 0)
++NumPHIdValues;
+ // FIXME: this check is overly optimistic. We may end up not sinking
+ // said instruction, due to the very same profitability check.
+ // See @creating_too_many_phis in sink-common-code.ll.
+ }
LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
@@ -1978,16 +2058,80 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
return NumPHIInsts <= 1;
};
- if (Cond) {
- // Check if we would actually sink anything first! This mutates the CFG and
- // adds an extra block. The goal in doing this is to allow instructions that
- // couldn't be sunk before to be sunk - obviously, speculatable instructions
- // (such as trunc, add) can be sunk and predicated already. So we check that
- // we're going to sink at least one non-speculatable instruction.
+ // We've determined that we are going to sink the last ScanIdx instructions,
+ // and recorded them in InstructionsToSink. Now, some instructions may be
+ // unprofitable to sink. But that determination depends on the instructions
+ // that we are going to sink.
+
+ // First, forward scan: find the first instruction unprofitable to sink,
+ // recording all the ones that are profitable to sink.
+ // FIXME: would it be better, after we detect that not all are profitable,
+ // to either record the profitable ones, or erase the unprofitable ones?
+ // Maybe we need to choose (at runtime) the one that will touch the fewest
+ // instructions?
+ LRI.reset();
+ int Idx = 0;
+ SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
+ while (Idx < ScanIdx) {
+ if (!ProfitableToSinkInstruction(LRI)) {
+ // Too many PHIs would be created.
+ LLVM_DEBUG(
+ dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
+ break;
+ }
+ InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
+ --LRI;
+ ++Idx;
+ }
+
+ // If no instructions can be sunk, early-return.
+ if (Idx == 0)
+ return false;
+
+ // Did we determine that (only) some instructions are unprofitable to sink?
+ if (Idx < ScanIdx) {
+ // Okay, some instructions are unprofitable.
+ ScanIdx = Idx;
+ InstructionsToSink = InstructionsProfitableToSink;
+
+ // But that may make other instructions unprofitable, too.
+ // So, do a backward scan: do any earlier instructions become unprofitable?
+ assert(!ProfitableToSinkInstruction(LRI) &&
+ "We already know that the last instruction is unprofitable to sink");
+ ++LRI;
+ --Idx;
+ while (Idx >= 0) {
+ // If we detect that an instruction becomes unprofitable to sink,
+ // all earlier instructions won't be sunk either,
+ // so preemptively keep InstructionsProfitableToSink in sync.
+ // FIXME: is this the most performant approach?
+ for (auto *I : *LRI)
+ InstructionsProfitableToSink.erase(I);
+ if (!ProfitableToSinkInstruction(LRI)) {
+ // Everything starting with this instruction won't be sunk.
+ ScanIdx = Idx;
+ InstructionsToSink = InstructionsProfitableToSink;
+ }
+ ++LRI;
+ --Idx;
+ }
+ }
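A small worked example of the two scans above (the numbers are invented purely for illustration):

  // Suppose ScanIdx == 4.  Forward scan (from the bottom of each predecessor
  // upwards): the last two rows are cheap, but the third would create an
  // extra PHI, so Idx == 2 and ScanIdx is lowered to 2.  Backward scan: with
  // the third row dropped from InstructionsToSink, one of the second row's
  // operands must now be PHI'd, so that row becomes unprofitable too and
  // only the bottom row is actually sunk.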
+
+ // If no instructions can be sunk, early-return.
+ if (ScanIdx == 0)
+ return false;
+
+ bool Changed = false;
+
+ if (HaveNonUnconditionalPredecessors) {
+ // It is always legal to sink common instructions from unconditional
+ // predecessors. However, if not all predecessors are unconditional,
+ // this transformation might be pessimizing. So as a rule of thumb,
+ // don't do it unless we'd sink at least one non-speculatable instruction.
+ // See https://bugs.llvm.org/show_bug.cgi?id=30244
LRI.reset();
- unsigned Idx = 0;
+ int Idx = 0;
bool Profitable = false;
- while (ProfitableToSinkInstruction(LRI) && Idx < ScanIdx) {
+ while (Idx < ScanIdx) {
if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
Profitable = true;
break;
@@ -2019,7 +2163,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
// sink presuming a later value will also be sunk, but stop halfway through
// and never actually sink it, which means we produce more PHIs than intended.
// This is unlikely in practice though.
- unsigned SinkIdx = 0;
+ int SinkIdx = 0;
for (; SinkIdx != ScanIdx; ++SinkIdx) {
LLVM_DEBUG(dbgs() << "SINK: Sink: "
<< *UnconditionalPreds[0]->getTerminator()->getPrevNode()
@@ -2028,12 +2172,6 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
// Because we've sunk every instruction in turn, the current instruction to
// sink is always at index 0.
LRI.reset();
- if (!ProfitableToSinkInstruction(LRI)) {
- // Too many PHIs would be created.
- LLVM_DEBUG(
- dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
- break;
- }
if (!sinkLastInstruction(UnconditionalPreds)) {
LLVM_DEBUG(
@@ -2087,6 +2225,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
return nullptr;
Value *StorePtr = StoreToHoist->getPointerOperand();
+ Type *StoreTy = StoreToHoist->getValueOperand()->getType();
// Look for a store to the same pointer in BrBB.
unsigned MaxNumInstToLookAt = 9;
@@ -2098,12 +2237,15 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
--MaxNumInstToLookAt;
// Could be calling an instruction that affects memory like free().
- if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI))
+ if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
return nullptr;
if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
- // Found the previous store make sure it stores to the same location.
- if (SI->getPointerOperand() == StorePtr)
+ // Found the previous store to the same location and type. Make sure it is
+ // simple, to avoid introducing a spurious non-atomic write after an
+ // atomic write.
+ if (SI->getPointerOperand() == StorePtr &&
+ SI->getValueOperand()->getType() == StoreTy && SI->isSimple())
// Found the previous store, return its value operand.
return SI->getValueOperand();
return nullptr; // Unknown store.
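The extra conditions above (same pointer, same type, simple store) are what make it safe to later turn the conditional store into an unconditional one. At the source level the enabled rewrite looks roughly like this illustrative C++ sketch (names and values are made up):

// Before: the store in the 'then' block only happens when C is true.
void before(int *P, int X, bool C) {
  *P = 0;       // dominating simple store to the same location and type
  if (C)
    *P = X;     // candidate store to speculate
}

// After if-conversion: the candidate store executes unconditionally, but it
// stores a select between the new value and the previously stored value, so
// memory contents are unchanged when C is false.
void after(int *P, int X, bool C) {
  *P = 0;
  *P = C ? X : 0;
}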
@@ -2118,7 +2260,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
BasicBlock *EndBB,
unsigned &SpeculatedInstructions,
- int &BudgetRemaining,
+ InstructionCost &Cost,
const TargetTransformInfo &TTI) {
TargetTransformInfo::TargetCostKind CostKind =
BB->getParent()->hasMinSize()
@@ -2135,9 +2277,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
if (ThenV == OrigV)
continue;
- BudgetRemaining -=
- TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
// Don't convert to selects if we could remove undefined behavior instead.
if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
@@ -2153,9 +2294,9 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
(OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
- unsigned MaxCost =
+ InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
+ InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
+ InstructionCost MaxCost =
2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
if (OrigCost + ThenCost > MaxCost)
return false;
@@ -2218,8 +2359,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
BasicBlock *BB = BI->getParent();
BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
- int BudgetRemaining =
- PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ InstructionCost Budget =
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
// If ThenBB is actually on the false edge of the conditional branch, remember
// to swap the select operands later.
@@ -2230,6 +2371,20 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
+ // If the branch is non-unpredictable, and is predicted to *not* branch to
+ // the `then` block, then avoid speculating it.
+ if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
+ uint64_t TWeight, FWeight;
+ if (BI->extractProfMetadata(TWeight, FWeight) && (TWeight + FWeight) != 0) {
+ uint64_t EndWeight = Invert ? TWeight : FWeight;
+ BranchProbability BIEndProb =
+ BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
+ BranchProbability Likely = TTI.getPredictableBranchThreshold();
+ if (BIEndProb >= Likely)
+ return false;
+ }
+ }
+
// Keep a count of how many times instructions are used within ThenBB when
// they are candidates for sinking into ThenBB. Specifically:
// - They are defined in BB, and
@@ -2256,6 +2411,10 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// probability for ThenBB, which is fine since the optimization here takes
// place regardless of the branch probability.
if (isa<PseudoProbeInst>(I)) {
+ // The probe should be deleted so that it will not be over-counted when
+ // the samples collected on the non-conditional path are counted towards
+ // the conditional path. We leave it for the counts inference algorithm to
+ // figure out a proper count for an unknown probe.
SpeculatedDbgIntrinsics.push_back(I);
continue;
}
@@ -2272,7 +2431,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
I, BB, ThenBB, EndBB))))
return false;
if (!SpeculatedStoreValue &&
- ComputeSpeculationCost(I, TTI) >
+ computeSpeculationCost(I, TTI) >
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
@@ -2283,8 +2442,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Do not hoist the instruction if any of its operands are defined but not
// used in BB. The transformation will prevent the operand from
// being sunk into the use block.
- for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
- Instruction *OpI = dyn_cast<Instruction>(*i);
+ for (Use &Op : I->operands()) {
+ Instruction *OpI = dyn_cast<Instruction>(Op);
if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
continue; // Not a candidate for sinking.
@@ -2308,10 +2467,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Check that we can insert the selects and that it's not too expensive to do
// so.
bool Convert = SpeculatedStore != nullptr;
+ InstructionCost Cost = 0;
Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
SpeculatedInstructions,
- BudgetRemaining, TTI);
- if (!Convert || BudgetRemaining < 0)
+ Cost, TTI);
+ if (!Convert || Cost > Budget)
return false;
// If we get here, we can hoist the instruction and if-convert.
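In the common non-store case, "hoist and if-convert" turns a one-armed triangle into straight-line code plus selects for the PHIs. An illustrative C++ view, assuming the then-block contains only cheap, speculatable instructions:

int before(int A, bool C) {
  int X = A;
  if (C)          // ThenBB guards a single cheap computation
    X = A * 2;
  return X;       // PHI of A and A * 2 in EndBB
}

int after(int A, bool C) {
  int T = A * 2;      // speculated into BB
  return C ? T : A;   // PHI replaced by a select
}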
@@ -2335,10 +2495,12 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Conservatively strip all metadata on the instruction. Drop the debug loc
// to avoid making it appear as if the condition is a constant, which would
// be misleading while debugging.
+ // Similarly, strip attributes that may be dependent on the condition we are
+ // hoisting above.
for (auto &I : *ThenBB) {
if (!SpeculatedStoreValue || &I != SpeculatedStore)
I.setDebugLoc(DebugLoc());
- I.dropUnknownNonDebugMetadata();
+ I.dropUndefImplyingAttrsAndUnknownMetadata();
}
// Hoist the instructions.
@@ -2382,19 +2544,32 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
int Size = 0;
- for (Instruction &I : BB->instructionsWithoutDebug()) {
- if (Size > MaxSmallBlockSize)
- return false; // Don't clone large BB's.
+ SmallPtrSet<const Value *, 32> EphValues;
+ auto IsEphemeral = [&](const Value *V) {
+ if (isa<AssumeInst>(V))
+ return true;
+ return isSafeToSpeculativelyExecute(V) &&
+ all_of(V->users(),
+ [&](const User *U) { return EphValues.count(U); });
+ };
+ // Walk the block in reverse so that we can identify ephemeral values properly
+ // (values only feeding assumes).
+ for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
// Can't fold blocks that contain noduplicate or convergent calls.
if (CallInst *CI = dyn_cast<CallInst>(&I))
if (CI->cannotDuplicate() || CI->isConvergent())
return false;
+ // Ignore ephemeral values which are deleted during codegen.
+ if (IsEphemeral(&I))
+ EphValues.insert(&I);
// We will delete Phis while threading, so Phis should not be accounted in
- // block's size
- if (!isa<PHINode>(I))
- ++Size;
+ // block's size.
+ else if (!isa<PHINode>(I)) {
+ if (Size++ > MaxSmallBlockSize)
+ return false; // Don't clone large BB's.
+ }
// We can only support instructions that do not define values that are
// live outside of the current basic block.
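The ephemeral-value handling in the hunk above follows the usual rule: an instruction whose only transitive users are assumes costs nothing, because it disappears once the assumes are dropped for codegen, and walking the block bottom-up classifies users before their definitions. A self-contained C++ sketch of that classification (the Inst struct and every name in it are illustrative):

#include <unordered_set>
#include <vector>

struct Inst {
  bool IsAssume = false;
  bool Speculatable = true;
  std::vector<const Inst *> Users;
};

// An instruction is "ephemeral" if it is an assume, or if it is speculatable
// and every user of it is already known to be ephemeral. Because the block is
// walked bottom-up, all users have been classified before their definition.
int countNonEphemeral(const std::vector<Inst> &Block) {
  std::unordered_set<const Inst *> Ephemeral;
  int Size = 0;
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    const Inst &I = *It;
    bool AllUsersEphemeral = true;
    for (const Inst *U : I.Users)
      AllUsersEphemeral = AllUsersEphemeral && Ephemeral.count(U) != 0;
    if (I.IsAssume || (I.Speculatable && AllUsersEphemeral))
      Ephemeral.insert(&I);
    else
      ++Size; // Only non-ephemeral instructions count toward the size limit.
  }
  return Size;
}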
@@ -2460,7 +2635,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
RealDest->getParent(), RealDest);
BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
- Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
// Update PHI nodes.
@@ -2482,10 +2658,10 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
N->setName(BBI->getName() + ".c");
// Update operands due to translation.
- for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) {
- DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i);
+ for (Use &Op : N->operands()) {
+ DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
if (PI != TranslateMap.end())
- *i = PI->second;
+ Op = PI->second;
}
// Check for trivial simplification.
@@ -2505,8 +2681,9 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
EdgeBB->getInstList().insert(InsertPt, N);
// Register the new instruction with the assumption cache if necessary.
- if (AC && match(N, m_Intrinsic<Intrinsic::assume>()))
- AC->registerAssumption(cast<IntrinsicInst>(N));
+ if (auto *Assume = dyn_cast<AssumeInst>(N))
+ if (AC)
+ AC->registerAssumption(Assume);
}
}
@@ -2519,11 +2696,12 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
PredBBTI->setSuccessor(i, EdgeBB);
}
- Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB});
- Updates.push_back({DominatorTree::Delete, PredBB, BB});
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB});
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
- if (DTU)
DTU->applyUpdates(Updates);
+ }
// Recurse, simplifying any other constants.
return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true;
@@ -2545,12 +2723,54 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
BasicBlock *BB = PN->getParent();
BasicBlock *IfTrue, *IfFalse;
- Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
- if (!IfCond ||
- // Don't bother if the branch will be constant folded trivially.
- isa<ConstantInt>(IfCond))
+ BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
+ if (!DomBI)
+ return false;
+ Value *IfCond = DomBI->getCondition();
+ // Don't bother if the branch will be constant folded trivially.
+ if (isa<ConstantInt>(IfCond))
return false;
+ BasicBlock *DomBlock = DomBI->getParent();
+ SmallVector<BasicBlock *, 2> IfBlocks;
+ llvm::copy_if(
+ PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
+ return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
+ });
+ assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
+ "Will have either one or two blocks to speculate.");
+
+ // If the branch is non-unpredictable, see if we either predictably jump to
+ // the merge bb (if we have only a single 'then' block), or if we predictably
+ // jump to one specific 'then' block (if we have two of them).
+ // It isn't beneficial to speculatively execute the code
+ // from the block that we know is predictably not entered.
+ if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
+ uint64_t TWeight, FWeight;
+ if (DomBI->extractProfMetadata(TWeight, FWeight) &&
+ (TWeight + FWeight) != 0) {
+ BranchProbability BITrueProb =
+ BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
+ BranchProbability Likely = TTI.getPredictableBranchThreshold();
+ BranchProbability BIFalseProb = BITrueProb.getCompl();
+ if (IfBlocks.size() == 1) {
+ BranchProbability BIBBProb =
+ DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
+ if (BIBBProb >= Likely)
+ return false;
+ } else {
+ if (BITrueProb >= Likely || BIFalseProb >= Likely)
+ return false;
+ }
+ }
+ }
+
+ // Don't try to fold an unreachable block. For example, the phi node itself
+ // can't be the candidate if-condition for a select that we want to form.
+ if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
+ if (IfCondPhiInst->getParent() == BB)
+ return false;
+
// Okay, we found that we can merge this two-entry phi node into a select.
// Doing so would require us to fold *all* two entry phi nodes in this block.
// At some point this becomes non-profitable (particularly if the target
@@ -2565,7 +2785,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
SmallPtrSet<Instruction *, 4> AggressiveInsts;
- int BudgetRemaining =
+ InstructionCost Cost = 0;
+ InstructionCost Budget =
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
bool Changed = false;
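For reference, the two-armed diamond that FoldTwoEntryPHINode flattens looks roughly like the following at the C++ level (an illustrative sketch, with made-up operations):

int before(int A, int B, bool C) {
  int X;
  if (C)
    X = A + 1;    // IfTrue block
  else
    X = B * 2;    // IfFalse block
  return X;       // two-entry PHI in the merge block
}

int after(int A, int B, bool C) {
  int T = A + 1;      // both arms hoisted into the dominating block
  int F = B * 2;
  return C ? T : F;   // PHI folded into a select
}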
@@ -2578,10 +2799,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
continue;
}
- if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
- BudgetRemaining, TTI) ||
- !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
- BudgetRemaining, TTI))
+ if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
+ Cost, Budget, TTI) ||
+ !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
+ Cost, Budget, TTI))
return Changed;
}
@@ -2600,13 +2821,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
return match(V0, m_Not(m_Value())) && match(V1, Invertible);
};
- // Don't fold i1 branches on PHIs which contain binary operators, unless one
- // of the incoming values is an 'not' and another one is freely invertible.
+ // Don't fold i1 branches on PHIs which contain binary operators or
+ // the (possibly inverted) select form of or/and, unless one of
+ // the incoming values is a 'not' and the other one is freely invertible.
// These can often be turned into switches and other things.
+ auto IsBinOpOrAnd = [](Value *V) {
+ return match(
+ V, m_CombineOr(
+ m_BinOp(),
+ m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()),
+ m_Select(m_Value(), m_Value(), m_ImmConstant()))));
+ };
if (PN->getType()->isIntegerTy(1) &&
- (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
- isa<BinaryOperator>(PN->getIncomingValue(1)) ||
- isa<BinaryOperator>(IfCond)) &&
+ (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
+ IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
!CanHoistNotFromBothValues(PN->getIncomingValue(0),
PN->getIncomingValue(1)))
return Changed;
@@ -2615,14 +2843,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// in the predecessor blocks can be promoted as well. If not, we won't be able
// to get rid of the control flow, so it's not worth promoting to select
// instructions.
- BasicBlock *DomBlock = nullptr;
- BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
- BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
- if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
- IfBlock1 = nullptr;
- } else {
- DomBlock = *pred_begin(IfBlock1);
- for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I)
+ for (BasicBlock *IfBlock : IfBlocks)
+ for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
!isa<PseudoProbeInst>(I)) {
// This is not an aggressive instruction that we can promote.
@@ -2630,22 +2852,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// the xform is not worth it.
return Changed;
}
- }
- if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
- IfBlock2 = nullptr;
- } else {
- DomBlock = *pred_begin(IfBlock2);
- for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
- !isa<PseudoProbeInst>(I)) {
- // This is not an aggressive instruction that we can promote.
- // Because of this, we won't be able to get rid of the control flow, so
- // the xform is not worth it.
- return Changed;
- }
- }
- assert(DomBlock && "Failed to find root DomBlock");
+ // If either of the blocks has its address taken, we can't do this fold.
+ if (any_of(IfBlocks,
+ [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
+ return Changed;
LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
<< " T: " << IfTrue->getName()
@@ -2653,16 +2864,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
- Instruction *InsertPt = DomBlock->getTerminator();
- IRBuilder<NoFolder> Builder(InsertPt);
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
- if (IfBlock1)
- hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1);
- if (IfBlock2)
- hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
+ for (BasicBlock *IfBlock : IfBlocks)
+ hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
+ IRBuilder<NoFolder> Builder(DomBI);
// Propagate fast-math-flags from phi nodes to replacement selects.
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
@@ -2670,20 +2878,18 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
Builder.setFastMathFlags(PN->getFastMathFlags());
// Change the PHI node into a select instruction.
- Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
- Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+ Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
+ Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
- Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt);
+ Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
PN->replaceAllUsesWith(Sel);
Sel->takeName(PN);
PN->eraseFromParent();
}
- // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // At this point, all IfBlocks are empty, so our if statement
// has been flattened. Change DomBlock to jump directly to our new block to
// avoid other simplifycfg's kicking in on the diamond.
- Instruction *OldTI = DomBlock->getTerminator();
- Builder.SetInsertPoint(OldTI);
Builder.CreateBr(BB);
SmallVector<DominatorTree::UpdateType, 3> Updates;
@@ -2693,115 +2899,24 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
}
- OldTI->eraseFromParent();
+ DomBI->eraseFromParent();
if (DTU)
DTU->applyUpdates(Updates);
return true;
}
-/// If we found a conditional branch that goes to two returning blocks,
-/// try to merge them together into one return,
-/// introducing a select if the return values disagree.
-bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
- IRBuilder<> &Builder) {
- auto *BB = BI->getParent();
- assert(BI->isConditional() && "Must be a conditional branch");
- BasicBlock *TrueSucc = BI->getSuccessor(0);
- BasicBlock *FalseSucc = BI->getSuccessor(1);
- // NOTE: destinations may match, this could be degenerate uncond branch.
- ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
- ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
-
- // Check to ensure both blocks are empty (just a return) or optionally empty
- // with PHI nodes. If there are other instructions, merging would cause extra
- // computation on one path or the other.
- if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
- return false;
- if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
- return false;
-
- Builder.SetInsertPoint(BI);
- // Okay, we found a branch that is going to two return nodes. If
- // there is no return value for this function, just change the
- // branch into a return.
- if (FalseRet->getNumOperands() == 0) {
- TrueSucc->removePredecessor(BB);
- FalseSucc->removePredecessor(BB);
- Builder.CreateRetVoid();
- EraseTerminatorAndDCECond(BI);
- if (DTU) {
- SmallVector<DominatorTree::UpdateType, 2> Updates;
- Updates.push_back({DominatorTree::Delete, BB, TrueSucc});
- if (TrueSucc != FalseSucc)
- Updates.push_back({DominatorTree::Delete, BB, FalseSucc});
- DTU->applyUpdates(Updates);
- }
- return true;
- }
-
- // Otherwise, figure out what the true and false return values are
- // so we can insert a new select instruction.
- Value *TrueValue = TrueRet->getReturnValue();
- Value *FalseValue = FalseRet->getReturnValue();
-
- // Unwrap any PHI nodes in the return blocks.
- if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
- if (TVPN->getParent() == TrueSucc)
- TrueValue = TVPN->getIncomingValueForBlock(BB);
- if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
- if (FVPN->getParent() == FalseSucc)
- FalseValue = FVPN->getIncomingValueForBlock(BB);
-
- // In order for this transformation to be safe, we must be able to
- // unconditionally execute both operands to the return. This is
- // normally the case, but we could have a potentially-trapping
- // constant expression that prevents this transformation from being
- // safe.
- if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
- if (TCV->canTrap())
- return false;
- if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
- if (FCV->canTrap())
- return false;
-
- // Okay, we collected all the mapped values and checked them for sanity, and
- // defined to really do this transformation. First, update the CFG.
- TrueSucc->removePredecessor(BB);
- FalseSucc->removePredecessor(BB);
-
- // Insert select instructions where needed.
- Value *BrCond = BI->getCondition();
- if (TrueValue) {
- // Insert a select if the results differ.
- if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
- } else if (isa<UndefValue>(TrueValue)) {
- TrueValue = FalseValue;
- } else {
- TrueValue =
- Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI);
- }
- }
-
- Value *RI =
- !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
-
- (void)RI;
-
- LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
- << "\n " << *BI << "\nNewRet = " << *RI << "\nTRUEBLOCK: "
- << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc);
-
- EraseTerminatorAndDCECond(BI);
- if (DTU) {
- SmallVector<DominatorTree::UpdateType, 2> Updates;
- Updates.push_back({DominatorTree::Delete, BB, TrueSucc});
- if (TrueSucc != FalseSucc)
- Updates.push_back({DominatorTree::Delete, BB, FalseSucc});
- DTU->applyUpdates(Updates);
- }
-
- return true;
+static Value *createLogicalOp(IRBuilderBase &Builder,
+ Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS, const Twine &Name = "") {
+ // Try to relax logical op to binary op.
+ if (impliesPoison(RHS, LHS))
+ return Builder.CreateBinOp(Opc, LHS, RHS, Name);
+ if (Opc == Instruction::And)
+ return Builder.CreateLogicalAnd(LHS, RHS, Name);
+ if (Opc == Instruction::Or)
+ return Builder.CreateLogicalOr(LHS, RHS, Name);
+ llvm_unreachable("Invalid logical opcode");
}
/// Return true if either PBI or BI has branch weight available, and store
@@ -2827,30 +2942,53 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
}
}
-// Determine if the two branches share a common destination,
-// and deduce a glue that we need to use to join branch's conditions
-// to arrive at the common destination.
+/// Determine if the two branches share a common destination and deduce a glue
+/// that joins the branches' conditions to arrive at the common destination if
+/// that would be profitable.
static Optional<std::pair<Instruction::BinaryOps, bool>>
-CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
+shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
+ const TargetTransformInfo *TTI) {
assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
"Both blocks must end with a conditional branches.");
assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
"PredBB must be a predecessor of BB.");
- if (PBI->getSuccessor(0) == BI->getSuccessor(0))
- return {{Instruction::Or, false}};
- else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
- return {{Instruction::And, false}};
- else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
- return {{Instruction::And, true}};
- else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
- return {{Instruction::Or, true}};
+ // We have the potential to fold the conditions together, but if the
+ // predecessor branch is predictable, we may not want to merge them.
+ uint64_t PTWeight, PFWeight;
+ BranchProbability PBITrueProb, Likely;
+ if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
+ PBI->extractProfMetadata(PTWeight, PFWeight) &&
+ (PTWeight + PFWeight) != 0) {
+ PBITrueProb =
+ BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
+ Likely = TTI->getPredictableBranchThreshold();
+ }
+
+ if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
+ // Speculate the 2nd condition unless the 1st is probably true.
+ if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
+ return {{Instruction::Or, false}};
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
+ // Speculate the 2nd condition unless the 1st is probably false.
+ if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
+ return {{Instruction::And, false}};
+ } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
+ // Speculate the 2nd condition unless the 1st is probably true.
+ if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
+ return {{Instruction::And, true}};
+ } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
+ // Speculate the 2nd condition unless the 1st is probably false.
+ if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
+ return {{Instruction::Or, true}};
+ }
return None;
}
-static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
+static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
DomTreeUpdater *DTU,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ const TargetTransformInfo *TTI) {
BasicBlock *BB = BI->getParent();
BasicBlock *PredBlock = PBI->getParent();
@@ -2858,7 +2996,7 @@ static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
Instruction::BinaryOps Opc;
bool InvertPredCond;
std::tie(Opc, InvertPredCond) =
- *CheckIfCondBranchesShareCommonDestination(BI, PBI);
+ *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
@@ -2949,9 +3087,9 @@ static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
// Now that the Cond was cloned into the predecessor basic block,
// or/and the two conditions together.
- Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(
- Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond"));
- PBI->setCondition(NewCond);
+ Value *BICond = VMap[BI->getCondition()];
+ PBI->setCondition(
+ createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
// Copy any debug value intrinsics into the end of PredBlock.
for (Instruction &I : *BB) {
@@ -2980,11 +3118,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
return false;
BasicBlock *BB = BI->getParent();
-
- const unsigned PredCount = pred_size(BB);
-
- bool Changed = false;
-
TargetTransformInfo::TargetCostKind CostKind =
BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
: TargetTransformInfo::TCK_SizeAndLatency;
@@ -2993,49 +3126,24 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
- return Changed;
-
- // Only allow this transformation if computing the condition doesn't involve
- // too many instructions and these involved instructions can be executed
- // unconditionally. We denote all involved instructions except the condition
- // as "bonus instructions", and only allow this transformation when the
- // number of the bonus instructions we'll need to create when cloning into
- // each predecessor does not exceed a certain threshold.
- unsigned NumBonusInsts = 0;
- for (Instruction &I : *BB) {
- // Don't check the branch condition comparison itself.
- if (&I == Cond)
- continue;
- // Ignore dbg intrinsics, and the terminator.
- if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
- continue;
- // I must be safe to execute unconditionally.
- if (!isSafeToSpeculativelyExecute(&I))
- return Changed;
-
- // Account for the cost of duplicating this instruction into each
- // predecessor.
- NumBonusInsts += PredCount;
- // Early exits once we reach the limit.
- if (NumBonusInsts > BonusInstThreshold)
- return Changed;
- }
+ return false;
// Cond is known to be a compare or binary operator. Check to make sure that
// neither operand is a potentially-trapping constant expression.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
if (CE->canTrap())
- return Changed;
+ return false;
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
if (CE->canTrap())
- return Changed;
+ return false;
// Finally, don't infinitely unroll conditional loops.
if (is_contained(successors(BB), BB))
- return Changed;
+ return false;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *PredBlock = *PI;
+ // Which predecessors will we want to deal with?
+ SmallVector<BasicBlock *, 8> Preds;
+ for (BasicBlock *PredBlock : predecessors(BB)) {
BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
// Check that we have two conditional branches. If there is a PHI node in
@@ -3047,8 +3155,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Determine if the two branches share a common destination.
Instruction::BinaryOps Opc;
bool InvertPredCond;
- if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI))
- std::tie(Opc, InvertPredCond) = *Recepie;
+ if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
+ std::tie(Opc, InvertPredCond) = *Recipe;
else
continue;
@@ -3056,7 +3164,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// transformation.
if (TTI) {
Type *Ty = BI->getCondition()->getType();
- unsigned Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
+ InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
!isa<CmpInst>(PBI->getCondition())))
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
@@ -3065,9 +3173,48 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
continue;
}
- return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU);
+ // Ok, we do want to deal with this predecessor. Record it.
+ Preds.emplace_back(PredBlock);
}
- return Changed;
+
+ // If there aren't any predecessors into which we can fold,
+ // don't bother checking the cost.
+ if (Preds.empty())
+ return false;
+
+ // Only allow this transformation if computing the condition doesn't involve
+ // too many instructions and these involved instructions can be executed
+ // unconditionally. We denote all involved instructions except the condition
+ // as "bonus instructions", and only allow this transformation when the
+ // number of the bonus instructions we'll need to create when cloning into
+ // each predecessor does not exceed a certain threshold.
+ unsigned NumBonusInsts = 0;
+ const unsigned PredCount = Preds.size();
+ for (Instruction &I : *BB) {
+ // Don't check the branch condition comparison itself.
+ if (&I == Cond)
+ continue;
+ // Ignore dbg intrinsics, and the terminator.
+ if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
+ continue;
+ // I must be safe to execute unconditionally.
+ if (!isSafeToSpeculativelyExecute(&I))
+ return false;
+
+ // Account for the cost of duplicating this instruction into each
+ // predecessor.
+ NumBonusInsts += PredCount;
+ // Early exit once we reach the limit.
+ if (NumBonusInsts > BonusInstThreshold)
+ return false;
+ }
+
+ // Ok, we have the budget. Perform the transformation.
+ for (BasicBlock *PredBlock : Preds) {
+ auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
+ return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
+ }
+ return false;
}
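At the source level, the fold performed here merges a conditional branch into a predecessor's conditional branch when the two share a destination, cloning the speculatable "bonus" instructions into the predecessor and joining the conditions with a logical op ("or.cond"). A rough, illustrative C++ sketch:

int before(int A, int B) {
  if (A == 0)
    return 1;      // common destination
  int T = B + 5;   // bonus instruction; must be safe to speculate
  if (T == 0)
    return 1;      // branches to the same destination
  return 2;
}

int after(int A, int B) {
  int T = B + 5;           // bonus instruction now executes unconditionally
  if (A == 0 || T == 0)    // conditions joined into "or.cond"
    return 1;
  return 2;
}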
// If there is only one store in BB1 and BB2, return it, otherwise return
@@ -3190,7 +3337,8 @@ static bool mergeConditionalStoreToAddress(
// Heuristic: if the block can be if-converted/phi-folded and the
// instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
// thread this store.
- int BudgetRemaining =
+ InstructionCost Cost = 0;
+ InstructionCost Budget =
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
for (auto &I : BB->instructionsWithoutDebug()) {
// Consider terminator instruction to be free.
@@ -3206,11 +3354,11 @@ static bool mergeConditionalStoreToAddress(
return false; // Not in white-list - not worthwhile folding.
// And finally, if this is a non-free instruction that we are okay
// speculating, ensure that we consider the speculation budget.
- BudgetRemaining -= TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
- if (BudgetRemaining < 0)
+ Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ if (Cost > Budget)
return false; // Eagerly refuse to fold as soon as we're out of budget.
}
- assert(BudgetRemaining >= 0 &&
+ assert(Cost <= Budget &&
"When we run out of budget we will eagerly return from within the "
"per-instruction loop.");
return true;
@@ -3594,7 +3742,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
BasicBlock *InfLoopBlock =
BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
BranchInst::Create(InfLoopBlock, InfLoopBlock);
- Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
OtherDest = InfLoopBlock;
}
@@ -3614,18 +3763,20 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
// Merge the conditions.
- Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
+ Value *Cond =
+ createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
// Modify PBI to branch on the new condition to the new dests.
PBI->setCondition(Cond);
PBI->setSuccessor(0, CommonDest);
PBI->setSuccessor(1, OtherDest);
- Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
- Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
+ Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
- if (DTU)
DTU->applyUpdates(Updates);
+ }
// Update branch weight for PBI.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
@@ -3714,7 +3865,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
BasicBlock *KeepEdge1 = TrueBB;
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
- SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
+ SmallPtrSet<BasicBlock *, 2> RemovedSuccessors;
// Then remove the rest.
for (BasicBlock *Succ : successors(OldTerm)) {
@@ -3944,17 +4095,19 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
SIW.setSuccessorWeight(0, *NewW);
}
SIW.addCase(Cst, NewBB, NewW);
- Updates.push_back({DominatorTree::Insert, Pred, NewBB});
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, Pred, NewBB});
}
// NewBB branches to the phi block, add the uncond branch and the phi entry.
Builder.SetInsertPoint(NewBB);
Builder.SetCurrentDebugLocation(SI->getDebugLoc());
Builder.CreateBr(SuccBlock);
- Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
PHIUse->addIncoming(NewCst, NewBB);
- if (DTU)
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
DTU->applyUpdates(Updates);
+ }
return true;
}
@@ -4011,11 +4164,6 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
BasicBlock *BB = BI->getParent();
- // MSAN does not like undefs as branch condition which can be introduced
- // with "explicit branch".
- if (ExtraCase && BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
- return false;
-
LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
<< " cases into SWITCH. BB is:\n"
<< *BB);
@@ -4033,6 +4181,16 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
Instruction *OldTI = BB->getTerminator();
Builder.SetInsertPoint(OldTI);
+ // There can be unintended UB if the extra values are poison. Before the
+ // transformation, the extra values may not be evaluated, depending on the
+ // condition, so no UB is raised. After the transformation, we evaluate the
+ // extra values before checking the condition, which can raise UB. This is
+ // solved by adding a freeze instruction to the extra values.
+ AssumptionCache *AC = Options.AC;
+
+ if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
+ ExtraCase = Builder.CreateFreeze(ExtraCase);
+
if (TrueWhenEqual)
Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
else
@@ -4040,7 +4198,8 @@ bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
OldTI->eraseFromParent();
- Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
// If there are PHI nodes in EdgeBB, then we need to add a new entry to them
// for the edge we just added.
@@ -4162,9 +4321,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
BB->removePredecessor(TrivialBB, true);
- for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
- PI != PE;) {
- BasicBlock *Pred = *PI++;
+ for (BasicBlock *Pred :
+ llvm::make_early_inc_range(predecessors(TrivialBB))) {
removeUnwindEdge(Pred, DTU);
++NumInvokes;
}
@@ -4181,12 +4339,8 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
}
// Delete the resume block if all its predecessors have been removed.
- if (pred_empty(BB)) {
- if (DTU)
- DTU->deleteBB(BB);
- else
- BB->eraseFromParent();
- }
+ if (pred_empty(BB))
+ DeleteDeadBlock(BB, DTU);
return !TrivialUnwindBlocks.empty();
}
@@ -4204,17 +4358,13 @@ bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
- BasicBlock *Pred = *PI++;
+ for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
removeUnwindEdge(Pred, DTU);
++NumInvokes;
}
// The landingpad is now unreachable. Zap it.
- if (DTU)
- DTU->deleteBB(BB);
- else
- BB->eraseFromParent();
+ DeleteDeadBlock(BB, DTU);
return true;
}
@@ -4256,12 +4406,8 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
if (UnwindDest) {
// First, go through the PHI nodes in UnwindDest and update any nodes that
// reference the block we are removing
- for (BasicBlock::iterator I = UnwindDest->begin(),
- IE = DestEHPad->getIterator();
- I != IE; ++I) {
- PHINode *DestPN = cast<PHINode>(I);
-
- int Idx = DestPN->getBasicBlockIndex(BB);
+ for (PHINode &DestPN : UnwindDest->phis()) {
+ int Idx = DestPN.getBasicBlockIndex(BB);
// Since BB unwinds to UnwindDest, it has to be in the PHI node.
assert(Idx != -1);
// This PHI node has an incoming value that corresponds to a control
@@ -4275,40 +4421,21 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
// predecessors must unwind to these blocks, and since no instruction
// can have multiple unwind destinations, there will be no overlap in
// incoming blocks between SrcPN and DestPN.
- Value *SrcVal = DestPN->getIncomingValue(Idx);
+ Value *SrcVal = DestPN.getIncomingValue(Idx);
PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
- // Remove the entry for the block we are deleting.
- DestPN->removeIncomingValue(Idx, false);
-
- if (SrcPN && SrcPN->getParent() == BB) {
- // If the incoming value was a PHI node in the cleanup pad we are
- // removing, we need to merge that PHI node's incoming values into
- // DestPN.
- for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
- SrcIdx != SrcE; ++SrcIdx) {
- DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
- SrcPN->getIncomingBlock(SrcIdx));
- }
- } else {
- // Otherwise, the incoming value came from above BB and
- // so we can just reuse it. We must associate all of BB's
- // predecessors with this value.
- for (auto *pred : predecessors(BB)) {
- DestPN->addIncoming(SrcVal, pred);
- }
+ bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
+ for (auto *Pred : predecessors(BB)) {
+ Value *Incoming =
+ NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
+ DestPN.addIncoming(Incoming, Pred);
}
}
// Sink any remaining PHI nodes directly into UnwindDest.
Instruction *InsertPt = DestEHPad;
- for (BasicBlock::iterator I = BB->begin(),
- IE = BB->getFirstNonPHI()->getIterator();
- I != IE;) {
- // The iterator must be incremented here because the instructions are
- // being moved to another block.
- PHINode *PN = cast<PHINode>(I++);
- if (PN->use_empty() || !PN->isUsedOutsideOfBlock(BB))
+ for (PHINode &PN : make_early_inc_range(BB->phis())) {
+ if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
// If the PHI node has no uses or all of its uses are in this basic
// block (meaning they are debug or lifetime intrinsics), just leave
// it. It will be erased when we erase BB below.
@@ -4320,36 +4447,40 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
// BB. In this case, the PHI value must reference itself.
for (auto *pred : predecessors(UnwindDest))
if (pred != BB)
- PN->addIncoming(PN, pred);
- PN->moveBefore(InsertPt);
+ PN.addIncoming(&PN, pred);
+ PN.moveBefore(InsertPt);
+ // Also, add a dummy incoming value for the original BB itself,
+ // so that the PHI is well-formed until we drop said predecessor.
+ PN.addIncoming(UndefValue::get(PN.getType()), BB);
}
}
std::vector<DominatorTree::UpdateType> Updates;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
- // The iterator must be updated here because we are removing this pred.
- BasicBlock *PredBB = *PI++;
+ // We use make_early_inc_range here because we will remove all predecessors.
+ for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) {
if (UnwindDest == nullptr) {
- if (DTU)
+ if (DTU) {
DTU->applyUpdates(Updates);
- Updates.clear();
+ Updates.clear();
+ }
removeUnwindEdge(PredBB, DTU);
++NumInvokes;
} else {
+ BB->removePredecessor(PredBB);
Instruction *TI = PredBB->getTerminator();
TI->replaceUsesOfWith(BB, UnwindDest);
- Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
- Updates.push_back({DominatorTree::Delete, PredBB, BB});
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
+ }
}
}
- if (DTU) {
+ if (DTU)
DTU->applyUpdates(Updates);
- DTU->deleteBB(BB);
- } else
- // The cleanup pad is now unreachable. Zap it.
- BB->eraseFromParent();
+
+ DeleteDeadBlock(BB, DTU);
return true;
}
@@ -4403,61 +4534,7 @@ bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
return false;
}
-bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
- BasicBlock *BB = RI->getParent();
- if (!BB->getFirstNonPHIOrDbg()->isTerminator())
- return false;
-
- // Find predecessors that end with branches.
- SmallVector<BasicBlock *, 8> UncondBranchPreds;
- SmallVector<BranchInst *, 8> CondBranchPreds;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- Instruction *PTI = P->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
- if (BI->isUnconditional())
- UncondBranchPreds.push_back(P);
- else
- CondBranchPreds.push_back(BI);
- }
- }
-
- // If we found some, do the transformation!
- if (!UncondBranchPreds.empty() && DupRet) {
- while (!UncondBranchPreds.empty()) {
- BasicBlock *Pred = UncondBranchPreds.pop_back_val();
- LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
- << "INTO UNCOND BRANCH PRED: " << *Pred);
- (void)FoldReturnIntoUncondBranch(RI, BB, Pred, DTU);
- }
-
- // If we eliminated all predecessors of the block, delete the block now.
- if (pred_empty(BB)) {
- // We know there are no successors, so just nuke the block.
- if (DTU)
- DTU->deleteBB(BB);
- else
- BB->eraseFromParent();
- }
-
- return true;
- }
-
- // Check out all of the conditional branches going to this return
- // instruction. If any of them just select between returns, change the
- // branch itself into a select/return pair.
- while (!CondBranchPreds.empty()) {
- BranchInst *BI = CondBranchPreds.pop_back_val();
-
- // Check to see if the non-BB successor is also a return block.
- if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
- isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
- SimplifyCondBranchToTwoReturns(BI, Builder))
- return true;
- }
- return false;
-}
-
+// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
BasicBlock *BB = UI->getParent();
@@ -4468,46 +4545,19 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
while (UI->getIterator() != BB->begin()) {
BasicBlock::iterator BBI = UI->getIterator();
--BBI;
- // Do not delete instructions that can have side effects which might cause
- // the unreachable to not be reachable; specifically, calls and volatile
- // operations may have this effect.
- if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI))
- break;
- if (BBI->mayHaveSideEffects()) {
- if (auto *SI = dyn_cast<StoreInst>(BBI)) {
- if (SI->isVolatile())
- break;
- } else if (auto *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI->isVolatile())
- break;
- } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
- if (RMWI->isVolatile())
- break;
- } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
- if (CXI->isVolatile())
- break;
- } else if (isa<CatchPadInst>(BBI)) {
- // A catchpad may invoke exception object constructors and such, which
- // in some languages can be arbitrary code, so be conservative by
- // default.
- // For CoreCLR, it just involves a type test, so can be removed.
- if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) !=
- EHPersonality::CoreCLR)
- break;
- } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
- !isa<LandingPadInst>(BBI)) {
- break;
- }
- // Note that deleting LandingPad's here is in fact okay, although it
- // involves a bit of subtle reasoning. If this inst is a LandingPad,
- // all the predecessors of this block will be the unwind edges of Invokes,
- // and we can therefore guarantee this block will be erased.
- }
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
+ break; // Can not drop any more instructions. We're done here.
+ // Otherwise, this instruction can be freely erased,
+ // even if it is not side-effect free.
+
+ // Note that deleting EH pads here is in fact okay, although it involves a
+ // bit of subtle reasoning. If this inst is an EH pad, all the predecessors
+ // of this block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
+ // and we can therefore guarantee this block will be erased.
// Delete this instruction (any uses are guaranteed to be dead)
- if (!BBI->use_empty())
- BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
BBI->eraseFromParent();
Changed = true;
}
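The effect of switching to isGuaranteedToTransferExecutionToSuccessor: any instruction that is guaranteed to reach the next instruction (for example a plain store) can be erased when execution falls into unreachable, while a call that might throw or not return stops the upward walk. An illustrative C++ example of what becomes deletable (maybeThrow is a made-up external function with no willreturn/nounwind-style guarantees):

extern int G;
int maybeThrow();          // may throw or not return; stops the upward walk

void before() {
  int V = maybeThrow();
  G = V;                   // guaranteed to reach the next instruction: erasable
  G = 0;                   // likewise erasable
  __builtin_unreachable();
}

void after() {
  int V = maybeThrow();    // the walk stops here; the stores below were erased
  (void)V;
  __builtin_unreachable();
}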
@@ -4548,7 +4598,8 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
EraseTerminatorAndDCECond(BI);
Changed = true;
}
- Updates.push_back({DominatorTree::Delete, Predecessor, BB});
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
SwitchInstProfUpdateWrapper SU(*SI);
for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
@@ -4562,21 +4613,23 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
Changed = true;
}
// Note that the default destination can't be removed!
- if (SI->getDefaultDest() != BB)
+ if (DTU && SI->getDefaultDest() != BB)
Updates.push_back({DominatorTree::Delete, Predecessor, BB});
} else if (auto *II = dyn_cast<InvokeInst>(TI)) {
if (II->getUnwindDest() == BB) {
- if (DTU)
+ if (DTU) {
DTU->applyUpdates(Updates);
- Updates.clear();
+ Updates.clear();
+ }
removeUnwindEdge(TI->getParent(), DTU);
Changed = true;
}
} else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
if (CSI->getUnwindDest() == BB) {
- if (DTU)
+ if (DTU) {
DTU->applyUpdates(Updates);
- Updates.clear();
+ Updates.clear();
+ }
removeUnwindEdge(TI->getParent(), DTU);
Changed = true;
continue;
@@ -4592,23 +4645,28 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
Changed = true;
}
}
- Updates.push_back({DominatorTree::Delete, Predecessor, BB});
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
if (CSI->getNumHandlers() == 0) {
if (CSI->hasUnwindDest()) {
// Redirect all predecessors of the block containing CatchSwitchInst
// to instead branch to the CatchSwitchInst's unwind destination.
- for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
- Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor,
- CSI->getUnwindDest()});
- Updates.push_back(
- {DominatorTree::Delete, PredecessorOfPredecessor, Predecessor});
+ if (DTU) {
+ for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
+ Updates.push_back({DominatorTree::Insert,
+ PredecessorOfPredecessor,
+ CSI->getUnwindDest()});
+ Updates.push_back({DominatorTree::Delete,
+ PredecessorOfPredecessor, Predecessor});
+ }
}
Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
} else {
// Rewrite all preds to unwind to caller (or from invoke to call).
- if (DTU)
+ if (DTU) {
DTU->applyUpdates(Updates);
- Updates.clear();
+ Updates.clear();
+ }
SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
for (BasicBlock *EHPred : EHPreds)
removeUnwindEdge(EHPred, DTU);
@@ -4622,7 +4680,8 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
(void)CRI;
assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
"Expected to always have an unwind to BB.");
- Updates.push_back({DominatorTree::Delete, Predecessor, BB});
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, Predecessor, BB});
new UnreachableInst(TI->getContext(), TI);
TI->eraseFromParent();
Changed = true;
@@ -4634,11 +4693,7 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
// If this block is now dead, remove it.
if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
- // We know there are no successors, so just nuke the block.
- if (DTU)
- DTU->deleteBB(BB);
- else
- BB->eraseFromParent();
+ DeleteDeadBlock(BB, DTU);
return true;
}
@@ -4669,8 +4724,9 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch,
{DominatorTree::Delete, BB, OrigDefaultBlock}});
SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU);
SmallVector<DominatorTree::UpdateType, 2> Updates;
- for (auto *Successor : successors(NewDefaultBlock))
- Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor});
+ if (DTU)
+ for (auto *Successor : successors(NewDefaultBlock))
+ Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor});
auto *NewTerminator = NewDefaultBlock->getTerminator();
new UnreachableInst(Switch->getContext(), NewTerminator);
EraseTerminatorAndDCECond(NewTerminator);
@@ -4822,15 +4878,17 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
// Gather dead cases.
SmallVector<ConstantInt *, 8> DeadCases;
- SmallMapVector<BasicBlock *, int, 8> NumPerSuccessorCases;
+ SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
for (auto &Case : SI->cases()) {
auto *Successor = Case.getCaseSuccessor();
- ++NumPerSuccessorCases[Successor];
+ if (DTU)
+ ++NumPerSuccessorCases[Successor];
const APInt &CaseVal = Case.getCaseValue()->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
- --NumPerSuccessorCases[Successor];
+ if (DTU)
+ --NumPerSuccessorCases[Successor];
LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
<< " is dead.\n");
}
@@ -4865,12 +4923,13 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
SIW.removeCase(CaseI);
}
- std::vector<DominatorTree::UpdateType> Updates;
- for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
- if (I.second == 0)
- Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
- if (DTU)
+ if (DTU) {
+ std::vector<DominatorTree::UpdateType> Updates;
+ for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
+ if (I.second == 0)
+ Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
DTU->applyUpdates(Updates);
+ }
return true;
}
@@ -5197,11 +5256,9 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
Constant *DefaultResult, Value *Condition,
IRBuilder<> &Builder) {
- assert(ResultVector.size() == 2 &&
- "We should have exactly two unique results at this point");
// If we are selecting between only two cases transform into a simple
// select or a two-way select if default is possible.
- if (ResultVector[0].second.size() == 1 &&
+ if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
ResultVector[1].second.size() == 1) {
ConstantInt *const FirstCase = ResultVector[0].second[0];
ConstantInt *const SecondCase = ResultVector[1].second[0];
@@ -5220,6 +5277,17 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
SelectValue, "switch.select");
}
+ // Handle the degenerate case where two cases have the same value.
+ if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 &&
+ DefaultResult) {
+ Value *Cmp1 = Builder.CreateICmpEQ(
+ Condition, ResultVector[0].second[0], "switch.selectcmp.case1");
+ Value *Cmp2 = Builder.CreateICmpEQ(
+ Condition, ResultVector[0].second[1], "switch.selectcmp.case2");
+ Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
+ return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ }
+
return nullptr;
}
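The newly handled degenerate case corresponds, roughly, to a switch in which two case values produce the same result: the two compares are or'ed together ("switch.selectcmp") and feed a single select against the default result. Illustrative C++:

int before(int X) {
  switch (X) {
  case 3:
  case 7:
    return 42;   // both cases yield the same value
  default:
    return 0;
  }
}

int after(int X) {
  bool Cmp = (X == 3) | (X == 7);  // switch.selectcmp, mirrors CreateOr
  return Cmp ? 42 : 0;             // select between common result and default
}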
@@ -5234,7 +5302,7 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
BasicBlock *SelectBB = SI->getParent();
BasicBlock *DestBB = PHI->getParent();
- if (!is_contained(predecessors(DestBB), SelectBB))
+ if (DTU && !is_contained(predecessors(DestBB), SelectBB))
Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
Builder.CreateBr(DestBB);
@@ -5244,13 +5312,15 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
PHI->removeIncomingValue(SelectBB);
PHI->addIncoming(SelectValue, SelectBB);
+ SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
if (Succ == DestBB)
continue;
Succ->removePredecessor(SelectBB);
- Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
+ if (DTU && RemovedSuccessors.insert(Succ).second)
+ Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
}
SI->eraseFromParent();
if (DTU)
@@ -5270,10 +5340,8 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
SwitchCaseResultVectorTy UniqueResults;
// Collect all the cases that will deliver the same value from the switch.
if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
- DL, TTI, 2, 1))
- return false;
- // Selects choose between maximum two values.
- if (UniqueResults.size() != 2)
+ DL, TTI, /*MaxUniqueResults*/2,
+ /*MaxCasesPerResult*/2))
return false;
assert(PHI != nullptr && "PHI for value select not found");
@@ -5642,8 +5710,7 @@ static void reuseTableCompare(
// Although this check is invariant in the calling loops, it's better to do it
// at this late stage. Practically we do it at most once for a switch.
BasicBlock *BranchBlock = RangeCheckBranch->getParent();
- for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
+ for (BasicBlock *Pred : predecessors(PhiBlock)) {
if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
return;
}
@@ -5675,7 +5742,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Only build lookup table when we have a target that supports it or the
// attribute is not set.
if (!TTI.shouldBuildLookupTables() ||
- (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true"))
+ (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
return false;
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
@@ -5799,7 +5866,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
- Updates.push_back({DominatorTree::Insert, BB, LookupBB});
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, BB, LookupBB});
// Note: We call removePredecessor later since we need to be able to get the
// PHI value for the default case in case we're using a bit mask.
} else {
@@ -5807,7 +5875,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
RangeCheckBranch =
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
- Updates.push_back({DominatorTree::Insert, BB, LookupBB});
+ if (DTU)
+ Updates.push_back({DominatorTree::Insert, BB, LookupBB});
}
// Populate the BB that does the lookups.
@@ -5845,8 +5914,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Value *LoBit = Builder.CreateTrunc(
Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
- Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
- Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
+ Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
+ }
Builder.SetInsertPoint(LookupBB);
AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
}
@@ -5856,10 +5927,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// do not delete PHINodes here.
SI->getDefaultDest()->removePredecessor(BB,
/*KeepOneInputPHIs=*/true);
- Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
}
- bool ReturnedEarly = false;
for (PHINode *PHI : PHIs) {
const ResultListTy &ResultList = ResultLists[PHI];
@@ -5871,15 +5942,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
Value *Result = Table.BuildLookup(TableIndex, Builder);
- // If the result is used to return immediately from the function, we want to
- // do that right here.
- if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) &&
- PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) {
- Builder.CreateRet(Result);
- ReturnedEarly = true;
- break;
- }
-
// Do a small peephole optimization: re-use the switch table compare if
// possible.
if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
@@ -5893,13 +5955,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
PHI->addIncoming(Result, LookupBB);
}
- if (!ReturnedEarly) {
- Builder.CreateBr(CommonDest);
+ Builder.CreateBr(CommonDest);
+ if (DTU)
Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
- }
// Remove the switch.
- SmallSetVector<BasicBlock *, 8> RemovedSuccessors;
+ SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
@@ -6081,7 +6142,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
// Eliminate redundant destinations.
SmallPtrSet<Value *, 8> Succs;
- SmallSetVector<BasicBlock *, 8> RemovedSuccs;
+ SmallPtrSet<BasicBlock *, 8> RemovedSuccs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
BasicBlock *Dest = IBI->getDestination(i);
if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
@@ -6171,15 +6232,16 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
// We've found an identical block. Update our predecessors to take that
// path instead and make ourselves dead.
- SmallPtrSet<BasicBlock *, 16> Preds;
- Preds.insert(pred_begin(BB), pred_end(BB));
+ SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
for (BasicBlock *Pred : Preds) {
InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
"unexpected successor");
II->setUnwindDest(OtherPred);
- Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
- Updates.push_back({DominatorTree::Delete, Pred, BB});
+ if (DTU) {
+ Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
+ Updates.push_back({DominatorTree::Delete, Pred, BB});
+ }
}
// The debug info in OtherPred doesn't cover the merged control flow that
@@ -6191,11 +6253,11 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
Inst.eraseFromParent();
}
- SmallPtrSet<BasicBlock *, 16> Succs;
- Succs.insert(succ_begin(BB), succ_end(BB));
+ SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB));
for (BasicBlock *Succ : Succs) {
Succ->removePredecessor(BB);
- Updates.push_back({DominatorTree::Delete, BB, Succ});
+ if (DTU)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
IRBuilder<> Builder(BI);
@@ -6229,7 +6291,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
Options.NeedCanonicalLoop &&
(!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
(is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
- BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
!NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
return true;
@@ -6290,8 +6352,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return requestResimplify();
// This block must be empty, except for the setcond inst, if it exists.
- // Ignore dbg intrinsics.
- auto I = BB->instructionsWithoutDebug().begin();
+ // Ignore dbg and pseudo intrinsics.
+ auto I = BB->instructionsWithoutDebug(true).begin();
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
return requestResimplify();
@@ -6332,9 +6394,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistCommon && Options.HoistCommonInsts)
- if (HoistThenElseCodeToIf(BI, TTI))
- return requestResimplify();
+ if (HoistCommon &&
+ HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts))
+ return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
@@ -6362,8 +6424,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return requestResimplify();
// Scan predecessor blocks for conditional branches.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ for (BasicBlock *Pred : predecessors(BB))
+ if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
return requestResimplify();
@@ -6397,9 +6459,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
for (BasicBlock::iterator
i = ++BasicBlock::iterator(I),
UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
- i != UI; ++i)
- if (i == I->getParent()->end() || i->mayHaveSideEffects())
+ i != UI; ++i) {
+ if (i == I->getParent()->end())
return false;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*i))
+ return false;
+ }
// Look through GEPs. A load from a GEP derived from NULL is still undefined
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
@@ -6437,8 +6502,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
for (const llvm::Use &Arg : CB->args())
if (Arg == I) {
unsigned ArgIdx = CB->getArgOperandNo(&Arg);
- if (CB->paramHasAttr(ArgIdx, Attribute::NonNull) &&
- CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) {
+ if (CB->isPassingUndefUB(ArgIdx) &&
+ CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
// Passing null to a nonnull+noundef argument is undefined.
return !PtrValueMayBeModified;
}
@@ -6448,7 +6513,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
for (const llvm::Use &Arg : CB->args())
if (Arg == I) {
unsigned ArgIdx = CB->getArgOperandNo(&Arg);
- if (CB->paramHasAttr(ArgIdx, Attribute::NoUndef)) {
+ if (CB->isPassingUndefUB(ArgIdx)) {
// Passing undef to a noundef argument is undefined.
return true;
}
@@ -6522,7 +6587,14 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
return true;
if (SinkCommon && Options.SinkCommonInsts)
- Changed |= SinkCommonCodeFromPredecessors(BB, DTU);
+ if (SinkCommonCodeFromPredecessors(BB, DTU)) {
+ // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
+ // so we may now have duplicate PHI's.
+ // Let's rerun EliminateDuplicatePHINodes() first,
+ // before FoldTwoEntryPHINode() potentially converts them into select's,
+ // after which we'd need a whole EarlyCSE pass run to clean them up.
+ return true;
+ }
IRBuilder<> Builder(BB);
@@ -6540,9 +6612,6 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
case Instruction::Br:
Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
break;
- case Instruction::Ret:
- Changed |= simplifyReturn(cast<ReturnInst>(Terminator), Builder);
- break;
case Instruction::Resume:
Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
break;
@@ -6566,20 +6635,10 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
bool Changed = simplifyOnceImpl(BB);
- assert((!RequireAndPreserveDomTree ||
- (DTU &&
- DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
- "Failed to maintain validity of domtree!");
-
return Changed;
}
bool SimplifyCFGOpt::run(BasicBlock *BB) {
- assert((!RequireAndPreserveDomTree ||
- (DTU &&
- DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
- "Original domtree is invalid?");
-
bool Changed = false;
// Repeated simplify BB as long as resimplification is requested.
@@ -6597,7 +6656,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
ArrayRef<WeakVH> LoopHeaders) {
- return SimplifyCFGOpt(TTI, RequireAndPreserveDomTree ? DTU : nullptr,
- BB->getModule()->getDataLayout(), LoopHeaders, Options)
+ return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
+ Options)
.run(BB);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 290c04a7ad10..bd30be011472 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -99,6 +99,24 @@ namespace {
};
}
+/// Find a point in code which dominates all given instructions. We can safely
+/// assume that, whatever fact we can prove at the found point, this fact is
+/// also true for each of the given instructions.
+static Instruction *findCommonDominator(ArrayRef<Instruction *> Instructions,
+ DominatorTree &DT) {
+ Instruction *CommonDom = nullptr;
+ for (auto *Insn : Instructions)
+ if (!CommonDom || DT.dominates(Insn, CommonDom))
+ CommonDom = Insn;
+ else if (!DT.dominates(CommonDom, Insn))
+ // If there is no dominance relation, use common dominator.
+ CommonDom =
+ DT.findNearestCommonDominator(CommonDom->getParent(),
+ Insn->getParent())->getTerminator();
+ assert(CommonDom && "Common dominator not found?");
+ return CommonDom;
+}
+
/// Fold an IV operand into its use. This removes increments of an
/// aligned IV when used by a instruction that ignores the low bits.
///
@@ -261,14 +279,14 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop);
const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop);
- // If the condition is always true or always false, replace it with
- // a constant value.
- if (SE->isKnownPredicate(Pred, S, X)) {
- ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
- DeadInsts.emplace_back(ICmp);
- LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
- } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) {
- ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ // If the condition is always true or always false in the given context,
+ // replace it with a constant value.
+ SmallVector<Instruction *, 4> Users;
+ for (auto *U : ICmp->users())
+ Users.push_back(cast<Instruction>(U));
+ const Instruction *CtxI = findCommonDominator(Users, *DT);
+ if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) {
+ ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev));
DeadInsts.emplace_back(ICmp);
LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
} else if (makeIVComparisonInvariant(ICmp, IVOperand)) {
@@ -404,46 +422,10 @@ void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
replaceSRemWithURem(Rem);
}
-static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp,
- bool Signed, const SCEV *LHS, const SCEV *RHS) {
- const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
- SCEV::NoWrapFlags, unsigned);
- switch (BinOp) {
- default:
- llvm_unreachable("Unsupported binary op");
- case Instruction::Add:
- Operation = &ScalarEvolution::getAddExpr;
- break;
- case Instruction::Sub:
- Operation = &ScalarEvolution::getMinusSCEV;
- break;
- case Instruction::Mul:
- Operation = &ScalarEvolution::getMulExpr;
- break;
- }
-
- const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
- Signed ? &ScalarEvolution::getSignExtendExpr
- : &ScalarEvolution::getZeroExtendExpr;
-
- // Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
- auto *NarrowTy = cast<IntegerType>(LHS->getType());
- auto *WideTy =
- IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
-
- const SCEV *A =
- (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0),
- WideTy, 0);
- const SCEV *B =
- (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
- (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
- return A == B;
-}
-
bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
const SCEV *LHS = SE->getSCEV(WO->getLHS());
const SCEV *RHS = SE->getSCEV(WO->getRHS());
- if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
+ if (!SE->willNotOverflow(WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
return false;
// Proved no overflow, nuke the overflow check and, if possible, the overflow
@@ -484,7 +466,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
const SCEV *LHS = SE->getSCEV(SI->getLHS());
const SCEV *RHS = SE->getSCEV(SI->getRHS());
- if (!willNotOverflow(SE, SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
+ if (!SE->willNotOverflow(SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
return false;
BinaryOperator *BO = BinaryOperator::Create(
@@ -738,34 +720,25 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
/// unsigned-overflow. Returns true if anything changed, false otherwise.
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
Value *IVOperand) {
- // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
- if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
- return false;
-
- if (BO->getOpcode() != Instruction::Add &&
- BO->getOpcode() != Instruction::Sub &&
- BO->getOpcode() != Instruction::Mul)
- return false;
-
- const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
- const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
- bool Changed = false;
-
- if (!BO->hasNoUnsignedWrap() &&
- willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) {
- BO->setHasNoUnsignedWrap();
- SE->forgetValue(BO);
- Changed = true;
- }
-
- if (!BO->hasNoSignedWrap() &&
- willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) {
- BO->setHasNoSignedWrap();
- SE->forgetValue(BO);
- Changed = true;
- }
-
- return Changed;
+ SCEV::NoWrapFlags Flags;
+ bool Deduced;
+ std::tie(Flags, Deduced) = SE->getStrengthenedNoWrapFlagsFromBinOp(
+ cast<OverflowingBinaryOperator>(BO));
+
+ if (!Deduced)
+ return Deduced;
+
+ BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNUW) ==
+ SCEV::FlagNUW);
+ BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNSW) ==
+ SCEV::FlagNSW);
+
+ // The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
+ // flags on addrecs while performing zero/sign extensions. We could call
+ // forgetValue() here to make sure those flags also propagate to any other
+ // SCEV expressions based on the addrec. However, this can have pathological
+ // compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
+ return Deduced;
}
/// Annotate the Shr in (X << IVOperand) >> C as exact using the
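Aside on the maskFlags idiom used in the hunk above: comparing the masked value against a flag simply tests whether that bit is present in the flag set. A tiny stand-alone model (the enumerator values here are illustrative, not SCEV's actual encoding):

  enum NoWrapFlags { FlagAnyWrap = 0, FlagNUW = 1 << 0, FlagNSW = 1 << 1 };

  constexpr NoWrapFlags maskFlags(NoWrapFlags F, int Mask) {
    return static_cast<NoWrapFlags>(F & Mask);
  }

  // Holds exactly when the NUW bit is set in F.
  static_assert(maskFlags(static_cast<NoWrapFlags>(FlagNUW | FlagNSW), FlagNUW) == FlagNUW,
                "NUW present");
  static_assert(maskFlags(FlagNSW, FlagNUW) != FlagNUW, "NUW absent");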
@@ -1386,7 +1359,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
- if (!SE->isSCEVable(DU.NarrowUse->getType()))
+ if (!DU.NarrowUse->getType()->isIntegerTy())
return {nullptr, Unknown};
const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
@@ -1575,17 +1548,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
// We'll prove some facts that should be true in the context of ext users. If
// there are no users, we are done now. If there are some, pick their common
// dominator as context.
- Instruction *Context = nullptr;
- for (auto *Ext : ExtUsers) {
- if (!Context || DT->dominates(Ext, Context))
- Context = Ext;
- else if (!DT->dominates(Context, Ext))
- // For users that don't have dominance relation, use common dominator.
- Context =
- DT->findNearestCommonDominator(Context->getParent(), Ext->getParent())
- ->getTerminator();
- }
- assert(Context && "Context not found?");
+ const Instruction *CtxI = findCommonDominator(ExtUsers, *DT);
if (!CanSignExtend && !CanZeroExtend) {
// Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
@@ -1601,8 +1564,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
return false;
if (!SE->isKnownNegative(RHS))
return false;
- bool ProvedSubNUW = SE->isKnownPredicateAt(
- ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context);
+ bool ProvedSubNUW = SE->isKnownPredicateAt(ICmpInst::ICMP_UGE, LHS,
+ SE->getNegativeSCEV(RHS), CtxI);
if (!ProvedSubNUW)
return false;
// In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index f9a9dd237b6c..b8e0f63c481d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -56,38 +56,6 @@ static bool ignoreCallingConv(LibFunc Func) {
Func == LibFunc_llabs || Func == LibFunc_strlen;
}
-static bool isCallingConvCCompatible(CallInst *CI) {
- switch(CI->getCallingConv()) {
- default:
- return false;
- case llvm::CallingConv::C:
- return true;
- case llvm::CallingConv::ARM_APCS:
- case llvm::CallingConv::ARM_AAPCS:
- case llvm::CallingConv::ARM_AAPCS_VFP: {
-
- // The iOS ABI diverges from the standard in some cases, so for now don't
- // try to simplify those calls.
- if (Triple(CI->getModule()->getTargetTriple()).isiOS())
- return false;
-
- auto *FuncTy = CI->getFunctionType();
-
- if (!FuncTy->getReturnType()->isPointerTy() &&
- !FuncTy->getReturnType()->isIntegerTy() &&
- !FuncTy->getReturnType()->isVoidTy())
- return false;
-
- for (auto Param : FuncTy->params()) {
- if (!Param->isPointerTy() && !Param->isIntegerTy())
- return false;
- }
- return true;
- }
- }
- return false;
-}
-
/// Return true if it is only used in equality comparisons with With.
static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
for (User *U : V->users()) {
@@ -190,13 +158,16 @@ static void annotateDereferenceableBytes(CallInst *CI,
}
}
-static void annotateNonNullBasedOnAccess(CallInst *CI,
+static void annotateNonNullNoUndefBasedOnAccess(CallInst *CI,
ArrayRef<unsigned> ArgNos) {
Function *F = CI->getCaller();
if (!F)
return;
for (unsigned ArgNo : ArgNos) {
+ if (!CI->paramHasAttr(ArgNo, Attribute::NoUndef))
+ CI->addParamAttr(ArgNo, Attribute::NoUndef);
+
if (CI->paramHasAttr(ArgNo, Attribute::NonNull))
continue;
unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
@@ -211,10 +182,10 @@ static void annotateNonNullBasedOnAccess(CallInst *CI,
static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos,
Value *Size, const DataLayout &DL) {
if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
- annotateNonNullBasedOnAccess(CI, ArgNos);
+ annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue());
} else if (isKnownNonZero(Size, DL)) {
- annotateNonNullBasedOnAccess(CI, ArgNos);
+ annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
const APInt *X, *Y;
uint64_t DerefMin = 1;
if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) {
@@ -232,7 +203,7 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
- annotateNonNullBasedOnAccess(CI, {0, 1});
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -276,9 +247,9 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
Value *Src = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
uint64_t Len;
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
if (isKnownNonZero(Size, DL))
- annotateNonNullBasedOnAccess(CI, 1);
+ annotateNonNullNoUndefBasedOnAccess(CI, 1);
// We don't do anything if length is not constant.
ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size);
@@ -317,7 +288,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
Value *SrcStr = CI->getArgOperand(0);
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
@@ -361,7 +332,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
Value *SrcStr = CI->getArgOperand(0);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
// Cannot fold anything if we're not looking for a constant.
if (!CharC)
@@ -437,7 +408,7 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
TLI);
}
- annotateNonNullBasedOnAccess(CI, {0, 1});
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
return nullptr;
}
@@ -449,7 +420,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
return ConstantInt::get(CI->getType(), 0);
if (isKnownNonZero(Size, DL))
- annotateNonNullBasedOnAccess(CI, {0, 1});
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
// Get the length argument if it is constant.
uint64_t Length;
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
@@ -527,7 +498,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
if (Dst == Src) // strcpy(x,x) -> x
return Src;
- annotateNonNullBasedOnAccess(CI, {0, 1});
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
if (Len)
@@ -580,9 +551,9 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
if (isKnownNonZero(Size, DL))
- annotateNonNullBasedOnAccess(CI, 1);
+ annotateNonNullNoUndefBasedOnAccess(CI, 1);
uint64_t Len;
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
@@ -604,8 +575,10 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
}
if (SrcLen == 0) {
- // strncpy(x, "", y) -> memset(align 1 x, '\0', y)
- CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, Align(1));
+ // strncpy(x, "", y) -> memset(x, '\0', y)
+ Align MemSetAlign =
+ CI->getAttributes().getParamAttributes(0).getAlignment().valueOrOne();
+ CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
CI->getContext(), 0, ArgAttrs));
@@ -728,7 +701,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) {
if (Value *V = optimizeStringLength(CI, B, 8))
return V;
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
return nullptr;
}
@@ -839,8 +812,8 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
StrLen, B, DL, TLI);
if (!StrNCmp)
return nullptr;
- for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
- ICmpInst *Old = cast<ICmpInst>(*UI++);
+ for (User *U : llvm::make_early_inc_range(CI->users())) {
+ ICmpInst *Old = cast<ICmpInst>(U);
Value *Cmp =
B.CreateICmp(Old->getPredicate(), StrNCmp,
ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
@@ -878,13 +851,13 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
- annotateNonNullBasedOnAccess(CI, {0, 1});
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
return nullptr;
}
Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
if (isKnownNonZero(CI->getOperand(2), DL))
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
return nullptr;
}
@@ -960,7 +933,8 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
// Finally merge both checks and cast to pointer type. The inttoptr
// implicitly zexts the i1 to intptr type.
- return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType());
+ return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"),
+ CI->getType());
}
// Check if all arguments are constants. If so, we can constant fold.
@@ -1451,17 +1425,18 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilderBase &B) {
return InnerChain[Exp];
}
-// Return a properly extended 32-bit integer if the operation is an itofp.
-static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B) {
+// Return a properly extended integer (DstWidth bits wide) if the operation is
+// an itofp.
+static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
if (isa<SIToFPInst>(I2F) || isa<UIToFPInst>(I2F)) {
Value *Op = cast<Instruction>(I2F)->getOperand(0);
- // Make sure that the exponent fits inside an int32_t,
+ // Make sure that the exponent fits inside an "int" of size DstWidth,
// thus avoiding any range issues that FP does not have.
unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits();
- if (BitWidth < 32 ||
- (BitWidth == 32 && isa<SIToFPInst>(I2F)))
- return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getInt32Ty())
- : B.CreateZExt(Op, B.getInt32Ty());
+ if (BitWidth < DstWidth ||
+ (BitWidth == DstWidth && isa<SIToFPInst>(I2F)))
+ return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getIntNTy(DstWidth))
+ : B.CreateZExt(Op, B.getIntNTy(DstWidth));
}
return nullptr;
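Why the equal-width case above is accepted only for sitofp: a signed value that is already DstWidth bits wide fits an "int" of that width as-is (the sext is a no-op), while an unsigned value of the same width can exceed it. A small check, assuming DstWidth is 32:

  #include <cstdint>

  // 0x80000000 as a 32-bit unsigned exponent is 2147483648, which does not
  // fit in a 32-bit signed "int", so BitWidth == DstWidth is only safe for
  // the signed (sitofp) case.
  static_assert(UINT32_C(0x80000000) > uint32_t(INT32_MAX),
                "unsigned may overflow int");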
@@ -1551,7 +1526,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
if (match(Base, m_SpecificFP(2.0)) &&
(isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
- if (Value *ExpoI = getIntToFPVal(Expo, B))
+ if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI,
LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
B, Attrs);
@@ -1690,7 +1665,8 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
IRBuilderBase &B) {
Value *Args[] = {Base, Expo};
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+ Type *Types[] = {Base->getType(), Expo->getType()};
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Types);
return B.CreateCall(F, Args);
}
@@ -1701,20 +1677,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
StringRef Name = Callee->getName();
Type *Ty = Pow->getType();
Module *M = Pow->getModule();
- Value *Shrunk = nullptr;
bool AllowApprox = Pow->hasApproxFunc();
bool Ignored;
// Propagate the math semantics from the call to any created instructions.
IRBuilderBase::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Pow->getFastMathFlags());
-
- // Shrink pow() to powf() if the arguments are single precision,
- // unless the result is expected to be double precision.
- if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
- hasFloatVersion(Name))
- Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
-
// Evaluate special cases related to the base.
// pow(1.0, x) -> 1.0
@@ -1799,23 +1767,31 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
return FMul;
}
- APSInt IntExpo(32, /*isUnsigned=*/false);
+ APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
// powf(x, n) -> powi(x, n) if n is a constant signed integer value
if (ExpoF->isInteger() &&
ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
APFloat::opOK) {
return createPowWithIntegerExponent(
- Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+ Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo), M, B);
}
}
// powf(x, itofp(y)) -> powi(x, y)
if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
- if (Value *ExpoI = getIntToFPVal(Expo, B))
+ if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
return createPowWithIntegerExponent(Base, ExpoI, M, B);
}
- return Shrunk;
+ // Shrink pow() to powf() if the arguments are single precision,
+ // unless the result is expected to be double precision.
+ if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
+ hasFloatVersion(Name)) {
+ if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, true))
+ return Shrunk;
+ }
+
+ return nullptr;
}
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
@@ -1830,11 +1806,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
Type *Ty = CI->getType();
Value *Op = CI->getArgOperand(0);
- // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
- // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize
if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
- if (Value *Exp = getIntToFPVal(Op, B))
+ if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize()))
return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
B, Attrs);
@@ -2404,18 +2380,28 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
if (FormatStr.size() == 1 || FormatStr == "%%")
return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
- // printf("%s", "a") --> putchar('a')
+ // Try to remove call or emit putchar/puts.
if (FormatStr == "%s" && CI->getNumArgOperands() > 1) {
- StringRef ChrStr;
- if (!getConstantStringInfo(CI->getOperand(1), ChrStr))
- return nullptr;
- if (ChrStr.size() != 1)
+ StringRef OperandStr;
+ if (!getConstantStringInfo(CI->getOperand(1), OperandStr))
return nullptr;
- return emitPutChar(B.getInt32(ChrStr[0]), B, TLI);
+ // printf("%s", "") --> NOP
+ if (OperandStr.empty())
+ return (Value *)CI;
+ // printf("%s", "a") --> putchar('a')
+ if (OperandStr.size() == 1)
+ return emitPutChar(B.getInt32(OperandStr[0]), B, TLI);
+ // printf("%s", str"\n") --> puts(str)
+ if (OperandStr.back() == '\n') {
+ OperandStr = OperandStr.drop_back();
+ Value *GV = B.CreateGlobalString(OperandStr, "str");
+ return emitPutS(GV, B, TLI);
+ }
+ return nullptr;
}
// printf("foo\n") --> puts("foo")
- if (FormatStr[FormatStr.size() - 1] == '\n' &&
+ if (FormatStr.back() == '\n' &&
FormatStr.find('%') == StringRef::npos) { // No format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
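Summarizing the new "%s" handling added above at the source level (illustrative calls only; the transform runs on IR and only fires when the operand is a known constant string):

  #include <cstdio>

  void demo(const char *runtime_str) {
    printf("%s", "");          // folded away entirely (a no-op)
    printf("%s", "a");         // becomes putchar('a')
    printf("%s", "hi\n");      // becomes puts("hi")
    printf("%s", runtime_str); // non-constant operand: left untouched
  }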
@@ -2470,7 +2456,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
return New;
}
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
return nullptr;
}
@@ -2482,6 +2468,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
return nullptr;
// If we just have a format string (nothing else crazy) transform it.
+ Value *Dest = CI->getArgOperand(0);
if (CI->getNumArgOperands() == 2) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
@@ -2490,7 +2477,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
// sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
B.CreateMemCpy(
- CI->getArgOperand(0), Align(1), CI->getArgOperand(1), Align(1),
+ Dest, Align(1), CI->getArgOperand(1), Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
FormatStr.size() + 1)); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
@@ -2508,7 +2495,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
- Value *Ptr = castToCStr(CI->getArgOperand(0), B);
+ Value *Ptr = castToCStr(Dest, B);
B.CreateStore(V, Ptr);
Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
@@ -2524,19 +2511,20 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
if (CI->use_empty())
// sprintf(dest, "%s", str) -> strcpy(dest, str)
- return emitStrCpy(CI->getArgOperand(0), CI->getArgOperand(2), B, TLI);
+ return emitStrCpy(Dest, CI->getArgOperand(2), B, TLI);
uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
if (SrcLen) {
B.CreateMemCpy(
- CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1),
+ Dest, Align(1), CI->getArgOperand(2), Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen));
// Returns total number of characters written without null-character.
return ConstantInt::get(CI->getType(), SrcLen - 1);
- } else if (Value *V = emitStpCpy(CI->getArgOperand(0), CI->getArgOperand(2),
- B, TLI)) {
+ } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
// sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
- Value *PtrDiff = B.CreatePtrDiff(V, CI->getArgOperand(0));
+ // Handle mismatched pointer types (goes away with typeless pointers?).
+ V = B.CreatePointerCast(V, Dest->getType());
+ Value *PtrDiff = B.CreatePtrDiff(V, Dest);
return B.CreateIntCast(PtrDiff, CI->getType(), false);
}
@@ -2551,8 +2539,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
return nullptr;
Value *IncLen =
B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
- B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(2),
- Align(1), IncLen);
+ B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(2), Align(1), IncLen);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -2592,7 +2579,7 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
return New;
}
- annotateNonNullBasedOnAccess(CI, {0, 1});
+ annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
return nullptr;
}
@@ -2681,7 +2668,7 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) {
}
if (isKnownNonZero(CI->getOperand(1), DL))
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
return nullptr;
}
@@ -2824,7 +2811,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
}
Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
- annotateNonNullBasedOnAccess(CI, 0);
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
if (!CI->use_empty())
return nullptr;
@@ -2859,9 +2846,10 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
// Check for string/memory library functions.
if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
// Make sure we never change the calling convention.
- assert((ignoreCallingConv(Func) ||
- isCallingConvCCompatible(CI)) &&
- "Optimizing string/memory libcall would change the calling convention");
+ assert(
+ (ignoreCallingConv(Func) ||
+ TargetLibraryInfoImpl::isCallingConvCCompatible(CI)) &&
+ "Optimizing string/memory libcall would change the calling convention");
switch (Func) {
case LibFunc_strcat:
return optimizeStrCat(CI, Builder);
@@ -3045,7 +3033,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
LibFunc Func;
Function *Callee = CI->getCalledFunction();
- bool isCallingConvC = isCallingConvCCompatible(CI);
+ bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
SmallVector<OperandBundleDef, 2> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
@@ -3063,7 +3051,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
// First, check for intrinsics.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
- if (!isCallingConvC)
+ if (!IsCallingConvC)
return nullptr;
// The FP intrinsics have corresponding constrained versions so we don't
// need to check for the StrictFP attribute here.
@@ -3116,7 +3104,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
// Then check for known library functions.
if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
// We never change the calling convention.
- if (!ignoreCallingConv(Func) && !isCallingConvC)
+ if (!ignoreCallingConv(Func) && !IsCallingConvC)
return nullptr;
if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
return V;
@@ -3500,7 +3488,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
LibFunc Func;
Function *Callee = CI->getCalledFunction();
- bool isCallingConvC = isCallingConvCCompatible(CI);
+ bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
SmallVector<OperandBundleDef, 2> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
@@ -3514,7 +3502,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
return nullptr;
// We never change the calling convention.
- if (!ignoreCallingConv(Func) && !isCallingConvC)
+ if (!ignoreCallingConv(Func) && !IsCallingConvC)
return nullptr;
switch (Func) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
index beeb60698f04..08a29ea16ba1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -14,45 +14,45 @@
using namespace llvm;
-cl::opt<bool> EnablePGSO(
+cl::opt<bool> llvm::EnablePGSO(
"pgso", cl::Hidden, cl::init(true),
cl::desc("Enable the profile guided size optimizations. "));
-cl::opt<bool> PGSOLargeWorkingSetSizeOnly(
+cl::opt<bool> llvm::PGSOLargeWorkingSetSizeOnly(
"pgso-lwss-only", cl::Hidden, cl::init(true),
cl::desc("Apply the profile guided size optimizations only "
"if the working set size is large (except for cold code.)"));
-cl::opt<bool> PGSOColdCodeOnly(
+cl::opt<bool> llvm::PGSOColdCodeOnly(
"pgso-cold-code-only", cl::Hidden, cl::init(false),
cl::desc("Apply the profile guided size optimizations only "
"to cold code."));
-cl::opt<bool> PGSOColdCodeOnlyForInstrPGO(
+cl::opt<bool> llvm::PGSOColdCodeOnlyForInstrPGO(
"pgso-cold-code-only-for-instr-pgo", cl::Hidden, cl::init(false),
cl::desc("Apply the profile guided size optimizations only "
"to cold code under instrumentation PGO."));
-cl::opt<bool> PGSOColdCodeOnlyForSamplePGO(
+cl::opt<bool> llvm::PGSOColdCodeOnlyForSamplePGO(
"pgso-cold-code-only-for-sample-pgo", cl::Hidden, cl::init(false),
cl::desc("Apply the profile guided size optimizations only "
"to cold code under sample PGO."));
-cl::opt<bool> PGSOColdCodeOnlyForPartialSamplePGO(
+cl::opt<bool> llvm::PGSOColdCodeOnlyForPartialSamplePGO(
"pgso-cold-code-only-for-partial-sample-pgo", cl::Hidden, cl::init(false),
cl::desc("Apply the profile guided size optimizations only "
"to cold code under partial-profile sample PGO."));
-cl::opt<bool> ForcePGSO(
+cl::opt<bool> llvm::ForcePGSO(
"force-pgso", cl::Hidden, cl::init(false),
cl::desc("Force the (profiled-guided) size optimizations. "));
-cl::opt<int> PgsoCutoffInstrProf(
+cl::opt<int> llvm::PgsoCutoffInstrProf(
"pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000), cl::ZeroOrMore,
cl::desc("The profile guided size optimization profile summary cutoff "
"for instrumentation profile."));
-cl::opt<int> PgsoCutoffSampleProf(
+cl::opt<int> llvm::PgsoCutoffSampleProf(
"pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
cl::desc("The profile guided size optimization profile summary cutoff "
"for sample profile."));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp
index e2c387cb8983..32f2f4e233b2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -95,13 +95,12 @@ static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
// globalized.
// Try to pack those partitions into N files in a balanced way, since this
// roughly equals thread balancing for the backend codegen step.
-static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
+static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap,
unsigned N) {
// At this point module should have the proper mix of globals and locals.
// As we attempt to partition this module, we must not change any
// locals to globals.
- LLVM_DEBUG(dbgs() << "Partition module with (" << M->size()
- << ")functions\n");
+ LLVM_DEBUG(dbgs() << "Partition module with (" << M.size() << ")functions\n");
ClusterMapType GVtoClusterMap;
ComdatMembersType ComdatMembers;
@@ -144,9 +143,9 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
};
- llvm::for_each(M->functions(), recordGVSet);
- llvm::for_each(M->globals(), recordGVSet);
- llvm::for_each(M->aliases(), recordGVSet);
+ llvm::for_each(M.functions(), recordGVSet);
+ llvm::for_each(M.globals(), recordGVSet);
+ llvm::for_each(M.aliases(), recordGVSet);
// Assigned all GVs to merged clusters while balancing number of objects in
// each.
@@ -247,31 +246,32 @@ static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
}
void llvm::SplitModule(
- std::unique_ptr<Module> M, unsigned N,
+ Module &M, unsigned N,
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
bool PreserveLocals) {
if (!PreserveLocals) {
- for (Function &F : *M)
+ for (Function &F : M)
externalize(&F);
- for (GlobalVariable &GV : M->globals())
+ for (GlobalVariable &GV : M.globals())
externalize(&GV);
- for (GlobalAlias &GA : M->aliases())
+ for (GlobalAlias &GA : M.aliases())
externalize(&GA);
- for (GlobalIFunc &GIF : M->ifuncs())
+ for (GlobalIFunc &GIF : M.ifuncs())
externalize(&GIF);
}
// This performs splitting without a need for externalization, which might not
// always be possible.
ClusterIDMapType ClusterIDMap;
- findPartitions(M.get(), ClusterIDMap, N);
+ findPartitions(M, ClusterIDMap, N);
// FIXME: We should be able to reuse M as the last partition instead of
- // cloning it.
+ // cloning it. Note that the callers at the moment expect the module to
+ // be preserved, so this will need some adjustments as well.
for (unsigned I = 0; I < N; ++I) {
ValueToValueMapTy VMap;
std::unique_ptr<Module> MPart(
- CloneModule(*M, VMap, [&](const GlobalValue *GV) {
+ CloneModule(M, VMap, [&](const GlobalValue *GV) {
if (ClusterIDMap.count(GV))
return (ClusterIDMap[GV] == I);
else
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
deleted file mode 100644
index c57cec6be676..000000000000
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UniqueInternalLinkageNames.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-//===- UniqueInternalLinkageNames.cpp - Unique Internal Linkage Sym Names -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements unique naming of internal linkage symbols with option
-// -funique-internal-linkage-symbols.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/MD5.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-
-using namespace llvm;
-
-static bool uniqueifyInternalLinkageNames(Module &M) {
- llvm::MD5 Md5;
- Md5.update(M.getSourceFileName());
- llvm::MD5::MD5Result R;
- Md5.final(R);
- SmallString<32> Str;
- llvm::MD5::stringifyResult(R, Str);
- // Convert MD5hash to Decimal. Demangler suffixes can either contain numbers
- // or characters but not both.
- APInt IntHash = APInt(128, Str.str(), 16);
- // Prepend "__uniq" before the hash for tools like profilers to understand that
- // this symbol is of internal linkage type.
- std::string ModuleNameHash = (Twine(".__uniq.") + Twine(IntHash.toString(10, false))).str();
- bool Changed = false;
- MDBuilder MDB(M.getContext());
-
- // Append the module hash to all internal linkage functions.
- for (auto &F : M) {
- if (F.hasInternalLinkage()) {
- F.setName(F.getName() + ModuleNameHash);
- F.addFnAttr("sample-profile-suffix-elision-policy", "selected");
- // Replace linkage names in the debug metadata.
- if (DISubprogram *SP = F.getSubprogram()) {
- if (SP->getRawLinkageName()) {
- auto *Name = MDB.createString(F.getName());
- SP->replaceRawLinkageName(Name);
- if (DISubprogram *SPDecl = SP->getDeclaration()) {
- if (SPDecl->getRawLinkageName())
- SPDecl->replaceRawLinkageName(Name);
- }
- }
- }
- Changed = true;
- }
- }
-
- // Append the module hash to all internal linkage globals.
- for (auto &GV : M.globals()) {
- if (GV.hasInternalLinkage()) {
- GV.setName(GV.getName() + ModuleNameHash);
- Changed = true;
- }
- }
- return Changed;
-}
-
-namespace {
-
-// Legacy pass that provides a name to every anon globals.
-class UniqueInternalLinkageNamesLegacyPass : public ModulePass {
-
-public:
- /// Pass identification, replacement for typeid
- static char ID;
-
- /// Specify pass name for debug output
- StringRef getPassName() const override {
- return "Unique Internal Linkage Names";
- }
-
- explicit UniqueInternalLinkageNamesLegacyPass() : ModulePass(ID) {
- initializeUniqueInternalLinkageNamesLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- return uniqueifyInternalLinkageNames(M);
- }
-};
-
-char UniqueInternalLinkageNamesLegacyPass::ID = 0;
-} // anonymous namespace
-
-PreservedAnalyses
-UniqueInternalLinkageNamesPass::run(Module &M, ModuleAnalysisManager &AM) {
- if (!uniqueifyInternalLinkageNames(M))
- return PreservedAnalyses::all();
-
- return PreservedAnalyses::none();
-}
-
-INITIALIZE_PASS_BEGIN(UniqueInternalLinkageNamesLegacyPass,
- "unique-internal-linkage-names",
- "Uniqueify internal linkage names", false, false)
-INITIALIZE_PASS_END(UniqueInternalLinkageNamesLegacyPass,
- "unique-internal-linkage-names",
- "Uniqueify Internal linkage names", false, false)
-
-namespace llvm {
-ModulePass *createUniqueInternalLinkageNamesPass() {
- return new UniqueInternalLinkageNamesLegacyPass();
-}
-} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
index 73c0532f3fd5..3ca36a1cad91 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp
@@ -45,7 +45,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeInjectTLIMappingsLegacyPass(Registry);
initializeFixIrreduciblePass(Registry);
initializeUnifyLoopExitsLegacyPassPass(Registry);
- initializeUniqueInternalLinkageNamesLegacyPassPass(Registry);
}
/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 61cd8595a73b..6336af25ef98 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -189,14 +189,6 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
if (StoreBase != LoadBase)
return -1;
- // If the load and store are to the exact same address, they should have been
- // a must alias. AA must have gotten confused.
- // FIXME: Study to see if/when this happens. One case is forwarding a memset
- // to a load from the base of the memset.
-
- // If the load and store don't overlap at all, the store doesn't provide
- // anything to the load. In this case, they really don't alias at all, AA
- // must have gotten confused.
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize();
if ((WriteSizeInBits & 7) | (LoadSize & 7))
@@ -204,15 +196,6 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
LoadSize /= 8;
- bool isAAFailure = false;
- if (StoreOffset < LoadOffset)
- isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset;
- else
- isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset;
-
- if (isAAFailure)
- return -1;
-
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load then merge the bits in) but this seems unlikely to be
@@ -221,6 +204,18 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
StoreOffset + StoreSize < LoadOffset + LoadSize)
return -1;
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused. The if statement above already ensures that
+ // StoreOffset <= LoadOffset.
+ if (StoreOffset + int64_t(StoreSize) <= LoadOffset)
+ return -1;
+
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset - StoreOffset;
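A minimal stand-alone model of the offset arithmetic in the reordered checks above (byte units; not the real analyzeLoadFromClobberingWrite interface):

  #include <cstdint>

  // Returns the byte offset of the load within the clobbering store, or -1
  // when the stored bytes cannot be forwarded to the load.
  int64_t clobberOffset(int64_t StoreOff, int64_t StoreSize,
                        int64_t LoadOff, int64_t LoadSize) {
    // The load must be completely contained within the stored bytes.
    if (StoreOff > LoadOff || StoreOff + StoreSize < LoadOff + LoadSize)
      return -1;
    // Here StoreOff <= LoadOff already holds, so one comparison suffices to
    // reject a store that ends at or before the start of the load.
    if (StoreOff + StoreSize <= LoadOff)
      return -1;
    return LoadOff - StoreOff;
  }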
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 930e0b7ee01a..f3afd42e6163 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -26,8 +26,8 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
@@ -37,6 +37,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
#include <cassert>
#include <limits>
#include <memory>
@@ -44,6 +45,8 @@
using namespace llvm;
+#define DEBUG_TYPE "value-mapper"
+
// Out of line method to get vtable etc for class.
void ValueMapTypeRemapper::anchor() {}
void ValueMaterializer::anchor() {}
@@ -366,7 +369,7 @@ Value *Mapper::mapValue(const Value *V) {
if (NewTy != IA->getFunctionType())
V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
IA->hasSideEffects(), IA->isAlignStack(),
- IA->getDialect());
+ IA->getDialect(), IA->canThrow());
}
return getVM()[V] = const_cast<Value *>(V);
@@ -390,6 +393,26 @@ Value *Mapper::mapValue(const Value *V) {
: MetadataAsValue::get(V->getContext(),
MDTuple::get(V->getContext(), None));
}
+ if (auto *AL = dyn_cast<DIArgList>(MD)) {
+ SmallVector<ValueAsMetadata *, 4> MappedArgs;
+ for (auto *VAM : AL->getArgs()) {
+ // Map both Local and Constant VAMs here; they will both ultimately
+ // be mapped via mapValue (apart from constants when we have no
+ // module level changes, which have an identity mapping).
+ if ((Flags & RF_NoModuleLevelChanges) && isa<ConstantAsMetadata>(VAM)) {
+ MappedArgs.push_back(VAM);
+ } else if (Value *LV = mapValue(VAM->getValue())) {
+ MappedArgs.push_back(
+ LV == VAM->getValue() ? VAM : ValueAsMetadata::get(LV));
+ } else {
+ // If we cannot map the value, set the argument as undef.
+ MappedArgs.push_back(ValueAsMetadata::get(
+ UndefValue::get(VAM->getValue()->getType())));
+ }
+ }
+ return MetadataAsValue::get(V->getContext(),
+ DIArgList::get(V->getContext(), MappedArgs));
+ }
// If this is a module-level metadata and we know that nothing at the module
// level is changing, then use an identity mapping.
@@ -412,6 +435,20 @@ Value *Mapper::mapValue(const Value *V) {
if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
return mapBlockAddress(*BA);
+ if (const auto *E = dyn_cast<DSOLocalEquivalent>(C)) {
+ auto *Val = mapValue(E->getGlobalValue());
+ GlobalValue *GV = dyn_cast<GlobalValue>(Val);
+ if (GV)
+ return getVM()[E] = DSOLocalEquivalent::get(GV);
+
+ auto *Func = cast<Function>(Val->stripPointerCastsAndAliases());
+ Type *NewTy = E->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+ return getVM()[E] = llvm::ConstantExpr::getBitCast(
+ DSOLocalEquivalent::get(Func), NewTy);
+ }
+
auto mapValueOrNull = [this](Value *V) {
auto Mapped = mapValue(V);
assert((Mapped || (Flags & RF_NullMapMissingGlobalValues)) &&
@@ -533,23 +570,21 @@ Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) {
return None;
}
-static Metadata *cloneOrBuildODR(const MDNode &N) {
- auto *CT = dyn_cast<DICompositeType>(&N);
- // If ODR type uniquing is enabled, we would have uniqued composite types
- // with identifiers during bitcode reading, so we can just use CT.
- if (CT && CT->getContext().isODRUniquingDebugTypes() &&
- CT->getIdentifier() != "")
- return const_cast<DICompositeType *>(CT);
- return MDNode::replaceWithDistinct(N.clone());
-}
-
MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) {
assert(N.isDistinct() && "Expected a distinct node");
assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node");
- DistinctWorklist.push_back(
- cast<MDNode>((M.Flags & RF_MoveDistinctMDs)
- ? M.mapToSelf(&N)
- : M.mapToMetadata(&N, cloneOrBuildODR(N))));
+ Metadata *NewM = nullptr;
+
+ if (M.Flags & RF_ReuseAndMutateDistinctMDs) {
+ NewM = M.mapToSelf(&N);
+ } else {
+ NewM = MDNode::replaceWithDistinct(N.clone());
+ LLVM_DEBUG(dbgs() << "\nMap " << N << "\n"
+ << "To " << *NewM << "\n\n");
+ M.mapToMetadata(&N, NewM);
+ }
+ DistinctWorklist.push_back(cast<MDNode>(NewM));
+
return DistinctWorklist.back();
}
@@ -597,6 +632,9 @@ void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) {
for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I) {
Metadata *Old = N.getOperand(I);
Metadata *New = mapOperand(Old);
+ if (Old != New)
+ LLVM_DEBUG(dbgs() << "Replacing Op " << Old << " with " << New << " in "
+ << N << "\n");
if (Old != New)
N.replaceOperandWith(I, New);
@@ -716,6 +754,11 @@ void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) {
});
auto *NewN = MDNode::replaceWithUniqued(std::move(ClonedN));
+ if (N && NewN && N != NewN) {
+ LLVM_DEBUG(dbgs() << "\nMap " << *N << "\n"
+ << "To " << *NewN << "\n\n");
+ }
+
M.mapToMetadata(N, NewN);
// Nodes that were referenced out of order in the POT are involved in a
@@ -902,7 +945,8 @@ void Mapper::remapInstruction(Instruction *I) {
AttributeList Attrs = CB->getAttributes();
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
for (Attribute::AttrKind TypedAttr :
- {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
+ {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef,
+ Attribute::InAlloca}) {
if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
Attrs = Attrs.replaceAttributeType(C, i, TypedAttr,
TypeMapper->remapType(Ty));
@@ -988,8 +1032,8 @@ void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
Elements.push_back(NewV);
}
- GV.setInitializer(ConstantArray::get(
- cast<ArrayType>(GV.getType()->getElementType()), Elements));
+ GV.setInitializer(
+ ConstantArray::get(cast<ArrayType>(GV.getValueType()), Elements));
}
void Mapper::scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,