author    Dimitry Andric <dim@FreeBSD.org>    2020-01-17 20:45:01 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2020-01-17 20:45:01 +0000
commit    706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch)
tree      4adf86a776049cbf7f69a1929c4babcbbef925eb /llvm/lib/Transforms/Scalar
parent    7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff)
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--  llvm/lib/Transforms/Scalar/ADCE.cpp                              1
-rw-r--r--  llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp          1
-rw-r--r--  llvm/lib/Transforms/Scalar/BDCE.cpp                              5
-rw-r--r--  llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp                 4
-rw-r--r--  llvm/lib/Transforms/Scalar/ConstantHoisting.cpp                 33
-rw-r--r--  llvm/lib/Transforms/Scalar/ConstantProp.cpp                      1
-rw-r--r--  llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp       10
-rw-r--r--  llvm/lib/Transforms/Scalar/DCE.cpp                              41
-rw-r--r--  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp             78
-rw-r--r--  llvm/lib/Transforms/Scalar/DivRemPairs.cpp                       1
-rw-r--r--  llvm/lib/Transforms/Scalar/EarlyCSE.cpp                          7
-rw-r--r--  llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp                    1
-rw-r--r--  llvm/lib/Transforms/Scalar/Float2Int.cpp                         2
-rw-r--r--  llvm/lib/Transforms/Scalar/GVN.cpp                              95
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNHoist.cpp                         10
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNSink.cpp                           3
-rw-r--r--  llvm/lib/Transforms/Scalar/GuardWidening.cpp                    99
-rw-r--r--  llvm/lib/Transforms/Scalar/IndVarSimplify.cpp                   50
-rw-r--r--  llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp    1
-rw-r--r--  llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp               14
-rw-r--r--  llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp                  1
-rw-r--r--  llvm/lib/Transforms/Scalar/JumpThreading.cpp                   329
-rw-r--r--  llvm/lib/Transforms/Scalar/LICM.cpp                             87
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp                  1
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopDeletion.cpp                      1
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopDistribute.cpp                    1
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopFuse.cpp                         87
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp              927
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp                  4
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopInterchange.cpp                 138
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp               6
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopPredication.cpp                 253
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopRerollPass.cpp                    3
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopRotation.cpp                      2
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp                   5
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopSink.cpp                          3
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp                5
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp            105
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp                    4
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopUnswitch.cpp                      3
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp                1
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerAtomic.cpp                       1
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp           1
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp              1
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp               3
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp           894
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp           1
-rw-r--r--  llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp               23
-rw-r--r--  llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp                  68
-rw-r--r--  llvm/lib/Transforms/Scalar/MergeICmps.cpp                        1
-rw-r--r--  llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp             1
-rw-r--r--  llvm/lib/Transforms/Scalar/NaryReassociate.cpp                   3
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp                           21
-rw-r--r--  llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp           1
-rw-r--r--  llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp                   1
-rw-r--r--  llvm/lib/Transforms/Scalar/Reassociate.cpp                       7
-rw-r--r--  llvm/lib/Transforms/Scalar/Reg2Mem.cpp                           3
-rw-r--r--  llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp           1
-rw-r--r--  llvm/lib/Transforms/Scalar/SCCP.cpp                              5
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp                            134
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalar.cpp                            2
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalarizer.cpp                       37
-rw-r--r--  llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp        3
-rw-r--r--  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp               33
-rw-r--r--  llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp                   3
-rw-r--r--  llvm/lib/Transforms/Scalar/Sink.cpp                              3
-rw-r--r--  llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp               8
-rw-r--r--  llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp              1
-rw-r--r--  llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp        3
-rw-r--r--  llvm/lib/Transforms/Scalar/StructurizeCFG.cpp                    2
-rw-r--r--  llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp          1
-rw-r--r--  llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp              1
72 files changed, 2122 insertions, 1568 deletions
diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 7f7460c5746a7..cc3d3bf7cdbf1 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 0e9f03a060611..06deaf3c4f9a9 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -15,6 +15,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/InitializePasses.h"
#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
index 9bd387c33e80a..0fa38fa80b178 100644
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -19,13 +19,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "bdce"
@@ -101,7 +102,7 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
(I.getType()->isIntOrIntVectorTy() &&
DB.getDemandedBits(&I).isNullValue() &&
wouldInstructionBeTriviallyDead(&I))) {
- salvageDebugInfo(I);
+ salvageDebugInfoOrMarkUndef(I);
Worklist.push_back(&I);
I.dropAllReferences();
Changed = true;
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index c3fba923104fb..e34c011b1c871 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -59,13 +59,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 9f340afbf7c28..5bfece010bec2 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -14,7 +14,7 @@
// cost. If the constant can be folded into the instruction (the cost is
// TCC_Free) or the cost is just a simple operation (TCC_BASIC), then we don't
// consider it expensive and leave it alone. This is the default behavior and
-// the default implementation of getIntImmCost will always return TCC_Free.
+// the default implementation of getIntImmCostInst will always return TCC_Free.
//
// If the cost is more than TCC_BASIC, then the integer constant can't be folded
// into the instruction and it might be beneficial to hoist the constant.
@@ -43,7 +43,6 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -54,6 +53,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/Casting.h"
@@ -61,6 +61,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -361,11 +362,11 @@ void ConstantHoistingPass::collectConstantCandidates(
// Ask the target about the cost of materializing the constant for the given
// instruction and operand index.
if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
- Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx,
- ConstInt->getValue(), ConstInt->getType());
+ Cost = TTI->getIntImmCostIntrin(IntrInst->getIntrinsicID(), Idx,
+ ConstInt->getValue(), ConstInt->getType());
else
- Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(),
- ConstInt->getType());
+ Cost = TTI->getIntImmCostInst(Inst->getOpcode(), Idx, ConstInt->getValue(),
+ ConstInt->getType());
// Ignore cheap integer constants.
if (Cost > TargetTransformInfo::TCC_Basic) {
@@ -415,7 +416,7 @@ void ConstantHoistingPass::collectConstantCandidates(
// usually lowered to a load from constant pool. Such operation is unlikely
// to be cheaper than compute it by <Base + Offset>, which can be lowered to
// an ADD instruction or folded into Load/Store instruction.
- int Cost = TTI->getIntImmCost(Instruction::Add, 1, Offset, PtrIntTy);
+ int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy);
ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
ConstCandMapType::iterator Itr;
bool Inserted;
@@ -486,9 +487,10 @@ void ConstantHoistingPass::collectConstantCandidates(
// Scan all operands.
for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
// The cost of materializing the constants (defined in
- // `TargetTransformInfo::getIntImmCost`) for instructions which only take
- // constant variables is lower than `TargetTransformInfo::TCC_Basic`. So
- // it's safe for us to collect constant candidates from all IntrinsicInsts.
+ // `TargetTransformInfo::getIntImmCostInst`) for instructions which only
+ // take constant variables is lower than `TargetTransformInfo::TCC_Basic`.
+ // So it's safe for us to collect constant candidates from all
+ // IntrinsicInsts.
if (canReplaceOperandWithVariable(Inst, Idx) || isa<IntrinsicInst>(Inst)) {
collectConstantCandidates(ConstCandMap, Inst, Idx);
}
@@ -499,9 +501,13 @@ void ConstantHoistingPass::collectConstantCandidates(
/// into an instruction itself.
void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {
ConstCandMapType ConstCandMap;
- for (BasicBlock &BB : Fn)
+ for (BasicBlock &BB : Fn) {
+ // Ignore unreachable basic blocks.
+ if (!DT->isReachableFromEntry(&BB))
+ continue;
for (Instruction &Inst : BB)
collectConstantCandidates(ConstCandMap, &Inst);
+ }
}
// This helper function is necessary to deal with values that have different
@@ -552,7 +558,8 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
unsigned NumUses = 0;
bool OptForSize = Entry->getParent()->hasOptSize() ||
- llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
+ llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI,
+ PGSOQueryType::IRPass);
if (!OptForSize || std::distance(S,E) > 100) {
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
NumUses += ConstCand->Uses.size();
@@ -575,7 +582,7 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
for (auto User : ConstCand->Uses) {
unsigned Opcode = User.Inst->getOpcode();
unsigned OpndIdx = User.OpndIdx;
- Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty);
+ Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty);
LLVM_DEBUG(dbgs() << "Cost: " << Cost << "\n");
for (auto C2 = S; C2 != E; ++C2) {
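The ConstantHoisting hunks above do two things: the target cost queries are renamed (getIntImmCost becomes getIntImmCostInst for ordinary instructions and getIntImmCostIntrin for intrinsics), and candidate collection now skips blocks that are unreachable from the entry. A minimal illustrative sketch of how those two pieces combine, assuming DT and TTI are set up as in ConstantHoistingPass; the helper name collectReachableImmCosts is hypothetical:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void collectReachableImmCosts(Function &Fn, DominatorTree *DT,
                                     TargetTransformInfo *TTI) {
  for (BasicBlock &BB : Fn) {
    // Unreachable blocks never execute, so hoisting constants out of them
    // cannot pay off; the pass now skips them entirely.
    if (!DT->isReachableFromEntry(&BB))
      continue;
    for (Instruction &Inst : BB)
      for (unsigned Idx = 0, E = Inst.getNumOperands(); Idx != E; ++Idx)
        if (auto *CI = dyn_cast<ConstantInt>(Inst.getOperand(Idx))) {
          // Per-instruction materialization cost for this immediate.
          int Cost = TTI->getIntImmCostInst(Inst.getOpcode(), Idx,
                                            CI->getValue(), CI->getType());
          (void)Cost; // The real pass records candidates costlier than TCC_Basic.
        }
  }
}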
diff --git a/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index e9e6afe3fdd41..73bf1d521b1d0 100644
--- a/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 2ef85268df483..3435bc7f5eaaa 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -194,7 +195,14 @@ static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
}
// All constant incoming values map to the same variable along the incoming
- // edges of the phi. The phi is unnecessary.
+ // edges of the phi. The phi is unnecessary. However, we must drop all
+ // poison-generating flags to ensure that no poison is propagated to the phi
+ // location by performing this substitution.
+ // Warning: If the underlying analysis changes, this may not be enough to
+ // guarantee that poison is not propagated.
+ // TODO: We may be able to re-infer flags by re-analyzing the instruction.
+ if (auto *CommonInst = dyn_cast<Instruction>(CommonValue))
+ CommonInst->dropPoisonGeneratingFlags();
P->replaceAllUsesWith(CommonValue);
P->eraseFromParent();
++NumPhiCommon;
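The comment added above spells out why the common value's poison-generating flags must be dropped before the phi is bypassed. A minimal sketch of the ordering, using the same calls as the hunk; the IR in the comment is illustrative only:

// Example of the hazard: the analysis can prove that the *value* of
//   %c = add nuw i8 %x, 1
// matches a constant incoming to the phi, but that proof ignores poison.
// If %c is poison (the nuw assumption does not hold), substituting %c for
// the non-poison constant would propagate poison to the phi's users, so the
// flags are stripped before widening %c's use set.
if (auto *CommonInst = dyn_cast<Instruction>(CommonValue))
  CommonInst->dropPoisonGeneratingFlags();
P->replaceAllUsesWith(CommonValue);
P->eraseFromParent();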
diff --git a/llvm/lib/Transforms/Scalar/DCE.cpp b/llvm/lib/Transforms/Scalar/DCE.cpp
index a79d775aa7f3a..a4b0c8df98f64 100644
--- a/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -19,12 +19,14 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "dce"
@@ -80,6 +82,43 @@ Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
}
+//===--------------------------------------------------------------------===//
+// RedundantDbgInstElimination pass implementation
+//
+
+namespace {
+struct RedundantDbgInstElimination : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ RedundantDbgInstElimination() : FunctionPass(ID) {
+ initializeRedundantDbgInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ bool Changed = false;
+ for (auto &BB : F)
+ Changed |= RemoveRedundantDbgInstrs(&BB);
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char RedundantDbgInstElimination::ID = 0;
+INITIALIZE_PASS(RedundantDbgInstElimination, "redundant-dbg-inst-elim",
+ "Redundant Dbg Instruction Elimination", false, false)
+
+Pass *llvm::createRedundantDbgInstEliminationPass() {
+ return new RedundantDbgInstElimination();
+}
+
+//===--------------------------------------------------------------------===//
+// DeadCodeElimination pass implementation
+//
+
static bool DCEInstruction(Instruction *I,
SmallSetVector<Instruction *, 16> &WorkList,
const TargetLibraryInfo *TLI) {
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 685de82810ede..1ba4aab999e14 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -48,6 +49,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -99,6 +101,7 @@ static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+ MapVector<Instruction *, bool> &ThrowableInst,
SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -112,6 +115,10 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ // Mark the DeadInst as dead in the list of throwable instructions.
+ auto It = ThrowableInst.find(DeadInst);
+ if (It != ThrowableInst.end())
+ ThrowableInst[It->first] = false;
++NumFastOther;
// Try to preserve debug information attached to the dead instruction.
@@ -144,6 +151,9 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
DeadInst->eraseFromParent();
} while (!NowDeadInsts.empty());
*BBI = NewIter;
+ // Pop dead entries from back of ThrowableInst till we find an alive entry.
+ while (!ThrowableInst.empty() && !ThrowableInst.back().second)
+ ThrowableInst.pop_back();
}
/// Does this instruction write some memory? This only returns true for things
@@ -169,15 +179,18 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
}
if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
- StringRef FnName = F->getName();
- if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy))
- return true;
- if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy))
- return true;
- if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat))
- return true;
- if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat))
- return true;
+ LibFunc LF;
+ if (TLI.getLibFunc(*F, LF) && TLI.has(LF)) {
+ switch (LF) {
+ case LibFunc_strcpy:
+ case LibFunc_strncpy:
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ return true;
+ default:
+ return false;
+ }
+ }
}
}
return false;
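The rewritten check above replaces four name-by-name string comparisons with a single TargetLibraryInfo lookup and a switch over LibFunc values. A small sketch of the same pattern as a standalone predicate; the helper name isStringWritingLibCall is hypothetical:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// True for the recognized libcalls that write through their destination
// argument (the set handled by hasAnalyzableMemoryWrite above).
static bool isStringWritingLibCall(const Function &F,
                                   const TargetLibraryInfo &TLI) {
  LibFunc LF;
  if (!TLI.getLibFunc(F, LF) || !TLI.has(LF))
    return false;
  switch (LF) {
  case LibFunc_strcpy:
  case LibFunc_strncpy:
  case LibFunc_strcat:
  case LibFunc_strncat:
    return true;
  default:
    return false;
  }
}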
@@ -656,7 +669,8 @@ static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
static bool handleFree(CallInst *F, AliasAnalysis *AA,
MemoryDependenceResults *MD, DominatorTree *DT,
const TargetLibraryInfo *TLI,
- InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB) {
+ InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+ MapVector<Instruction *, bool> &ThrowableInst) {
bool MadeChange = false;
MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -690,7 +704,8 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
// DCE instructions only used to calculate that store.
BasicBlock::iterator BBI(Dependency);
- deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, OBB,
+ ThrowableInst);
++NumFastStores;
MadeChange = true;
@@ -747,8 +762,8 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
MemoryDependenceResults *MD,
const TargetLibraryInfo *TLI,
- InstOverlapIntervalsTy &IOL,
- OrderedBasicBlock &OBB) {
+ InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+ MapVector<Instruction *, bool> &ThrowableInst) {
bool MadeChange = false;
// Keep track of all of the stack objects that are dead at the end of the
@@ -809,7 +824,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
<< '\n');
// DCE instructions only used to calculate that store.
- deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, OBB,
+ deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, OBB, ThrowableInst,
&DeadStackObjects);
++NumFastStores;
MadeChange = true;
@@ -821,7 +836,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
if (isInstructionTriviallyDead(&*BBI, TLI)) {
LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: "
<< *&*BBI << '\n');
- deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, OBB,
+ deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, OBB, ThrowableInst,
&DeadStackObjects);
++NumFastOther;
MadeChange = true;
@@ -1028,7 +1043,8 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
InstOverlapIntervalsTy &IOL,
- OrderedBasicBlock &OBB) {
+ OrderedBasicBlock &OBB,
+ MapVector<Instruction *, bool> &ThrowableInst) {
// Must be a store instruction.
StoreInst *SI = dyn_cast<StoreInst>(Inst);
if (!SI)
@@ -1044,7 +1060,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
<< *DepLoad << "\n STORE: " << *SI << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB, ThrowableInst);
++NumRedundantStores;
return true;
}
@@ -1062,7 +1078,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB, ThrowableInst);
++NumRedundantStores;
return true;
}
@@ -1077,7 +1093,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
bool MadeChange = false;
OrderedBasicBlock OBB(&BB);
- Instruction *LastThrowing = nullptr;
+ MapVector<Instruction *, bool> ThrowableInst;
// A map of interval maps representing partially-overwritten value parts.
InstOverlapIntervalsTy IOL;
@@ -1086,7 +1102,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(&*BBI, TLI)) {
- MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, OBB);
+ MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, OBB, ThrowableInst);
// Increment BBI after handleFree has potentially deleted instructions.
// This ensures we maintain a valid iterator.
++BBI;
@@ -1096,7 +1112,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
Instruction *Inst = &*BBI++;
if (Inst->mayThrow()) {
- LastThrowing = Inst;
+ ThrowableInst[Inst] = true;
continue;
}
@@ -1105,7 +1121,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
continue;
// eliminateNoopStore will update in iterator, if necessary.
- if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB)) {
+ if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB,
+ ThrowableInst)) {
MadeChange = true;
continue;
}
@@ -1148,6 +1165,12 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
if (!DepLoc.Ptr)
break;
+ // Find the last throwable instruction not removed by call to
+ // deleteDeadInstruction.
+ Instruction *LastThrowing = nullptr;
+ if (!ThrowableInst.empty())
+ LastThrowing = ThrowableInst.back().first;
+
// Make sure we don't look past a call which might throw. This is an
// issue because MemoryDependenceAnalysis works in the wrong direction:
// it finds instructions which dominate the current instruction, rather than
@@ -1187,7 +1210,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
<< "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB,
+ ThrowableInst);
++NumFastStores;
MadeChange = true;
@@ -1269,8 +1293,10 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
OBB.replaceInstruction(DepWrite, SI);
// Delete the old stores and now-dead instructions that feed them.
- deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, OBB);
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, OBB,
+ ThrowableInst);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB,
+ ThrowableInst);
MadeChange = true;
// We erased DepWrite and Inst (Loc); start over.
@@ -1305,7 +1331,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
- MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, OBB);
+ MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, OBB, ThrowableInst);
return MadeChange;
}
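Throughout the DSE changes above, the single LastThrowing pointer is replaced by an insertion-ordered MapVector so that deleting instructions cannot leave a dangling "last throwing" reference: live throwing instructions are recorded as true, deleteDeadInstruction flips removed ones to false, and dead entries are popped off the back. A minimal sketch of that lookup, with the hypothetical helper name lastLiveThrowing:

#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Entries are kept in program (insertion) order; deleted instructions are
// only marked false, never erased, so back() stays cheap and safe.
static Instruction *
lastLiveThrowing(MapVector<Instruction *, bool> &ThrowableInst) {
  while (!ThrowableInst.empty() && !ThrowableInst.back().second)
    ThrowableInst.pop_back();                        // Drop dead entries.
  return ThrowableInst.empty() ? nullptr
                               : ThrowableInst.back().first; // Last live thrower.
}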
diff --git a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index 934853507478b..132dfc8f6da10 100644
--- a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index ce540683dae26..40c1ba88354f1 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -45,6 +44,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -55,6 +55,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <deque>
#include <memory>
@@ -906,8 +907,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
- if (!salvageDebugInfo(*Inst))
- replaceDbgUsesWithUndef(Inst);
+
+ salvageDebugInfoOrMarkUndef(*Inst);
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
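The EarlyCSE hunk above folds the previous two-step pattern into the new salvageDebugInfoOrMarkUndef helper. Judging from the removed lines, the intent is equivalent to this sketch: try to re-express debug users in terms of the dying instruction's operands, and fall back to undef so they do not keep a stale value:

// Equivalent of salvageDebugInfoOrMarkUndef(*Inst), per the code it replaces:
if (!salvageDebugInfo(*Inst))     // Rewrite dbg.value uses if possible...
  replaceDbgUsesWithUndef(Inst);  // ...otherwise detach them via undef.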
diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e6abf1ceb026d..72512430b3662 100644
--- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index 4d2eac0451dfc..af223cc837f2c 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "float2int"
#include "llvm/Transforms/Scalar/Float2Int.h"
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 743353eaea225..1e6aab14e7b44 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -64,6 +64,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -112,7 +113,7 @@ static cl::opt<uint32_t> MaxNumDeps(
struct llvm::GVN::Expression {
uint32_t opcode;
- Type *type;
+ Type *type = nullptr;
bool commutative = false;
SmallVector<uint32_t, 4> varargs;
@@ -173,7 +174,7 @@ struct llvm::gvn::AvailableValue {
PointerIntPair<Value *, 2, ValType> Val;
/// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
+ unsigned Offset = 0;
static AvailableValue get(Value *V, unsigned Offset = 0) {
AvailableValue Res;
@@ -237,7 +238,7 @@ struct llvm::gvn::AvailableValue {
/// the associated BasicBlock.
struct llvm::gvn::AvailableValueInBlock {
/// BB - The basic block in question.
- BasicBlock *BB;
+ BasicBlock *BB = nullptr;
/// AV - The actual available value
AvailableValue AV;
@@ -364,6 +365,7 @@ GVN::ValueTable::ValueTable() = default;
GVN::ValueTable::ValueTable(const ValueTable &) = default;
GVN::ValueTable::ValueTable(ValueTable &&) = default;
GVN::ValueTable::~ValueTable() = default;
+GVN::ValueTable &GVN::ValueTable::operator=(const GVN::ValueTable &Arg) = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
@@ -1387,6 +1389,59 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}
+static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
+ if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_EQ)
+ return true;
+
+ // Floating point comparisons can be equal, but not equivalent. Cases:
+ // NaNs for unordered operators
+ // +0.0 vs 0.0 for all operators
+ if (Cmp->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
+ (Cmp->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
+ Cmp->getFastMathFlags().noNaNs())) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // If we can prove either side non-zero, then equality must imply
+ // equivalence.
+ // FIXME: We should do this optimization if 'no signed zeros' is
+ // applicable via an instruction-level fast-math-flag or some other
+ // indicator that relaxed FP semantics are being used.
+ if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
+ return true;
+ if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
+ return true;
+ // TODO: Handle vector floating point constants
+ }
+ return false;
+}
+
+static bool impliesEquivalanceIfFalse(CmpInst* Cmp) {
+ if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_NE)
+ return true;
+
+ // Floating point comparisons can be equal, but not equivalent. Cases:
+ // NaNs for unordered operators
+ // +0.0 vs 0.0 for all operators
+ if ((Cmp->getPredicate() == CmpInst::Predicate::FCMP_ONE &&
+ Cmp->getFastMathFlags().noNaNs()) ||
+ Cmp->getPredicate() == CmpInst::Predicate::FCMP_UNE) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // If we can prove either side non-zero, then equality must imply
+ // equivalence.
+ // FIXME: We should do this optimization if 'no signed zeros' is
+ // applicable via an instruction-level fast-math-flag or some other
+ // indicator that relaxed FP semantics are being used.
+ if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
+ return true;
+ if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
+ return true;
+ // TODO: Handle vector floating point constants
+ }
+ return false;
+}
+
+
static bool hasUsersIn(Value *V, BasicBlock *BB) {
for (User *U : V->users())
if (isa<Instruction>(U) &&
@@ -1451,10 +1506,7 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
// call void @llvm.assume(i1 %cmp)
// ret float %load ; will change it to ret float %0
if (auto *CmpI = dyn_cast<CmpInst>(V)) {
- if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
- CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
- (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
- CmpI->getFastMathFlags().noNaNs())) {
+ if (impliesEquivalanceIfTrue(CmpI)) {
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
// Heuristically pick the better replacement -- the choice of heuristic
@@ -1481,12 +1533,6 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
return Changed;
- // +0.0 and -0.0 compare equal, but do not imply equivalence. Unless we
- // can prove equivalence, bail.
- if (CmpRHS->getType()->isFloatTy() &&
- (!isa<ConstantFP>(CmpRHS) || cast<ConstantFP>(CmpRHS)->isZero()))
- return Changed;
-
LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
<< *CmpLHS << " with "
<< *CmpRHS << " in block "
@@ -1875,27 +1921,12 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
// If "A == B" is known true, or "A != B" is known false, then replace
- // A with B everywhere in the scope.
- if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
- (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
+ // A with B everywhere in the scope. For floating point operations, we
+ // have to be careful since equality does not always imply equivalence.
+ if ((isKnownTrue && impliesEquivalanceIfTrue(Cmp)) ||
+ (isKnownFalse && impliesEquivalanceIfFalse(Cmp)))
Worklist.push_back(std::make_pair(Op0, Op1));
- // Handle the floating point versions of equality comparisons too.
- if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
- (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
-
- // Floating point -0.0 and 0.0 compare equal, so we can only
- // propagate values if we know that we have a constant and that
- // its value is non-zero.
-
- // FIXME: We should do this optimization if 'no signed zeros' is
- // applicable via an instruction-level fast-math-flag or some other
- // indicator that relaxed FP semantics are being used.
-
- if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
- Worklist.push_back(std::make_pair(Op0, Op1));
- }
-
// If "A >= B" is known true, replace "A < B" with false everywhere.
CmpInst::Predicate NotPred = Cmp->getInversePredicate();
Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
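The new impliesEquivalanceIfTrue/impliesEquivalanceIfFalse helpers above encode why GVN cannot treat a floating-point "equal" as "interchangeable": +0.0 and -0.0 compare equal yet behave differently, and unordered predicates are also satisfied by NaN. A small standalone C++ illustration of both pitfalls (not LLVM code):

#include <cmath>
#include <cstdio>

int main() {
  double pz = 0.0, nz = -0.0;
  std::printf("%d\n", pz == nz);                   // 1: they compare equal...
  std::printf("%g %g\n", 1.0 / pz, 1.0 / nz);      // inf -inf: ...but differ.
  std::printf("%g %g\n", std::copysign(1.0, pz),
              std::copysign(1.0, nz));             // 1 -1
  // An unordered "equal" (FCMP_UEQ) is also true when either operand is NaN,
  // even though NaN equals nothing -- hence the extra noNaNs() checks above.
  double n = std::nan("");
  std::printf("%d\n", n == n);                     // 0
  return 0;
}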
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index c87e41484b132..e1796f6bf05a8 100644
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -47,7 +47,6 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -65,6 +64,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -72,6 +72,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -956,7 +957,8 @@ private:
if (MoveAccess && NewMemAcc) {
// The definition of this ld/st will not change: ld/st hoisting is
// legal when the ld/st is not moved past its current definition.
- MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::End);
+ MSSAUpdater->moveToPlace(NewMemAcc, DestBB,
+ MemorySSA::BeforeTerminator);
}
// Replace all other instructions with Repl with memory access NewMemAcc.
@@ -1067,6 +1069,9 @@ private:
++NI;
}
+ if (MSSA && VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
+
NumHoisted += NL + NS + NC + NI;
NumRemoved += NR;
NumLoadsHoisted += NL;
@@ -1168,6 +1173,7 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
}
};
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 054025755c69a..6d0a4975e2668 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -47,7 +47,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -59,6 +58,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
@@ -71,6 +71,7 @@
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 2697d78095681..a3eba27a4d90c 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -39,7 +39,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/GuardWidening.h"
-#include <functional>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -53,11 +52,15 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <functional>
using namespace llvm;
@@ -66,22 +69,6 @@ using namespace llvm;
STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
-static cl::opt<bool> WidenFrequentBranches(
- "guard-widening-widen-frequent-branches", cl::Hidden,
- cl::desc("Widen conditions of explicit branches into dominating guards in "
- "case if their taken frequency exceeds threshold set by "
- "guard-widening-frequent-branch-threshold option"),
- cl::init(false));
-
-static cl::opt<unsigned> FrequentBranchThreshold(
- "guard-widening-frequent-branch-threshold", cl::Hidden,
- cl::desc("When WidenFrequentBranches is set to true, this option is used "
- "to determine which branches are frequently taken. The criteria "
- "that a branch is taken more often than "
- "((FrequentBranchThreshold - 1) / FrequentBranchThreshold), then "
- "it is considered frequently taken"),
- cl::init(1000));
-
static cl::opt<bool>
WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
cl::desc("Whether or not we should widen guards "
@@ -97,15 +84,16 @@ static Value *getCondition(Instruction *I) {
"Bad guard intrinsic?");
return GI->getArgOperand(0);
}
- if (isGuardAsWidenableBranch(I)) {
- auto *Cond = cast<BranchInst>(I)->getCondition();
- return cast<BinaryOperator>(Cond)->getOperand(0);
- }
+ Value *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
+ return Cond;
+
return cast<BranchInst>(I)->getCondition();
}
// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
-// conditional branch.
+// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
@@ -126,7 +114,6 @@ class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
LoopInfo &LI;
- BranchProbabilityInfo *BPI;
/// Together, these describe the region of interest. This might be all of
/// the blocks within a function, or only a given loop's blocks and preheader.
@@ -271,26 +258,22 @@ class GuardWideningImpl {
void widenGuard(Instruction *ToWiden, Value *NewCondition,
bool InvertCondition) {
Value *Result;
+
widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
InvertCondition);
- Value *WidenableCondition = nullptr;
if (isGuardAsWidenableBranch(ToWiden)) {
- auto *Cond = cast<BranchInst>(ToWiden)->getCondition();
- WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1);
+ setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
+ return;
}
- if (WidenableCondition)
- Result = BinaryOperator::CreateAnd(Result, WidenableCondition,
- "guard.chk", ToWiden);
setCondition(ToWiden, Result);
}
public:
explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
- LoopInfo &LI, BranchProbabilityInfo *BPI,
- DomTreeNode *Root,
+ LoopInfo &LI, DomTreeNode *Root,
std::function<bool(BasicBlock*)> BlockFilter)
- : DT(DT), PDT(PDT), LI(LI), BPI(BPI), Root(Root), BlockFilter(BlockFilter)
+ : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter)
{}
/// The entry point for this pass.
@@ -309,13 +292,6 @@ static bool isSupportedGuardInstruction(const Instruction *Insn) {
bool GuardWideningImpl::run() {
DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
bool Changed = false;
- Optional<BranchProbability> LikelyTaken = None;
- if (WidenFrequentBranches && BPI) {
- unsigned Threshold = FrequentBranchThreshold;
- assert(Threshold > 0 && "Zero threshold makes no sense!");
- LikelyTaken = BranchProbability(Threshold - 1, Threshold);
- }
-
for (auto DFI = df_begin(Root), DFE = df_end(Root);
DFI != DFE; ++DFI) {
auto *BB = (*DFI)->getBlock();
@@ -330,17 +306,6 @@ bool GuardWideningImpl::run() {
for (auto *II : CurrentList)
Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
- if (WidenFrequentBranches && BPI)
- if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- if (BI->isConditional()) {
- // If one of branches of a conditional is likely taken, try to
- // eliminate it.
- if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken)
- Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock);
- else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken)
- Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock,
- /*InvertCondition*/true);
- }
}
assert(EliminatedGuardsAndBranches.empty() || Changed);
@@ -805,10 +770,7 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- BranchProbabilityInfo *BPI = nullptr;
- if (WidenFrequentBranches)
- BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
- if (!GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
+ if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run())
return PreservedAnalyses::all();
@@ -820,22 +782,13 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
-
- const auto &FAM =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
- Function &F = *L.getHeader()->getParent();
- BranchProbabilityInfo *BPI = nullptr;
- if (WidenFrequentBranches)
- BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(F);
-
BasicBlock *RootBB = L.getLoopPredecessor();
if (!RootBB)
RootBB = L.getHeader();
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L.contains(BB);
};
- if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, BPI,
- AR.DT.getNode(RootBB),
+ if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.DT.getNode(RootBB),
BlockFilter).run())
return PreservedAnalyses::all();
@@ -856,10 +809,7 @@ struct GuardWideningLegacyPass : public FunctionPass {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- BranchProbabilityInfo *BPI = nullptr;
- if (WidenFrequentBranches)
- BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- return GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
+ return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
[](BasicBlock*) { return true; } ).run();
}
@@ -868,8 +818,6 @@ struct GuardWideningLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- if (WidenFrequentBranches)
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
}
};
@@ -895,16 +843,11 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L->contains(BB);
};
- BranchProbabilityInfo *BPI = nullptr;
- if (WidenFrequentBranches)
- BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- return GuardWideningImpl(DT, PDT, LI, BPI,
+ return GuardWideningImpl(DT, PDT, LI,
DT.getNode(RootBB), BlockFilter).run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- if (WidenFrequentBranches)
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.setPreservesCFG();
getLoopAnalysisUsage(AU);
AU.addPreserved<PostDominatorTreeWrapperPass>();
@@ -920,8 +863,6 @@ INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-if (WidenFrequentBranches)
- INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
false, false)
@@ -931,8 +872,6 @@ INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-if (WidenFrequentBranches)
- INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
"Widen guards (within a single loop, as a loop pass)",
false, false)
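In the GuardWidening changes above, getCondition now recovers the user condition of a guard-expressed-as-widenable-branch via parseWidenableBranch, and widenGuard updates such branches with setWidenableBranchCond instead of rebuilding the 'and' by hand. A minimal sketch of the extraction side, assuming the GuardUtils declarations included above; the helper name guardedCondition is hypothetical:

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;

// For a widenable branch of the shape
//   br i1 (and i1 %cond, i1 %wc), label %guarded, label %deopt
// (%wc being llvm.experimental.widenable.condition), return %cond;
// for an ordinary conditional branch, return its condition unchanged.
static Value *guardedCondition(Instruction *Term) {
  Value *Cond = nullptr, *WC = nullptr;
  BasicBlock *IfTrueBB = nullptr, *IfFalseBB = nullptr;
  if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB))
    return Cond;
  return cast<BranchInst>(Term)->getCondition();
}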
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 5519a00c12c97..d8d7acae5c9fd 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,8 +31,8 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -44,7 +44,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
@@ -68,6 +67,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -79,6 +79,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
@@ -125,10 +126,9 @@ DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
cl::desc("Disable Linear Function Test Replace optimization"));
static cl::opt<bool>
-LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false),
+LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
-
namespace {
struct RewritePhi;
@@ -2663,11 +2663,11 @@ static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
// merge the max and exact information to approximate a version of
// getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
SmallVector<const SCEV*, 4> ExitCounts;
- const SCEV *MaxConstEC = SE.getConstantMaxBackedgeTakenCount(L);
- if (!isa<SCEVCouldNotCompute>(MaxConstEC))
- ExitCounts.push_back(MaxConstEC);
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ ExitCount = SE.getExitCount(L, ExitingBB,
+ ScalarEvolution::ConstantMaximum);
if (!isa<SCEVCouldNotCompute>(ExitCount)) {
assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
"We should only have known counts for exiting blocks that "
@@ -2835,6 +2835,10 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
!isSafeToExpand(ExactBTC, *SE))
return Changed;
+ // If we end up with a pointer exit count, bail. It may be unsized.
+ if (!ExactBTC->getType()->isIntegerTy())
+ return Changed;
+
auto BadExit = [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
@@ -2865,6 +2869,10 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
!isSafeToExpand(ExitCount, *SE))
return true;
+ // If we end up with a pointer exit count, bail. It may be unsized.
+ if (!ExitCount->getType()->isIntegerTy())
+ return true;
+
return false;
};
@@ -2936,7 +2944,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// varying check.
Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
IRBuilder<> B(L->getLoopPreheader()->getTerminator());
- Value *ExactBTCV = nullptr; //lazy generated if needed
+ Value *ExactBTCV = nullptr; // Lazily generated if needed.
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
@@ -2991,15 +2999,15 @@ bool IndVarSimplify::run(Loop *L) {
if (!L->isLoopSimplifyForm())
return false;
- // If there are any floating-point recurrences, attempt to
- // transform them to use integer recurrences.
- Changed |= rewriteNonIntegerIVs(L);
-
#ifndef NDEBUG
// Used below for a consistency check only
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif
+ // If there are any floating-point recurrences, attempt to
+ // transform them to use integer recurrences.
+ Changed |= rewriteNonIntegerIVs(L);
+
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
@@ -3026,11 +3034,19 @@ bool IndVarSimplify::run(Loop *L) {
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
// Try to eliminate loop exits based on analyzeable exit counts
- Changed |= optimizeLoopExits(L, Rewriter);
+ if (optimizeLoopExits(L, Rewriter)) {
+ Changed = true;
+ // Given we've changed exit counts, notify SCEV
+ SE->forgetLoop(L);
+ }
// Try to form loop invariant tests for loop exits by changing how many
// iterations of the loop run when that is unobservable.
- Changed |= predicateLoopExits(L, Rewriter);
+ if (predicateLoopExits(L, Rewriter)) {
+ Changed = true;
+ // Given we've changed exit counts, notify SCEV
+ SE->forgetLoop(L);
+ }
// If we have a trip count expression, rewrite the loop's exit condition
// using it.
@@ -3118,7 +3134,8 @@ bool IndVarSimplify::run(Loop *L) {
"Indvars did not preserve LCSSA!");
// Verify that LFTR, and any other change have not interfered with SCEV's
- // ability to compute trip count.
+ // ability to compute trip count. We may have *changed* the exit count, but
+ // only by reducing it.
#ifndef NDEBUG
if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
SE->forgetLoop(L);
@@ -3130,7 +3147,8 @@ bool IndVarSimplify::run(Loop *L) {
else
BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
NewBECount->getType());
- assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
+ assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
+ NewBECount) && "indvars must preserve SCEV");
}
#endif
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 997d68838152f..58469749600e5 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -74,6 +74,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index e7e73a132fbec..dfb1b6bfb7398 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -141,11 +141,11 @@ using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
/// InferAddressSpaces
class InferAddressSpaces : public FunctionPass {
- const TargetTransformInfo *TTI;
+ const TargetTransformInfo *TTI = nullptr;
/// Target specific address space which uses of should be replaced if
/// possible.
- unsigned FlatAddrSpace;
+ unsigned FlatAddrSpace = 0;
public:
static char ID;
@@ -791,8 +791,8 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
- B.CreateMemSet(NewV, MSI->getValue(),
- MSI->getLength(), MSI->getDestAlignment(),
+ B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(),
+ MaybeAlign(MSI->getDestAlignment()),
false, // isVolatile
TBAA, ScopeMD, NoAliasMD);
} else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
@@ -808,15 +808,13 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
if (isa<MemCpyInst>(MTI)) {
MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
- B.CreateMemCpy(Dest, MTI->getDestAlignment(),
- Src, MTI->getSourceAlignment(),
+ B.CreateMemCpy(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
MTI->getLength(),
false, // isVolatile
TBAA, TBAAStruct, ScopeMD, NoAliasMD);
} else {
assert(isa<MemMoveInst>(MTI));
- B.CreateMemMove(Dest, MTI->getDestAlignment(),
- Src, MTI->getSourceAlignment(),
+ B.CreateMemMove(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
MTI->getLength(),
false, // isVolatile
TBAA, ScopeMD, NoAliasMD);
diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
index ec28f790f2522..e8bbf2936da65 100644
--- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 0cf00baaa24a0..98c2fcb3dae0f 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -55,6 +55,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
@@ -305,14 +306,13 @@ bool JumpThreading::runOnFunction(Function &F) {
DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData = F.hasProfileData();
- if (HasProfileData) {
+ if (F.hasProfileData()) {
LoopInfo LI{DominatorTree(F)};
BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, HasProfileData,
+ bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -339,7 +339,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, HasProfileData,
+ bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (!Changed)
@@ -1002,49 +1002,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
- if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
- const Instruction *TI = SinglePred->getTerminator();
- if (!TI->isExceptionalTerminator() && TI->getNumSuccessors() == 1 &&
- SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
- // If SinglePred was a loop header, BB becomes one.
- if (LoopHeaders.erase(SinglePred))
- LoopHeaders.insert(BB);
-
- LVI->eraseBlock(SinglePred);
- MergeBasicBlockIntoOnlyPred(BB, DTU);
-
- // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
- // BB code within one basic block `BB`), we need to invalidate the LVI
- // information associated with BB, because the LVI information need not be
- // true for all of BB after the merge. For example,
- // Before the merge, LVI info and code is as follows:
- // SinglePred: <LVI info1 for %p val>
- // %y = use of %p
- // call @exit() // need not transfer execution to successor.
- // assume(%p) // from this point on %p is true
- // br label %BB
- // BB: <LVI info2 for %p val, i.e. %p is true>
- // %x = use of %p
- // br label exit
- //
- // Note that this LVI info for blocks BB and SinglPred is correct for %p
- // (info2 and info1 respectively). After the merge and the deletion of the
- // LVI info1 for SinglePred. We have the following code:
- // BB: <LVI info2 for %p val>
- // %y = use of %p
- // call @exit()
- // assume(%p)
- // %x = use of %p <-- LVI info2 is correct from here onwards.
- // br label exit
- // LVI info2 for BB is incorrect at the beginning of BB.
-
- // Invalidate LVI information for BB if the LVI is not provably true for
- // all of BB.
- if (!isGuaranteedToTransferExecutionToSuccessor(BB))
- LVI->eraseBlock(BB);
- return true;
- }
- }
+ if (MaybeMergeBasicBlockIntoOnlyPred(BB))
+ return true;
if (TryToUnfoldSelectInCurrBB(BB))
return true;
@@ -1758,7 +1717,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
getSuccessor(GetBestDestForJumpOnUndef(BB));
// Ok, try to thread it!
- return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+ return TryThreadEdge(BB, PredsToFactor, MostPopularDest);
}
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
@@ -1920,12 +1879,146 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
}
}
-/// ThreadEdge - We have decided that it is safe and profitable to factor the
-/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
-/// across BB. Transform the IR to reflect this change.
-bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock *> &PredBBs,
- BasicBlock *SuccBB) {
+/// Merge basic block BB into its sole predecessor if possible.
+bool JumpThreadingPass::MaybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+ if (!SinglePred)
+ return false;
+
+ const Instruction *TI = SinglePred->getTerminator();
+ if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 ||
+ SinglePred == BB || hasAddressTakenAndUsed(BB))
+ return false;
+
+ // If SinglePred was a loop header, BB becomes one.
+ if (LoopHeaders.erase(SinglePred))
+ LoopHeaders.insert(BB);
+
+ LVI->eraseBlock(SinglePred);
+ MergeBasicBlockIntoOnlyPred(BB, DTU);
+
+ // Now that BB is merged into SinglePred (i.e. SinglePred code followed by
+ // BB code within one basic block `BB`), we need to invalidate the LVI
+ // information associated with BB, because the LVI information need not be
+ // true for all of BB after the merge. For example,
+ // Before the merge, LVI info and code is as follows:
+ // SinglePred: <LVI info1 for %p val>
+ // %y = use of %p
+ // call @exit() // need not transfer execution to successor.
+ // assume(%p) // from this point on %p is true
+ // br label %BB
+ // BB: <LVI info2 for %p val, i.e. %p is true>
+ // %x = use of %p
+ // br label exit
+ //
+ // Note that this LVI info for blocks BB and SinglePred is correct for %p
+ // (info2 and info1 respectively). After the merge and the deletion of the
+ // LVI info1 for SinglePred, we have the following code:
+ // BB: <LVI info2 for %p val>
+ // %y = use of %p
+ // call @exit()
+ // assume(%p)
+ // %x = use of %p <-- LVI info2 is correct from here onwards.
+ // br label exit
+ // LVI info2 for BB is incorrect at the beginning of BB.
+
+ // Invalidate LVI information for BB if the LVI is not provably true for
+ // all of BB.
+ if (!isGuaranteedToTransferExecutionToSuccessor(BB))
+ LVI->eraseBlock(BB);
+ return true;
+}
+
+/// Update the SSA form. NewBB contains instructions that are copied from BB.
+/// ValueMapping maps old values in BB to new ones in NewBB.
+void JumpThreadingPass::UpdateSSA(
+ BasicBlock *BB, BasicBlock *NewBB,
+ DenseMap<Instruction *, Value *> &ValueMapping) {
+ // If there were values defined in BB that are used outside the block, then we
+ // now have to update all uses of the value to use either the original value,
+ // the cloned value, or some PHI derived value. This can require arbitrary
+ // PHI insertion, which we are prepared to do; clean these up now.
+ SSAUpdater SSAUpdate;
+ SmallVector<Use *, 16> UsesToRename;
+
+ for (Instruction &I : *BB) {
+ // Scan all uses of this instruction to see if it is used outside of its
+ // block, and if so, record them in UsesToRename.
+ for (Use &U : I.uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(U) == BB)
+ continue;
+ } else if (User->getParent() == BB)
+ continue;
+
+ UsesToRename.push_back(&U);
+ }
+
+ // If there are no uses outside the block, we're done with this instruction.
+ if (UsesToRename.empty())
+ continue;
+ LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
+
+ // We found a use of I outside of BB. Rename all uses of I that are outside
+ // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
+ // with the two values we know.
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+}
+
+/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
+/// arguments that come from PredBB. Return the map from the variables in the
+/// source basic block to the variables in the newly created basic block.
+DenseMap<Instruction *, Value *>
+JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
+ BasicBlock::iterator BE, BasicBlock *NewBB,
+ BasicBlock *PredBB) {
+ // We are going to have to map operands from the source basic block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in the source basic
+ // block, evaluate them to account for entry from PredBB.
+ DenseMap<Instruction *, Value *> ValueMapping;
+
+ // Clone the phi nodes of the source basic block into NewBB. The resulting
+ // phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater
+ // might need to rewrite the operand of the cloned phi.
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
+ NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
+ ValueMapping[PN] = NewPN;
+ }
+
+ // Clone the non-phi instructions of the source basic block into NewBB,
+ // keeping track of the mapping and using it to remap operands in the cloned
+ // instructions.
+ for (; BI != BE; ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ NewBB->getInstList().push_back(New);
+ ValueMapping[&*BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ return ValueMapping;
+}
+
+/// TryThreadEdge - Thread an edge if it's safe and profitable to do so.
+bool JumpThreadingPass::TryThreadEdge(
+ BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
+ BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
@@ -1955,6 +2048,21 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
return false;
}
+ ThreadEdge(BB, PredBBs, SuccBB);
+ return true;
+}
+
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
+void JumpThreadingPass::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &PredBBs,
+ BasicBlock *SuccBB) {
+ assert(SuccBB != BB && "Don't create an infinite loop");
+
+ assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
+ "Don't thread across loop headers");
+
// And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -1968,7 +2076,6 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// And finally, do it!
LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()
<< "' to '" << SuccBB->getName()
- << "' with cost: " << JumpThreadCost
<< ", across block:\n " << *BB << "\n");
if (DTU->hasPendingDomTreeUpdates())
@@ -1977,11 +2084,6 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
LVI->enableDT();
LVI->threadEdge(PredBB, BB, SuccBB);
- // We are going to have to map operands from the original BB block to the new
- // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
- // account for entry from PredBB.
- DenseMap<Instruction*, Value*> ValueMapping;
-
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
BB->getName()+".thread",
BB->getParent(), BB);
@@ -1994,32 +2096,9 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
- BasicBlock::iterator BI = BB->begin();
- // Clone the phi nodes of BB into NewBB. The resulting phi nodes are trivial,
- // since NewBB only has one predecessor, but SSAUpdater might need to rewrite
- // the operand of the cloned phi.
- for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
- PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
- NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
- ValueMapping[PN] = NewPN;
- }
-
- // Clone the non-phi instructions of BB into NewBB, keeping track of the
- // mapping and using it to remap operands in the cloned instructions.
- for (; !BI->isTerminator(); ++BI) {
- Instruction *New = BI->clone();
- New->setName(BI->getName());
- NewBB->getInstList().push_back(New);
- ValueMapping[&*BI] = New;
-
- // Remap operands to patch up intra-block references.
- for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
- if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
- DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
- if (I != ValueMapping.end())
- New->setOperand(i, I->second);
- }
- }
+ // Copy all the instructions from BB to NewBB except the terminator.
+ DenseMap<Instruction *, Value *> ValueMapping =
+ CloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
@@ -2045,44 +2124,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
{DominatorTree::Insert, PredBB, NewBB},
{DominatorTree::Delete, PredBB, BB}});
- // If there were values defined in BB that are used outside the block, then we
- // now have to update all uses of the value to use either the original value,
- // the cloned value, or some PHI derived value. This can require arbitrary
- // PHI insertion, of which we are prepared to do, clean these up now.
- SSAUpdater SSAUpdate;
- SmallVector<Use*, 16> UsesToRename;
-
- for (Instruction &I : *BB) {
- // Scan all uses of this instruction to see if their uses are no longer
- // dominated by the previous def and if so, record them in UsesToRename.
- // Also, skip phi operands from PredBB - we'll remove them anyway.
- for (Use &U : I.uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(U) == BB)
- continue;
- } else if (User->getParent() == BB)
- continue;
-
- UsesToRename.push_back(&U);
- }
-
- // If there are no uses outside the block, we're done with this instruction.
- if (UsesToRename.empty())
- continue;
- LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
-
- // We found a use of I outside of BB. Rename all uses of I that are outside
- // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
- // with the two values we know.
- SSAUpdate.Initialize(I.getType(), I.getName());
- SSAUpdate.AddAvailableValue(BB, &I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
-
- while (!UsesToRename.empty())
- SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- LLVM_DEBUG(dbgs() << "\n");
- }
+ UpdateSSA(BB, NewBB, ValueMapping);
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
@@ -2094,7 +2136,6 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
// Threaded an edge!
++NumThreads;
- return true;
}
/// Create a new basic block that will be the predecessor of BB and successor of
@@ -2366,43 +2407,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
ValueMapping);
- // If there were values defined in BB that are used outside the block, then we
- // now have to update all uses of the value to use either the original value,
- // the cloned value, or some PHI derived value. This can require arbitrary
- // PHI insertion, of which we are prepared to do, clean these up now.
- SSAUpdater SSAUpdate;
- SmallVector<Use*, 16> UsesToRename;
- for (Instruction &I : *BB) {
- // Scan all uses of this instruction to see if it is used outside of its
- // block, and if so, record them in UsesToRename.
- for (Use &U : I.uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(U) == BB)
- continue;
- } else if (User->getParent() == BB)
- continue;
-
- UsesToRename.push_back(&U);
- }
-
- // If there are no uses outside the block, we're done with this instruction.
- if (UsesToRename.empty())
- continue;
-
- LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
-
- // We found a use of I outside of BB. Rename all uses of I that are outside
- // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
- // with the two values we know.
- SSAUpdate.Initialize(I.getType(), I.getName());
- SSAUpdate.AddAvailableValue(BB, &I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
-
- while (!UsesToRename.empty())
- SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- LLVM_DEBUG(dbgs() << "\n");
- }
+ UpdateSSA(BB, PredBB, ValueMapping);
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
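Editorial note: the JumpThreading refactor above factors the repeated SSAUpdater rename loop into UpdateSSA(). A generic sketch of that pattern, with names of our own choosing (rewriteNonLocalUses is illustrative, not this commit's API): given an original definition in OrigBB and its clone in CloneBB, every use outside OrigBB is rewritten through the SSAUpdater.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

static void rewriteNonLocalUses(Instruction &OrigDef, Value &CloneDef,
                                BasicBlock *OrigBB, BasicBlock *CloneBB) {
  SmallVector<Use *, 16> UsesToRename;
  for (Use &U : OrigDef.uses()) {
    auto *User = cast<Instruction>(U.getUser());
    // A PHI use counts as "outside" only if its incoming block is not OrigBB.
    if (auto *PN = dyn_cast<PHINode>(User)) {
      if (PN->getIncomingBlock(U) == OrigBB)
        continue;
    } else if (User->getParent() == OrigBB)
      continue;
    UsesToRename.push_back(&U);
  }
  if (UsesToRename.empty())
    return;

  // Seed the updater with the two known definitions and rewrite each use.
  SSAUpdater SSAUpdate;
  SSAUpdate.Initialize(OrigDef.getType(), OrigDef.getName());
  SSAUpdate.AddAvailableValue(OrigBB, &OrigDef);
  SSAUpdate.AddAvailableValue(CloneBB, &CloneDef);
  while (!UsesToRename.empty())
    SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
}
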
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 6ce4831a73592..8c33045c23802 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -63,6 +63,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -137,7 +138,8 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
TargetTransformInfo *TTI, bool &FreeInLoop);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
@@ -162,7 +164,7 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU);
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
namespace {
struct LoopInvariantCodeMotion {
@@ -390,8 +392,9 @@ bool LoopInvariantCodeMotion::runOnLoop(
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
Flags.IsSink = false;
if (Preheader)
- Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+ Changed |=
+ hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
+ CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -787,6 +790,41 @@ public:
};
} // namespace
+
+/// Return true if we know how to rewrite all uses of the given alloca after
+/// hoisting it out of the loop. The main concerns are a) potential captures
+/// and b) invariant.start markers which don't capture, but are no longer
+/// valid w/o a corresponding invariant.end.
+static bool canRewriteUsesOfAlloca(AllocaInst &AI) {
+ // TODO: This looks a lot like capture tracking, but we need to remove any
+ // invariant starts if we extend the lifetime of the alloca by hoisting it.
+ // We should probably refactor capture tracking into a form which allows us
+ // to reuse the relevant bits and remove the duplicated logic here.
+
+ SmallVector<Use *, 16> Worklist;
+ for (Use &U : AI.uses())
+ Worklist.push_back(&U);
+
+ unsigned NumUsesExplored = 0;
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+ NumUsesExplored++;
+ if (NumUsesExplored > DefaultMaxUsesToExplore)
+ return false;
+ // Non-capturing, terminating uses
+ if (isa<LoadInst>(I) ||
+ (isa<StoreInst>(I) && U->getOperandNo() == 1))
+ continue;
+ // Non-capturing, non-terminating
+ if (!isa<BitCastInst>(I) && !isa<GetElementPtrInst>(I))
+ return false;
+ for (Use &U : I->uses())
+ Worklist.push_back(&U);
+ }
+ return true;
+}
+
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@@ -795,7 +833,7 @@ public:
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ICFLoopSafetyInfo *SafetyInfo,
+ ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
@@ -855,7 +893,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
- MSSAU, ORE);
+ MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
Changed = true;
continue;
@@ -882,7 +920,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
- SafetyInfo, MSSAU, ORE);
+ SafetyInfo, MSSAU, SE, ORE);
HoistedInstructions.push_back(ReciprocalDivisor);
Changed = true;
continue;
@@ -901,7 +939,17 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
CurLoop->hasLoopInvariantOperands(&I) &&
MustExecuteWithoutWritesBefore(I)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
- MSSAU, ORE);
+ MSSAU, SE, ORE);
+ HoistedInstructions.push_back(&I);
+ Changed = true;
+ continue;
+ }
+
+ if (isa<AllocaInst>(&I) &&
+ SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+ canRewriteUsesOfAlloca(cast<AllocaInst>(I))) {
+ hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
+ MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
Changed = true;
continue;
@@ -915,7 +963,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
PN->setIncomingBlock(
i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
- MSSAU, ORE);
+ MSSAU, SE, ORE);
assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
Changed = true;
continue;
@@ -952,7 +1000,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LICM rehoisting to "
<< HoistPoint->getParent()->getName()
<< ": " << *I << "\n");
- moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
+ moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
HoistPoint = I;
Changed = true;
}
@@ -1142,6 +1190,10 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// Assumes don't actually alias anything or throw
return true;
+ if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+ // Widenable conditions don't actually alias anything or throw
+ return true;
+
// Handle simple cases by querying alias analysis.
FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == FMRB_DoesNotAccessMemory)
@@ -1441,14 +1493,18 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ ScalarEvolution *SE) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest.getParent());
I.moveBefore(&Dest);
if (MSSAU)
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU->getMemorySSA()->getMemoryAccess(&I)))
- MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
+ MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
+ MemorySSA::BeforeTerminator);
+ if (SE)
+ SE->forgetValue(&I);
}
static Instruction *sinkThroughTriviallyReplaceablePHI(
@@ -1662,7 +1718,8 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
///
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
<< "\n");
ORE->emit([&]() {
@@ -1683,10 +1740,10 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
if (isa<PHINode>(I))
// Move the new node to the end of the phi list in the destination block.
- moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
+ moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
else
// Move the new node to the destination block, before its terminator.
- moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
+ moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
// Apply line 0 debug locations when we are moving instructions to different
// basic blocks because we want to avoid jumpy line tables.
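Editorial note: the LICM hunks above thread a ScalarEvolution pointer into hoist()/moveInstructionBefore() so that cached SCEVs for moved instructions are invalidated. A minimal sketch of that invalidation step, assuming a helper name of our own (moveAndInvalidateSCEV is illustrative only):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static void moveAndInvalidateSCEV(Instruction &I, Instruction &Dest,
                                  ScalarEvolution *SE) {
  I.moveBefore(&Dest);
  if (SE)
    SE->forgetValue(&I); // drop any SCEV cached for I at its old position
}
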
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index a972d6fa2fcdf..ab65f56d088f7 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
+#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "loop-data-prefetch"
#include "llvm/ADT/DepthFirstIterator.h"
diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index cee197cf83540..2451572d61715 100644
--- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index f45e5fd0f50b4..8e04e6e0ffe83 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -55,6 +55,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 9f93c68e6128d..e1738f08eb238 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -55,12 +55,15 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CodeMoverUtils.h"
using namespace llvm;
@@ -88,6 +91,7 @@ STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
STATISTIC(NonIdenticalGuards, "Candidates have different guards");
STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block");
+STATISTIC(NotRotated, "Candidate is not rotated");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -163,14 +167,8 @@ struct FusionCandidate {
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
- Latch(L->getLoopLatch()), L(L), Valid(true), GuardBranch(nullptr),
- DT(DT), PDT(PDT), ORE(ORE) {
-
- // TODO: This is temporary while we fuse both rotated and non-rotated
- // loops. Once we switch to only fusing rotated loops, the initialization of
- // GuardBranch can be moved into the initialization list above.
- if (isRotated())
- GuardBranch = L->getLoopGuardBranch();
+ Latch(L->getLoopLatch()), L(L), Valid(true),
+ GuardBranch(L->getLoopGuardBranch()), DT(DT), PDT(PDT), ORE(ORE) {
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
@@ -257,15 +255,14 @@ struct FusionCandidate {
: GuardBranch->getSuccessor(0);
}
- bool isRotated() const {
- assert(L && "Expecting loop to be valid.");
- assert(Latch && "Expecting latch to be valid.");
- return L->isLoopExiting(Latch);
- }
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const {
- dbgs() << "\tGuardBranch: "
+ dbgs() << "\tGuardBranch: ";
+ if (GuardBranch)
+ dbgs() << *GuardBranch;
+ else
+ dbgs() << "nullptr";
+ dbgs() << "\n"
<< (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n"
<< "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
<< "\n"
@@ -316,6 +313,11 @@ struct FusionCandidate {
return reportInvalidCandidate(NotSimplifiedForm);
}
+ if (!L->isRotatedForm()) {
+ LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n");
+ return reportInvalidCandidate(NotRotated);
+ }
+
return true;
}
@@ -591,16 +593,8 @@ private:
const FusionCandidate &FC1) const {
assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
- BasicBlock *FC0EntryBlock = FC0.getEntryBlock();
- BasicBlock *FC1EntryBlock = FC1.getEntryBlock();
-
- if (DT.dominates(FC0EntryBlock, FC1EntryBlock))
- return PDT.dominates(FC1EntryBlock, FC0EntryBlock);
-
- if (DT.dominates(FC1EntryBlock, FC0EntryBlock))
- return PDT.dominates(FC0EntryBlock, FC1EntryBlock);
-
- return false;
+ return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
+ DT, PDT);
}
/// Iterate over all loops in the given loop set and identify the loops that
@@ -1113,6 +1107,29 @@ private:
return FC.ExitBlock->size() == 1;
}
+ /// Simplify the condition of the latch branch of \p FC to true, when both of
+ /// its successors are the same.
+ void simplifyLatchBranch(const FusionCandidate &FC) const {
+ BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
+ if (FCLatchBranch) {
+ assert(FCLatchBranch->isConditional() &&
+ FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
+ "Expecting the two successors of FCLatchBranch to be the same");
+ FCLatchBranch->setCondition(
+ llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType()));
+ }
+ }
+
+ /// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has a unique
+ /// successor, then merge FC0.Latch with its unique successor.
+ void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
+ moveInstsBottomUp(*FC0.Latch, *FC1.Latch, DT, PDT, DI);
+ if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
+ MergeBlockIntoPredecessor(Succ, &DTU, &LI);
+ DTU.flush();
+ }
+ }
+
/// Fuse two fusion candidates, creating a new fused loop.
///
/// This method contains the mechanics of fusing two loops, represented by \p
@@ -1246,6 +1263,10 @@ private:
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+ // Change the condition of FC0 latch branch to true, as both successors of
+ // the branch are the same.
+ simplifyLatchBranch(FC0);
+
// If FC0.Latch and FC0.ExitingBlock are the same then we have already
// performed the updates above.
if (FC0.Latch != FC0.ExitingBlock)
@@ -1268,9 +1289,15 @@ private:
// Is there a way to keep SE up-to-date so we don't need to forget the loops
// and rebuild the information in subsequent passes of fusion?
+ // Note: Need to forget the loops before merging the loop latches, as
+ // mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);
+ // Move instructions from FC0.Latch to FC1.Latch.
+ // Note: mergeLatch requires an updated DT.
+ mergeLatch(FC0, FC1);
+
// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
FC1.L->block_end());
@@ -1490,6 +1517,10 @@ private:
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+ // Change the condition of FC0 latch branch to true, as both successors of
+ // the branch are the same.
+ simplifyLatchBranch(FC0);
+
// If FC0.Latch and FC0.ExitingBlock are the same then we have already
// performed the updates above.
if (FC0.Latch != FC0.ExitingBlock)
@@ -1521,9 +1552,15 @@ private:
// Is there a way to keep SE up-to-date so we don't need to forget the loops
// and rebuild the information in subsequent passes of fusion?
+ // Note: Need to forget the loops before merging the loop latches, as
+ // mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);
+ // Move instructions from FC0.Latch to FC1.Latch.
+ // Note: mergeLatch requires an updated DT.
+ mergeLatch(FC0, FC1);
+
// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
FC1.L->block_end());
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index dd477e8006937..b77843d7cd711 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -41,7 +41,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -78,20 +77,17 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -107,7 +103,6 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
-STATISTIC(NumBCmp, "Number of memcmp's formed from loop 2xload+eq-compare");
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
@@ -117,26 +112,6 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
namespace {
-// FIXME: reinventing the wheel much? Is there a cleaner solution?
-struct PMAbstraction {
- virtual void markLoopAsDeleted(Loop *L) = 0;
- virtual ~PMAbstraction() = default;
-};
-struct LegacyPMAbstraction : PMAbstraction {
- LPPassManager &LPM;
- LegacyPMAbstraction(LPPassManager &LPM) : LPM(LPM) {}
- virtual ~LegacyPMAbstraction() = default;
- void markLoopAsDeleted(Loop *L) override { LPM.markLoopAsDeleted(*L); }
-};
-struct NewPMAbstraction : PMAbstraction {
- LPMUpdater &Updater;
- NewPMAbstraction(LPMUpdater &Updater) : Updater(Updater) {}
- virtual ~NewPMAbstraction() = default;
- void markLoopAsDeleted(Loop *L) override {
- Updater.markLoopAsDeleted(*L, L->getName());
- }
-};
-
class LoopIdiomRecognize {
Loop *CurLoop = nullptr;
AliasAnalysis *AA;
@@ -146,7 +121,6 @@ class LoopIdiomRecognize {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
- PMAbstraction &LoopDeleter;
OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
@@ -155,10 +129,9 @@ public:
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const DataLayout *DL, PMAbstraction &LoopDeleter,
+ const DataLayout *DL,
OptimizationRemarkEmitter &ORE)
- : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL),
- LoopDeleter(LoopDeleter), ORE(ORE) {}
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
bool runOnLoop(Loop *L);
@@ -172,8 +145,6 @@ private:
bool HasMemset;
bool HasMemsetPattern;
bool HasMemcpy;
- bool HasMemCmp;
- bool HasBCmp;
/// Return code for isLegalStore()
enum LegalStoreKind {
@@ -201,7 +172,7 @@ private:
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment, Value *StoredVal,
+ MaybeAlign StoreAlignment, Value *StoredVal,
Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores,
const SCEVAddRecExpr *Ev, const SCEV *BECount,
@@ -216,32 +187,6 @@ private:
bool runOnNoncountableLoop();
- struct CmpLoopStructure {
- Value *BCmpValue, *LatchCmpValue;
- BasicBlock *HeaderBrEqualBB, *HeaderBrUnequalBB;
- BasicBlock *LatchBrFinishBB, *LatchBrContinueBB;
- };
- bool matchBCmpLoopStructure(CmpLoopStructure &CmpLoop) const;
- struct CmpOfLoads {
- ICmpInst::Predicate BCmpPred;
- Value *LoadSrcA, *LoadSrcB;
- Value *LoadA, *LoadB;
- };
- bool matchBCmpOfLoads(Value *BCmpValue, CmpOfLoads &CmpOfLoads) const;
- bool recognizeBCmpLoopControlFlow(const CmpOfLoads &CmpOfLoads,
- CmpLoopStructure &CmpLoop) const;
- bool recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, CmpOfLoads &CmpOfLoads,
- const SCEV *&SrcA, const SCEV *&SrcB,
- const SCEV *&Iterations) const;
- bool detectBCmpIdiom(ICmpInst *&BCmpInst, CmpInst *&LatchCmpInst,
- LoadInst *&LoadA, LoadInst *&LoadB, const SCEV *&SrcA,
- const SCEV *&SrcB, const SCEV *&NBytes) const;
- BasicBlock *transformBCmpControlFlow(ICmpInst *ComparedEqual);
- void transformLoopToBCmp(ICmpInst *BCmpInst, CmpInst *LatchCmpInst,
- LoadInst *LoadA, LoadInst *LoadB, const SCEV *SrcA,
- const SCEV *SrcB, const SCEV *NBytes);
- bool recognizeBCmp();
-
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
@@ -279,14 +224,13 @@ public:
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
- LegacyPMAbstraction LoopDeleter(LPM);
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, LoopDeleter, ORE);
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
return LIR.runOnLoop(L);
}
@@ -305,7 +249,7 @@ char LoopIdiomRecognizeLegacyPass::ID = 0;
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &Updater) {
+ LPMUpdater &) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
const auto &FAM =
@@ -319,9 +263,8 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
"LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
"at a higher level");
- NewPMAbstraction LoopDeleter(Updater);
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
- LoopDeleter, *ORE);
+ *ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -358,8 +301,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
- if (Name == "memset" || Name == "memcpy" || Name == "memcmp" ||
- Name == "bcmp")
+ if (Name == "memset" || Name == "memcpy")
return false;
// Determine if code size heuristics need to be applied.
@@ -369,10 +311,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
HasMemset = TLI->has(LibFunc_memset);
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);
- HasMemCmp = TLI->has(LibFunc_memcmp);
- HasBCmp = TLI->has(LibFunc_bcmp);
- if (HasMemset || HasMemsetPattern || HasMemcpy || HasMemCmp || HasBCmp)
+ if (HasMemset || HasMemsetPattern || HasMemcpy)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();
@@ -791,7 +731,8 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
bool NegStride = StoreSize == -Stride;
- if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
+ if (processLoopStridedStore(StorePtr, StoreSize,
+ MaybeAlign(HeadStore->getAlignment()),
StoredVal, HeadStore, AdjacentStores, StoreEv,
BECount, NegStride)) {
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
@@ -846,9 +787,9 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
bool NegStride = SizeInBytes == -Stride;
- return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getDestAlignment(), SplatValue, MSI, MSIs,
- Ev, BECount, NegStride, /*IsLoopMemset=*/true);
+ return processLoopStridedStore(
+ Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
+ SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -938,7 +879,7 @@ static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
- Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
+ Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
@@ -960,12 +901,12 @@ bool LoopIdiomRecognize::processLoopStridedStore(
SCEVExpander Expander(*SE, *DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
- Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
+ Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
if (NegStride)
- Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
+ Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -993,7 +934,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, everything looks good, insert the memset.
const SCEV *NumBytesS =
- getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -1001,12 +942,12 @@ bool LoopIdiomRecognize::processLoopStridedStore(
return false;
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
CallInst *NewCall;
if (SplatValue) {
- NewCall =
- Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
+ MaybeAlign(StoreAlignment));
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
@@ -1014,7 +955,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Module *M = TheStore->getModule();
StringRef FuncName = "memset_pattern16";
FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
- Int8PtrTy, Int8PtrTy, IntPtr);
+ Int8PtrTy, Int8PtrTy, IntIdxTy);
inferLibFuncAttributes(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
@@ -1081,11 +1022,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *StrStart = StoreEv->getStart();
unsigned StrAS = SI->getPointerAddressSpace();
- Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
+ Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
// Handle negative strided loops.
if (NegStride)
- StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
+ StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1111,7 +1052,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Handle negative strided loops.
if (NegStride)
- LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
+ LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
@@ -1133,18 +1074,18 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// Okay, everything is safe, we can transform this!
const SCEV *NumBytesS =
- getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!SI->isAtomic() && !LI->isAtomic())
- NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
- LoadBasePtr, LI->getAlignment(), NumBytes);
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
+ LI->getAlign(), NumBytes);
else {
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
@@ -1211,7 +1152,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
<< "] Noncountable Loop %"
<< CurLoop->getHeader()->getName() << "\n");
- return recognizeBCmp() || recognizePopcount() || recognizeAndInsertFFS();
+ return recognizePopcount() || recognizeAndInsertFFS();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1885,811 +1826,3 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
-
-bool LoopIdiomRecognize::matchBCmpLoopStructure(
- CmpLoopStructure &CmpLoop) const {
- ICmpInst::Predicate BCmpPred;
-
- // We are looking for the following basic layout:
- // PreheaderBB: <preheader> ; preds = ???
- // <...>
- // br label %LoopHeaderBB
- // LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
- // <...>
- // %BCmpValue = icmp <...>
- // br i1 %BCmpValue, label %LoopLatchBB, label %Successor0
- // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
- // <...>
- // %LatchCmpValue = <are we done, or do next iteration?>
- // br i1 %LatchCmpValue, label %Successor1, label %LoopHeaderBB
- // Successor0: <exit> ; preds = %LoopHeaderBB
- // <...>
- // Successor1: <exit> ; preds = %LoopLatchBB
- // <...>
- //
- // Successor0 and Successor1 may or may not be the same basic block.
-
- // Match basic frame-work of this supposedly-comparison loop.
- using namespace PatternMatch;
- if (!match(CurLoop->getHeader()->getTerminator(),
- m_Br(m_CombineAnd(m_ICmp(BCmpPred, m_Value(), m_Value()),
- m_Value(CmpLoop.BCmpValue)),
- CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB)) ||
- !match(CurLoop->getLoopLatch()->getTerminator(),
- m_Br(m_CombineAnd(m_Cmp(), m_Value(CmpLoop.LatchCmpValue)),
- CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB))) {
- LLVM_DEBUG(dbgs() << "Basic control-flow layout unrecognized.\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << "Recognized basic control-flow layout.\n");
- return true;
-}
-
-bool LoopIdiomRecognize::matchBCmpOfLoads(Value *BCmpValue,
- CmpOfLoads &CmpOfLoads) const {
- using namespace PatternMatch;
- LLVM_DEBUG(dbgs() << "Analyzing header icmp " << *BCmpValue
- << " as bcmp pattern.\n");
-
- // Match bcmp-style loop header cmp. It must be an eq-icmp of loads. Example:
- // %v0 = load <...>, <...>* %LoadSrcA
- // %v1 = load <...>, <...>* %LoadSrcB
- // %CmpLoop.BCmpValue = icmp eq <...> %v0, %v1
- // There won't be any no-op bitcasts between load and icmp,
- // they would have been transformed into a load of bitcast.
- // FIXME: {b,mem}cmp() calls have the same semantics as icmp. Match them too.
- if (!match(BCmpValue,
- m_ICmp(CmpOfLoads.BCmpPred,
- m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcA)),
- m_Value(CmpOfLoads.LoadA)),
- m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcB)),
- m_Value(CmpOfLoads.LoadB)))) ||
- !ICmpInst::isEquality(CmpOfLoads.BCmpPred)) {
- LLVM_DEBUG(dbgs() << "Loop header icmp did not match bcmp pattern.\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t"
- << *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads.LoadB
- << "\n");
- // FIXME: handle memcmp pattern?
- return true;
-}
-
-bool LoopIdiomRecognize::recognizeBCmpLoopControlFlow(
- const CmpOfLoads &CmpOfLoads, CmpLoopStructure &CmpLoop) const {
- BasicBlock *LoopHeaderBB = CurLoop->getHeader();
- BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
-
- // Be wary, comparisons can be inverted, canonicalize order.
- // If this 'element' comparison passed, we expect to proceed to the next elt.
- if (CmpOfLoads.BCmpPred != ICmpInst::Predicate::ICMP_EQ)
- std::swap(CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB);
- // The predicate on loop latch does not matter, just canonicalize some order.
- if (CmpLoop.LatchBrContinueBB != LoopHeaderBB)
- std::swap(CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB);
-
- SmallVector<BasicBlock *, 2> ExitBlocks;
-
- CurLoop->getUniqueExitBlocks(ExitBlocks);
- assert(ExitBlocks.size() <= 2U && "Can't have more than two exit blocks.");
-
- // Check that control-flow between blocks is as expected.
- if (CmpLoop.HeaderBrEqualBB != LoopLatchBB ||
- CmpLoop.LatchBrContinueBB != LoopHeaderBB ||
- !is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) ||
- !is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB)) {
- LLVM_DEBUG(dbgs() << "Loop control-flow not recognized.\n");
- return false;
- }
-
- assert(!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
- !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
- "Unexpected exit edges.");
-
- LLVM_DEBUG(dbgs() << "Recognized loop control-flow.\n");
-
- LLVM_DEBUG(dbgs() << "Performing side-effect analysis on the loop.\n");
- assert(CurLoop->isLCSSAForm(*DT) && "Should only get LCSSA-form loops here.");
- // No loop instructions must be used outside of the loop. Since we are in
- // LCSSA form, we only need to check successor block's PHI nodes's incoming
- // values for incoming blocks that are the loop basic blocks.
- for (const BasicBlock *ExitBB : ExitBlocks) {
- for (const PHINode &PHI : ExitBB->phis()) {
- for (const BasicBlock *LoopBB :
- make_filter_range(PHI.blocks(), [this](BasicBlock *PredecessorBB) {
- return CurLoop->contains(PredecessorBB);
- })) {
- const auto *I =
- dyn_cast<Instruction>(PHI.getIncomingValueForBlock(LoopBB));
- if (I && CurLoop->contains(I)) {
- LLVM_DEBUG(dbgs()
- << "Loop contains instruction " << *I
- << " which is used outside of the loop in basic block "
- << ExitBB->getName() << " in phi node " << PHI << "\n");
- return false;
- }
- }
- }
- }
- // Similarly, the loop should not have any other observable side-effects
- // other than the final comparison result.
- for (BasicBlock *LoopBB : CurLoop->blocks()) {
- for (Instruction &I : *LoopBB) {
- if (isa<DbgInfoIntrinsic>(I)) // Ignore dbginfo.
- continue; // FIXME: anything else? lifetime info?
- if ((I.mayHaveSideEffects() || I.isAtomic() || I.isFenceLike()) &&
- &I != CmpOfLoads.LoadA && &I != CmpOfLoads.LoadB) {
- LLVM_DEBUG(
- dbgs() << "Loop contains instruction with potential side-effects: "
- << I << "\n");
- return false;
- }
- }
- }
- LLVM_DEBUG(dbgs() << "No loop instructions deemed to have side-effects.\n");
- return true;
-}
-
-bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes,
- CmpOfLoads &CmpOfLoads,
- const SCEV *&SrcA,
- const SCEV *&SrcB,
- const SCEV *&Iterations) const {
- // Try to compute SCEV of the loads, for this loop's scope.
- const auto *ScevForSrcA = dyn_cast<SCEVAddRecExpr>(
- SE->getSCEVAtScope(CmpOfLoads.LoadSrcA, CurLoop));
- const auto *ScevForSrcB = dyn_cast<SCEVAddRecExpr>(
- SE->getSCEVAtScope(CmpOfLoads.LoadSrcB, CurLoop));
- if (!ScevForSrcA || !ScevForSrcB) {
- LLVM_DEBUG(dbgs() << "Failed to get SCEV expressions for load sources.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t"
- << *ScevForSrcA << "\n\t" << *ScevForSrcB << "\n");
-
- // Loads must have folloving SCEV exprs: {%ptr,+,BCmpTyBytes}<%LoopHeaderBB>
- const SCEV *RecStepForA = ScevForSrcA->getStepRecurrence(*SE);
- const SCEV *RecStepForB = ScevForSrcB->getStepRecurrence(*SE);
- if (!ScevForSrcA->isAffine() || !ScevForSrcB->isAffine() ||
- ScevForSrcA->getLoop() != CurLoop || ScevForSrcB->getLoop() != CurLoop ||
- RecStepForA != RecStepForB || !isa<SCEVConstant>(RecStepForA) ||
- cast<SCEVConstant>(RecStepForA)->getAPInt() != BCmpTyBytes) {
- LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads. Only support "
- "affine SCEV expressions originating in the loop we "
- "are analysing with identical constant positive step, "
- "equal to the count of bytes compared. Got:\n\t"
- << *RecStepForA << "\n\t" << *RecStepForB << "\n");
- return false;
- // FIXME: can support BCmpTyBytes > Step.
- // But will need to account for the extra bytes compared at the end.
- }
-
- SrcA = ScevForSrcA->getStart();
- SrcB = ScevForSrcB->getStart();
- LLVM_DEBUG(dbgs() << "Got SCEV expressions for load sources:\n\t" << *SrcA
- << "\n\t" << *SrcB << "\n");
-
- // The load sources must be loop-invants that dominate the loop header.
- if (SrcA == SE->getCouldNotCompute() || SrcB == SE->getCouldNotCompute() ||
- !SE->isAvailableAtLoopEntry(SrcA, CurLoop) ||
- !SE->isAvailableAtLoopEntry(SrcB, CurLoop)) {
- LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads, unavaliable "
- "prior to loop header.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "SCEV expressions for loads are acceptable.\n");
-
- // bcmp / memcmp take length argument as size_t, so let's conservatively
- // assume that the iteration count should be not wider than that.
- Type *CmpFuncSizeTy = DL->getIntPtrType(SE->getContext());
-
- // For how many iterations is loop guaranteed not to exit via LoopLatch?
- // This is one less than the maximal number of comparisons,and is: n + -1
- const SCEV *LoopExitCount =
- SE->getExitCount(CurLoop, CurLoop->getLoopLatch());
- LLVM_DEBUG(dbgs() << "Got SCEV expression for loop latch exit count: "
- << *LoopExitCount << "\n");
- // Exit count, similarly, must be loop-invant that dominates the loop header.
- if (LoopExitCount == SE->getCouldNotCompute() ||
- !LoopExitCount->getType()->isIntOrPtrTy() ||
- LoopExitCount->getType()->getScalarSizeInBits() >
- CmpFuncSizeTy->getScalarSizeInBits() ||
- !SE->isAvailableAtLoopEntry(LoopExitCount, CurLoop)) {
- LLVM_DEBUG(dbgs() << "Unsupported SCEV expression for loop latch exit.\n");
- return false;
- }
-
- // LoopExitCount is always one less than the actual count of iterations.
- // Do this before cast, else we will be stuck with 1 + zext(-1 + n)
- Iterations = SE->getAddExpr(
- LoopExitCount, SE->getOne(LoopExitCount->getType()), SCEV::FlagNUW);
- assert(Iterations != SE->getCouldNotCompute() &&
- "Shouldn't fail to increment by one.");
-
- LLVM_DEBUG(dbgs() << "Computed iteration count: " << *Iterations << "\n");
- return true;
-}
-
-/// Return true iff the bcmp idiom is detected in the loop.
-///
-/// Additionally:
-/// 1) \p BCmpInst is set to the root byte-comparison instruction.
-/// 2) \p LatchCmpInst is set to the comparison that controls the latch.
-/// 3) \p LoadA is set to the first LoadInst.
-/// 4) \p LoadB is set to the second LoadInst.
-/// 5) \p SrcA is set to the first source location that is being compared.
-/// 6) \p SrcB is set to the second source location that is being compared.
-/// 7) \p NBytes is set to the number of bytes to compare.
-bool LoopIdiomRecognize::detectBCmpIdiom(ICmpInst *&BCmpInst,
- CmpInst *&LatchCmpInst,
- LoadInst *&LoadA, LoadInst *&LoadB,
- const SCEV *&SrcA, const SCEV *&SrcB,
- const SCEV *&NBytes) const {
- LLVM_DEBUG(dbgs() << "Recognizing bcmp idiom\n");
-
- // Give up if the loop is not in normal form, or has more than 2 blocks.
- if (!CurLoop->isLoopSimplifyForm() || CurLoop->getNumBlocks() > 2) {
- LLVM_DEBUG(dbgs() << "Basic loop structure unrecognized.\n");
- return false;
- }
- LLVM_DEBUG(dbgs() << "Recognized basic loop structure.\n");
-
- CmpLoopStructure CmpLoop;
- if (!matchBCmpLoopStructure(CmpLoop))
- return false;
-
- CmpOfLoads CmpOfLoads;
- if (!matchBCmpOfLoads(CmpLoop.BCmpValue, CmpOfLoads))
- return false;
-
- if (!recognizeBCmpLoopControlFlow(CmpOfLoads, CmpLoop))
- return false;
-
- BCmpInst = cast<ICmpInst>(CmpLoop.BCmpValue); // FIXME: is there no
- LatchCmpInst = cast<CmpInst>(CmpLoop.LatchCmpValue); // way to combine
- LoadA = cast<LoadInst>(CmpOfLoads.LoadA); // these cast with
- LoadB = cast<LoadInst>(CmpOfLoads.LoadB); // m_Value() matcher?
-
- Type *BCmpValTy = BCmpInst->getOperand(0)->getType();
- LLVMContext &Context = BCmpValTy->getContext();
- uint64_t BCmpTyBits = DL->getTypeSizeInBits(BCmpValTy);
- static constexpr uint64_t ByteTyBits = 8;
-
- LLVM_DEBUG(dbgs() << "Got comparison between values of type " << *BCmpValTy
- << " of size " << BCmpTyBits
- << " bits (while byte = " << ByteTyBits << " bits).\n");
- // bcmp()/memcmp() minimal unit of work is a byte. Therefore we must check
- // that we are dealing with a multiple of a byte here.
- if (BCmpTyBits % ByteTyBits != 0) {
- LLVM_DEBUG(dbgs() << "Value size is not a multiple of byte.\n");
- return false;
- // FIXME: could still be done under a run-time check that the total bit
- // count is a multiple of a byte i guess? Or handle remainder separately?
- }
-
- // Each comparison is done on this many bytes.
- uint64_t BCmpTyBytes = BCmpTyBits / ByteTyBits;
- LLVM_DEBUG(dbgs() << "Size is exactly " << BCmpTyBytes
- << " bytes, eligible for bcmp conversion.\n");
-
- const SCEV *Iterations;
- if (!recognizeBCmpLoopSCEV(BCmpTyBytes, CmpOfLoads, SrcA, SrcB, Iterations))
- return false;
-
- // bcmp / memcmp take the length argument as size_t, so do the promotion now.
- Type *CmpFuncSizeTy = DL->getIntPtrType(Context);
- Iterations = SE->getNoopOrZeroExtend(Iterations, CmpFuncSizeTy);
- assert(Iterations != SE->getCouldNotCompute() && "Promotion failed.");
- // Note that it didn't do a ptrtoint cast; we will need to do that manually.
-
- // We will be comparing *bytes*, not BCmpTy, so we need to recalculate the size.
- // It's a multiplication, and it *could* overflow. But for it to overflow
- // we'd want to compare more bytes than could be represented by size_t. But
- // allocation functions also take size_t. So how would you produce such a buffer?
- // FIXME: we likely need to actually check that we know this won't overflow,
- // via llvm::computeOverflowForUnsignedMul().
- NBytes = SE->getMulExpr(
- Iterations, SE->getConstant(CmpFuncSizeTy, BCmpTyBytes), SCEV::FlagNUW);
- assert(NBytes != SE->getCouldNotCompute() &&
- "Shouldn't fail to increment by one.");
-
- LLVM_DEBUG(dbgs() << "Computed total byte count: " << *NBytes << "\n");
-
- if (LoadA->getPointerAddressSpace() != LoadB->getPointerAddressSpace() ||
- LoadA->getPointerAddressSpace() != 0 || !LoadA->isSimple() ||
- !LoadB->isSimple()) {
- StringLiteral L("Unsupported loads in idiom - only support identical, "
- "simple loads from address space 0.\n");
- LLVM_DEBUG(dbgs() << L);
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "BCmpIdiomUnsupportedLoads",
- BCmpInst->getDebugLoc(),
- CurLoop->getHeader())
- << L;
- });
- return false; // FIXME: support non-simple loads.
- }
-
- LLVM_DEBUG(dbgs() << "Recognized bcmp idiom\n");
- ORE.emit([&]() {
- return OptimizationRemarkAnalysis(DEBUG_TYPE, "RecognizedBCmpIdiom",
- CurLoop->getStartLoc(),
- CurLoop->getHeader())
- << "Loop recognized as a bcmp idiom";
- });
-
- return true;
-}
-
-BasicBlock *
-LoopIdiomRecognize::transformBCmpControlFlow(ICmpInst *ComparedEqual) {
- LLVM_DEBUG(dbgs() << "Transforming control-flow.\n");
- SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
-
- BasicBlock *PreheaderBB = CurLoop->getLoopPreheader();
- BasicBlock *HeaderBB = CurLoop->getHeader();
- BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
- SmallString<32> LoopName = CurLoop->getName();
- Function *Func = PreheaderBB->getParent();
- LLVMContext &Context = Func->getContext();
-
- // Before doing anything, drop SCEV info.
- SE->forgetLoop(CurLoop);
-
- // Here we start with: (0/6)
- // PreheaderBB: <preheader> ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br label %LoopHeaderBB
- // LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
- // <...>
- // br i1 %<...>, label %LoopLatchBB, label %Successor0BB
- // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
- // <...>
- // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
- // Successor0BB: <exit> ; preds = %LoopHeaderBB
- // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
- // <...>
- // Successor1BB: <exit> ; preds = %LoopLatchBB
- // %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
- // <...>
- //
- // Successor0 and Successor1 may or may not be the same basic block.
-
- // Decouple the edge between loop preheader basic block and loop header basic
- // block. Thus the loop has become unreachable.
- assert(cast<BranchInst>(PreheaderBB->getTerminator())->isUnconditional() &&
- PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB &&
- "Preheader bb must end with an unconditional branch to header bb.");
- PreheaderBB->getTerminator()->eraseFromParent();
- DTUpdates.push_back({DominatorTree::Delete, PreheaderBB, HeaderBB});
-
- // Create a new preheader basic block before loop header basic block.
- auto *PhonyPreheaderBB = BasicBlock::Create(
- Context, LoopName + ".phonypreheaderbb", Func, HeaderBB);
- // And insert an unconditional branch from phony preheader basic block to
- // loop header basic block.
- IRBuilder<>(PhonyPreheaderBB).CreateBr(HeaderBB);
- DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB});
-
- // Create a *single* new empty block that we will substitute as a
- // successor basic block for the loop's exits. This one is temporary.
- // Much like phony preheader basic block, it is not connected.
- auto *PhonySuccessorBB =
- BasicBlock::Create(Context, LoopName + ".phonysuccessorbb", Func,
- LoopLatchBB->getNextNode());
- // That block must have *some* non-PHI instruction, or else deleteDeadLoop()
- // will mess up the cleanup of dbginfo, and the verifier will complain.
- IRBuilder<>(PhonySuccessorBB).CreateUnreachable();
-
- // Create two new empty blocks that we will use to preserve the original
- // loop exit control-flow, and preserve the incoming values in the PHI nodes
- // in the loop's successor exit blocks. These will live on.
- auto *ComparedUnequalBB =
- BasicBlock::Create(Context, ComparedEqual->getName() + ".unequalbb", Func,
- PhonySuccessorBB->getNextNode());
- auto *ComparedEqualBB =
- BasicBlock::Create(Context, ComparedEqual->getName() + ".equalbb", Func,
- PhonySuccessorBB->getNextNode());
-
- // By now we have: (1/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // [no terminator instruction!]
- // PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
- // br label %LoopHeaderBB
- // LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
- // <...>
- // br i1 %<...>, label %LoopLatchBB, label %Successor0BB
- // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
- // <...>
- // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
- // PhonySuccessorBB: ; No preds, UNREACHABLE!
- // unreachable
- // EqualBB: ; No preds, UNREACHABLE!
- // [no terminator instruction!]
- // UnequalBB: ; No preds, UNREACHABLE!
- // [no terminator instruction!]
- // Successor0BB: <exit> ; preds = %LoopHeaderBB
- // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
- // <...>
- // Successor1BB: <exit> ; preds = %LoopLatchBB
- // %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
- // <...>
-
- // What is the mapping/replacement basic block for exiting out of the loop
- // from either of the old loop's basic blocks?
- auto GetReplacementBB = [this, ComparedEqualBB,
- ComparedUnequalBB](const BasicBlock *OldBB) {
- assert(CurLoop->contains(OldBB) && "Only for loop's basic blocks.");
- if (OldBB == CurLoop->getLoopLatch()) // "all elements compared equal".
- return ComparedEqualBB;
- if (OldBB == CurLoop->getHeader()) // "element compared unequal".
- return ComparedUnequalBB;
- llvm_unreachable("Only had two basic blocks in loop.");
- };
-
- // What are the exits out of this loop?
- SmallVector<Loop::Edge, 2> LoopExitEdges;
- CurLoop->getExitEdges(LoopExitEdges);
- assert(LoopExitEdges.size() == 2 && "Should have only two exit edges.");
-
- // Populate new basic blocks, update the exiting control-flow, PHI nodes.
- for (const Loop::Edge &Edge : LoopExitEdges) {
- auto *OldLoopBB = const_cast<BasicBlock *>(Edge.first);
- auto *SuccessorBB = const_cast<BasicBlock *>(Edge.second);
- assert(CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) &&
- "Unexpected edge.");
-
- // If we would exit the loop from this loop's basic block,
- // what semantically would that mean? Did comparison succeed or fail?
- BasicBlock *NewBB = GetReplacementBB(OldLoopBB);
- assert(NewBB->empty() && "Should not get same new basic block here twice.");
- IRBuilder<> Builder(NewBB);
- Builder.SetCurrentDebugLocation(OldLoopBB->getTerminator()->getDebugLoc());
- Builder.CreateBr(SuccessorBB);
- DTUpdates.push_back({DominatorTree::Insert, NewBB, SuccessorBB});
- // Also, be *REALLY* careful with PHI nodes in the successor basic block:
- // update them to receive the same input value, but from the new basic block
- // instead of the current loop's basic block.
- SuccessorBB->replacePhiUsesWith(OldLoopBB, NewBB);
- // Also, change the loop control-flow. This loop's basic block shall no longer
- // exit from the loop to its original successor basic block, but to our new
- // phony successor basic block. Note that the new successor will be the unique exit.
- OldLoopBB->getTerminator()->replaceSuccessorWith(SuccessorBB,
- PhonySuccessorBB);
- DTUpdates.push_back({DominatorTree::Delete, OldLoopBB, SuccessorBB});
- DTUpdates.push_back({DominatorTree::Insert, OldLoopBB, PhonySuccessorBB});
- }
-
- // Inform DomTree about edge changes. Note that LoopInfo is still out-of-date.
- assert(DTUpdates.size() == 8 && "Update count prediction failed.");
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- DTU.applyUpdates(DTUpdates);
- DTUpdates.clear();
-
- // By now we have: (2/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // [no terminator instruction!]
- // PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
- // br label %LoopHeaderBB
- // LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
- // <...>
- // br i1 %<...>, label %LoopLatchBB, label %PhonySuccessorBB
- // LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
- // <...>
- // br i1 %<...>, label %PhonySuccessorBB, label %LoopHeaderBB
- // PhonySuccessorBB: <uniq. exit> ; preds = %LoopHeaderBB, %LoopLatchBB
- // unreachable
- // EqualBB: ; No preds, UNREACHABLE!
- // br label %Successor1BB
- // UnequalBB: ; No preds, UNREACHABLE!
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // *Finally*, zap the original loop. Record its parent loop though.
- Loop *ParentLoop = CurLoop->getParentLoop();
- LLVM_DEBUG(dbgs() << "Deleting old loop.\n");
- LoopDeleter.markLoopAsDeleted(CurLoop); // Mark as deleted *BEFORE* deleting!
- deleteDeadLoop(CurLoop, DT, SE, LI); // And actually delete the loop.
- CurLoop = nullptr;
-
- // By now we have: (3/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // [no terminator instruction!]
- // PhonyPreheaderBB: ; No preds, UNREACHABLE!
- // br label %PhonySuccessorBB
- // PhonySuccessorBB: ; preds = %PhonyPreheaderBB
- // unreachable
- // EqualBB: ; No preds, UNREACHABLE!
- // br label %Successor1BB
- // UnequalBB: ; No preds, UNREACHABLE!
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // Now, actually restore the CFG.
-
- // Insert an unconditional branch from an actual preheader basic block to
- // phony preheader basic block.
- IRBuilder<>(PreheaderBB).CreateBr(PhonyPreheaderBB);
- DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB});
- // Insert proper conditional branch from phony successor basic block to the
- // "dispatch" basic blocks, which were used to preserve incoming values in
- // original loop's successor basic blocks.
- assert(isa<UnreachableInst>(PhonySuccessorBB->getTerminator()) &&
- "Yep, that's the one we created to keep deleteDeadLoop() happy.");
- PhonySuccessorBB->getTerminator()->eraseFromParent();
- {
- IRBuilder<> Builder(PhonySuccessorBB);
- Builder.SetCurrentDebugLocation(ComparedEqual->getDebugLoc());
- Builder.CreateCondBr(ComparedEqual, ComparedEqualBB, ComparedUnequalBB);
- }
- DTUpdates.push_back(
- {DominatorTree::Insert, PhonySuccessorBB, ComparedEqualBB});
- DTUpdates.push_back(
- {DominatorTree::Insert, PhonySuccessorBB, ComparedUnequalBB});
-
- BasicBlock *DispatchBB = PhonySuccessorBB;
- DispatchBB->setName(LoopName + ".bcmpdispatchbb");
-
- assert(DTUpdates.size() == 3 && "Update count prediction failed.");
- DTU.applyUpdates(DTUpdates);
- DTUpdates.clear();
-
- // By now we have: (4/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br label %PhonyPreheaderBB
- // PhonyPreheaderBB: ; preds = %PreheaderBB
- // br label %DispatchBB
- // DispatchBB: ; preds = %PhonyPreheaderBB
- // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
- // EqualBB: ; preds = %DispatchBB
- // br label %Successor1BB
- // UnequalBB: ; preds = %DispatchBB
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // The basic CFG has been restored! Now let's merge redundant basic blocks.
-
- // Merge the phony successor basic block into its only predecessor,
- // the phony preheader basic block. It is completely redundant.
- MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);
-
- // By now we have: (5/6)
- // PreheaderBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br label %DispatchBB
- // DispatchBB: ; preds = %PreheaderBB
- // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
- // EqualBB: ; preds = %DispatchBB
- // br label %Successor1BB
- // UnequalBB: ; preds = %DispatchBB
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // Was this loop nested?
- if (!ParentLoop) {
- // If the loop was *NOT* nested, then let's also merge the phony successor
- // basic block into its only predecessor, the preheader basic block.
- // Also, here we need to update LoopInfo.
- LI->removeBlock(PreheaderBB);
- MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);
-
- // By now we have: (6/6)
- // DispatchBB: ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
- // EqualBB: ; preds = %DispatchBB
- // br label %Successor1BB
- // UnequalBB: ; preds = %DispatchBB
- // br label %Successor0BB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- return DispatchBB;
- }
-
- // Otherwise, we need to "preserve" the LoopSimplify form of the deleted loop.
- // To achieve that, we shall keep the preheader basic block (mainly so that
- // the loop header block will be guaranteed to have a predecessor outside of
- // the loop), and create a phony loop with these three new basic blocks.
- Loop *PhonyLoop = LI->AllocateLoop();
- ParentLoop->addChildLoop(PhonyLoop);
- PhonyLoop->addBasicBlockToLoop(DispatchBB, *LI);
- PhonyLoop->addBasicBlockToLoop(ComparedEqualBB, *LI);
- PhonyLoop->addBasicBlockToLoop(ComparedUnequalBB, *LI);
-
- // But we only have a preheader basic block, a header basic block and
- // two exiting basic blocks. For a proper loop we also need a backedge from
- // a non-header basic block to the header basic block.
- // Let's just add a never-taken branch from both of the exiting basic blocks.
- for (BasicBlock *BB : {ComparedEqualBB, ComparedUnequalBB}) {
- BranchInst *OldTerminator = cast<BranchInst>(BB->getTerminator());
- assert(OldTerminator->isUnconditional() && "That's the one we created.");
- BasicBlock *SuccessorBB = OldTerminator->getSuccessor(0);
-
- IRBuilder<> Builder(OldTerminator);
- Builder.SetCurrentDebugLocation(OldTerminator->getDebugLoc());
- Builder.CreateCondBr(ConstantInt::getTrue(Context), SuccessorBB,
- DispatchBB);
- OldTerminator->eraseFromParent();
- // Yes, the backedge will never be taken. The control-flow is redundant.
- // If it can be simplified further, other passes will take care.
- DTUpdates.push_back({DominatorTree::Delete, BB, SuccessorBB});
- DTUpdates.push_back({DominatorTree::Insert, BB, SuccessorBB});
- DTUpdates.push_back({DominatorTree::Insert, BB, DispatchBB});
- }
- assert(DTUpdates.size() == 6 && "Update count prediction failed.");
- DTU.applyUpdates(DTUpdates);
- DTUpdates.clear();
-
- // By now we have: (6/6)
- // PreheaderBB: <preheader> ; preds = ???
- // <...>
- // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
- // %ComparedEqual = icmp eq <...> %memcmp, 0
- // br label %BCmpDispatchBB
- // BCmpDispatchBB: <header> ; preds = %PreheaderBB
- // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
- // EqualBB: <latch,exiting> ; preds = %BCmpDispatchBB
- // br i1 %true, label %Successor1BB, label %BCmpDispatchBB
- // UnequalBB: <latch,exiting> ; preds = %BCmpDispatchBB
- // br i1 %true, label %Successor0BB, label %BCmpDispatchBB
- // Successor0BB: ; preds = %UnequalBB
- // %S0PHI = phi <...> [ <...>, %UnequalBB ]
- // <...>
- // Successor1BB: ; preds = %EqualBB
- // %S0PHI = phi <...> [ <...>, %EqualBB ]
- // <...>
-
- // Finally fully DONE!
- return DispatchBB;
-}
-
-void LoopIdiomRecognize::transformLoopToBCmp(ICmpInst *BCmpInst,
- CmpInst *LatchCmpInst,
- LoadInst *LoadA, LoadInst *LoadB,
- const SCEV *SrcA, const SCEV *SrcB,
- const SCEV *NBytes) {
- // We will be inserting before the terminator instruction of preheader block.
- IRBuilder<> Builder(CurLoop->getLoopPreheader()->getTerminator());
-
- LLVM_DEBUG(dbgs() << "Transforming bcmp loop idiom into a call.\n");
- LLVM_DEBUG(dbgs() << "Emitting new instructions.\n");
-
- // Expand the SCEV expressions for both sources to compare, and produce a value
- // for the byte length (beware of Iterations potentially being a pointer, and
- // account for the element size being BCmpTyBytes bytes, which may not be 1 byte).
- Value *PtrA, *PtrB, *Len;
- {
- SCEVExpander SExp(*SE, *DL, "LoopToBCmp");
- SExp.setInsertPoint(&*Builder.GetInsertPoint());
-
- auto HandlePtr = [&SExp](LoadInst *Load, const SCEV *Src) {
- SExp.SetCurrentDebugLocation(DebugLoc());
- // If the pointer operand of the original load had a dbgloc, use it.
- if (const auto *I = dyn_cast<Instruction>(Load->getPointerOperand()))
- SExp.SetCurrentDebugLocation(I->getDebugLoc());
- return SExp.expandCodeFor(Src);
- };
- PtrA = HandlePtr(LoadA, SrcA);
- PtrB = HandlePtr(LoadB, SrcB);
-
- // For len calculation let's use dbgloc for the loop's latch condition.
- Builder.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
- SExp.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
- Len = SExp.expandCodeFor(NBytes);
-
- Type *CmpFuncSizeTy = DL->getIntPtrType(Builder.getContext());
- assert(SE->getTypeSizeInBits(Len->getType()) ==
- DL->getTypeSizeInBits(CmpFuncSizeTy) &&
- "Len should already have the correct size.");
-
- // Make sure that the byte count is an integer; insert a ptrtoint cast if not.
- if (Len->getType()->isPointerTy())
- Len = Builder.CreatePtrToInt(Len, CmpFuncSizeTy);
- assert(Len->getType() == CmpFuncSizeTy && "Should have correct type now.");
-
- Len->setName(Len->getName() + ".bytecount");
-
- // There is no legality check needed. We want to compare that the memory
- // regions [PtrA, PtrA+Len) and [PtrB, PtrB+Len) are fully identical, equal.
- // For them to be fully equal, they must match bit-by-bit. And likewise,
- // for them to *NOT* be fully equal, they only have to differ in a single bit.
- // The step of comparison (bits compared at once) simply does not matter.
- }
-
- // For the rest of new instructions, dbgloc should point at the value cmp.
- Builder.SetCurrentDebugLocation(BCmpInst->getDebugLoc());
-
- // Emit the comparison itself.
- auto *CmpCall =
- cast<CallInst>(HasBCmp ? emitBCmp(PtrA, PtrB, Len, Builder, *DL, TLI)
- : emitMemCmp(PtrA, PtrB, Len, Builder, *DL, TLI));
- // FIXME: add {B,Mem}CmpInst with MemoryCompareInst
- // (based on MemIntrinsicBase) as base?
- // FIXME: propagate metadata from loads? (alignments, AS, TBAA, ...)
-
- // {b,mem}cmp returned 0 if they were equal, or non-zero if not equal.
- auto *ComparedEqual = cast<ICmpInst>(Builder.CreateICmpEQ(
- CmpCall, ConstantInt::get(CmpCall->getType(), 0),
- PtrA->getName() + ".vs." + PtrB->getName() + ".eqcmp"));
-
- BasicBlock *BB = transformBCmpControlFlow(ComparedEqual);
- Builder.ClearInsertionPoint();
-
- // We're done.
- LLVM_DEBUG(dbgs() << "Transformed loop bcmp idiom into a call.\n");
- ORE.emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "TransformedBCmpIdiomToCall",
- CmpCall->getDebugLoc(), BB)
- << "Transformed bcmp idiom into a call to "
- << ore::NV("NewFunction", CmpCall->getCalledFunction())
- << "() function";
- });
- ++NumBCmp;
-}
-
-/// Recognizes a bcmp idiom in a non-countable loop.
-///
-/// If detected, transforms the relevant code to issue the bcmp (or memcmp)
-/// intrinsic function call, and returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeBCmp() {
- if (!HasMemCmp && !HasBCmp)
- return false;
-
- ICmpInst *BCmpInst;
- CmpInst *LatchCmpInst;
- LoadInst *LoadA, *LoadB;
- const SCEV *SrcA, *SrcB, *NBytes;
- if (!detectBCmpIdiom(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB,
- NBytes)) {
- LLVM_DEBUG(dbgs() << "bcmp idiom recognition failed.\n");
- return false;
- }
-
- transformLoopToBCmp(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, NBytes);
- return true;
-}
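
For reference, a rough sketch (illustrative only, not part of this change) of the kind of loop the removed recognizer targeted and the call it produced. The names, element width, and helper functions below are assumptions made for the example.

#include <cstddef>
#include <cstring>

// A word-wise equality scan over two buffers: two simple loads from address
// space 0 compared for equality, with the latch counting iterations. This is
// the shape detectBCmpIdiom() looked for.
static bool wordsEqual(const unsigned *A, const unsigned *B, size_t N) {
  for (size_t I = 0; I != N; ++I)
    if (A[I] != B[I])   // the root value comparison (BCmpInst)
      return false;     // "compared unequal" exit (header)
  return true;          // "all elements compared equal" exit (latch)
}

// Conceptual result of transformLoopToBCmp(): the loop is replaced by one
// library call over NBytes = Iterations * BCmpTyBytes bytes.
static bool wordsEqualAsMemcmp(const unsigned *A, const unsigned *B,
                               size_t N) {
  return std::memcmp(A, B, N * sizeof(unsigned)) == 0; // or bcmp(...) == 0
}
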
diff --git a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 368b9d4e8df17..901204181a7cb 100644
--- a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
@@ -226,7 +227,8 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
MSSAU = MemorySSAUpdater(AR.MSSA);
- AR.MSSA->verifyMemorySSA();
+ if (VerifyMemorySSA)
+ AR.MSSA->verifyMemorySSA();
}
if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI,
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 1af4b21b432e2..6ce2d06058cf3 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -716,22 +717,6 @@ bool LoopInterchangeLegality::findInductionAndReductions(
return true;
}
-static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
- for (PHINode &PHI : Block->phis()) {
- // Reduction lcssa phi will have only 1 incoming block, which is the loop latch.
- if (PHI.getNumIncomingValues() > 1)
- return false;
- Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0));
- if (!Ins)
- return false;
- // Incoming value for lcssa phis in the outer loop exit can only be the inner
- // loop exit's lcssa phi, else it would not be tightly nested.
- if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
- return false;
- }
- return true;
-}
-
// This function indicates the current limitations in the transform as a result
// of which we do not proceed.
bool LoopInterchangeLegality::currentLimitations() {
@@ -830,21 +815,6 @@ bool LoopInterchangeLegality::currentLimitations() {
return true;
}
- // TODO: We only handle LCSSA PHIs corresponding to reductions for now.
- BasicBlock *InnerExit = InnerLoop->getExitBlock();
- if (!containsSafePHI(InnerExit, false)) {
- LLVM_DEBUG(
- dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with LCSSA PHIs can be interchange "
- "currently.";
- });
- return true;
- }
-
// TODO: Current limitation: Since we split the inner loop latch at the point
// where the induction variable is incremented (induction.next), we cannot have
// more than 1 user of induction.next since it would result in broken code
@@ -920,6 +890,28 @@ bool LoopInterchangeLegality::currentLimitations() {
return false;
}
+// We currently only support LCSSA PHI nodes in the inner loop exit if their
+// users are either reduction PHIs or PHIs outside the outer loop (which means
+// we are only interested in the final value after the loop).
+static bool
+areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
+ SmallPtrSetImpl<PHINode *> &Reductions) {
+ BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
+ for (PHINode &PHI : InnerExit->phis()) {
+ // Reduction lcssa phi will have only 1 incoming block, which is the loop latch.
+ if (PHI.getNumIncomingValues() > 1)
+ return false;
+ if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
+ PHINode *PN = dyn_cast<PHINode>(U);
+ return !PN || (Reductions.find(PN) == Reductions.end() &&
+ OuterL->contains(PN->getParent()));
+ })) {
+ return false;
+ }
+ }
+ return true;
+}
+
// We currently support LCSSA PHI nodes in the outer loop exit, if their
// incoming values do not come from the outer loop latch or if the
// outer loop latch has a single predecessor. In that case, the value will
@@ -927,7 +919,7 @@ bool LoopInterchangeLegality::currentLimitations() {
// will still be true after interchanging. If we have multiple predecessor,
// that may not be the case, e.g. because the outer loop latch may be executed
// if the inner loop is not executed.
-static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
+static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
for (PHINode &PHI : LoopNestExit->phis()) {
// FIXME: We currently are not able to detect floating point reductions
@@ -1012,7 +1004,19 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
return false;
}
- if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
+ if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
+ OuterInnerReductions)) {
+ LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Found unsupported PHI node in loop exit.";
+ });
+ return false;
+ }
+
+ if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
@@ -1315,31 +1319,39 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
FromBB->getTerminator()->getIterator());
}
-/// Update BI to jump to NewBB instead of OldBB. Records updates to
-/// the dominator tree in DTUpdates, if DT should be preserved.
+// Update BI to jump to NewBB instead of OldBB. Records updates to the
+// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
+// \p OldBB appears exactly once in BI's successor list.
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
BasicBlock *NewBB,
- std::vector<DominatorTree::UpdateType> &DTUpdates) {
- assert(llvm::count_if(successors(BI),
- [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&
- "BI must jump to OldBB at most once.");
- for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
- if (BI->getSuccessor(i) == OldBB) {
- BI->setSuccessor(i, NewBB);
-
- DTUpdates.push_back(
- {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
- DTUpdates.push_back(
- {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
- break;
+ std::vector<DominatorTree::UpdateType> &DTUpdates,
+ bool MustUpdateOnce = true) {
+ assert((!MustUpdateOnce ||
+ llvm::count_if(successors(BI),
+ [OldBB](BasicBlock *BB) {
+ return BB == OldBB;
+ }) == 1) && "BI must jump to OldBB exactly once.");
+ bool Changed = false;
+ for (Use &Op : BI->operands())
+ if (Op == OldBB) {
+ Op.set(NewBB);
+ Changed = true;
}
+
+ if (Changed) {
+ DTUpdates.push_back(
+ {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
+ DTUpdates.push_back(
+ {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
}
+ assert(Changed && "Expected a successor to be updated");
}
// Move Lcssa PHIs to the right place.
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
BasicBlock *InnerLatch, BasicBlock *OuterHeader,
- BasicBlock *OuterLatch, BasicBlock *OuterExit) {
+ BasicBlock *OuterLatch, BasicBlock *OuterExit,
+ Loop *InnerLoop, LoopInfo *LI) {
// Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
// defined either in the header or latch. Those blocks will become header and
@@ -1394,19 +1406,17 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
P->moveBefore(InnerExit->getFirstNonPHI());
// Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
- // incoming values from the outer latch or header, we have to add a new PHI
+ // incoming values defined in the outer loop, we have to add a new PHI
// in the inner loop latch, which became the exit block of the outer loop,
// after interchanging.
if (OuterExit) {
for (PHINode &P : OuterExit->phis()) {
if (P.getNumIncomingValues() != 1)
continue;
- // Skip Phis with incoming values not defined in the outer loop's header
- // and latch. Also skip incoming phis defined in the latch. Those should
+ // Skip Phis with incoming values defined in the inner loop. Those should
// already have been updated.
auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
- if (!I || ((I->getParent() != OuterLatch || isa<PHINode>(I)) &&
- I->getParent() != OuterHeader))
+ if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
continue;
PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
@@ -1481,12 +1491,21 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
if (!InnerLoopHeaderSuccessor)
return false;
- // Adjust Loop Preheader and headers
+ // Adjust Loop Preheader and headers.
+ // The branches in the outer loop predecessor and the outer loop header can
+ // be unconditional branches or conditional branches with duplicates. Consider
+ // this when updating the successors.
updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
- InnerLoopPreHeader, DTUpdates);
- updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates);
+ InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
+ // The outer loop header might or might not branch to the outer latch.
+ // We are guaranteed to branch to the inner loop preheader.
+ if (std::find(succ_begin(OuterLoopHeaderBI), succ_end(OuterLoopHeaderBI),
+ OuterLoopLatch) != succ_end(OuterLoopHeaderBI))
+ updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
+ /*MustUpdateOnce=*/false);
updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
- InnerLoopHeaderSuccessor, DTUpdates);
+ InnerLoopHeaderSuccessor, DTUpdates,
+ /*MustUpdateOnce=*/false);
// Adjust reduction PHI's now that the incoming block has changed.
InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
@@ -1520,7 +1539,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
OuterLoopPreHeader);
moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
- OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock());
+ OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
+ InnerLoop, LI);
// For PHIs in the exit block of the outer loop, outer's latch has been
// replaced by Inners'.
OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
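
To make the new inner-loop-exit PHI check concrete, here is a small source-level example (an illustration, not taken from the patch or its tests): after LCSSA, the inner loop's exit block carries a PHI for the running sum whose only users are the outer reduction PHI and the value used after the loop nest, which is exactly what areInnerLoopExitPHIsSupported() now accepts.

// Tightly nested reduction loop; the LCSSA PHI for Sum in the inner loop's
// exit block feeds only the outer-loop reduction PHI and the final use after
// the nest, so interchange remains legal under the new check.
long sumMatrix(const long A[][64], int Rows, int Cols) {
  long Sum = 0;
  for (int I = 0; I < Rows; ++I)     // outer loop
    for (int J = 0; J < Cols; ++J)   // inner loop
      Sum += A[I][J];                // reduction carried across both loops
  return Sum;                        // final value used outside the outer loop
}
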
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index e8dc879a184b4..4e1b4e87ebc95 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -49,6 +49,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -488,7 +489,7 @@ public:
// Filter the candidates further.
SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
unsigned NumForwarding = 0;
- for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
+ for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
LLVM_DEBUG(dbgs() << "Candidate " << Cand);
// Make sure that the stored value is available everywhere in the loop in
@@ -544,7 +545,8 @@ public:
auto *HeaderBB = L->getHeader();
auto *F = HeaderBB->getParent();
bool OptForSize = F->hasOptSize() ||
- llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
+ llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
+ PGSOQueryType::IRPass);
if (OptForSize) {
LLVM_DEBUG(
dbgs() << "Versioning is needed but not allowed when optimizing "
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 885c0e8f4b8b8..1a42f6b23443e 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -191,9 +191,12 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -248,7 +251,9 @@ struct LoopICmp {
class LoopPredication {
AliasAnalysis *AA;
+ DominatorTree *DT;
ScalarEvolution *SE;
+ LoopInfo *LI;
BranchProbabilityInfo *BPI;
Loop *L;
@@ -300,10 +305,13 @@ class LoopPredication {
// within the loop. We identify such unprofitable loops through BPI.
bool isLoopProfitableToPredicate();
+ bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
+
public:
- LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
+ LoopPredication(AliasAnalysis *AA, DominatorTree *DT,
+ ScalarEvolution *SE, LoopInfo *LI,
BranchProbabilityInfo *BPI)
- : AA(AA), SE(SE), BPI(BPI){};
+ : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {};
bool runOnLoop(Loop *L);
};
@@ -323,10 +331,12 @@ public:
if (skipLoop(L))
return false;
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BranchProbabilityInfo &BPI =
getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LoopPredication LP(AA, SE, &BPI);
+ LoopPredication LP(AA, DT, SE, LI, &BPI);
return LP.runOnLoop(L);
}
};
@@ -352,7 +362,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F);
- LoopPredication LP(&AR.AA, &AR.SE, BPI);
+ LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, BPI);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -823,9 +833,9 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
Value *AllChecks = Builder.CreateAnd(Checks);
auto *OldCond = BI->getCondition();
BI->setCondition(AllChecks);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
assert(isGuardAsWidenableBranch(BI) &&
"Stopped being a guard after transform?");
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
return true;
@@ -953,6 +963,233 @@ bool LoopPredication::isLoopProfitableToPredicate() {
return true;
}
+/// If we can (cheaply) find a widenable branch which controls entry into the
+/// loop, return it.
+static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) {
+ // Walk back through any unconditional executed blocks and see if we can find
+ // a widenable condition which seems to control execution of this loop. Note
+ // that we predict that maythrow calls are likely untaken and thus that it's
+ // profitable to widen a branch before a maythrow call with a condition
+ // afterwards even though that may cause the slow path to run in a case where
+ // it wouldn't have otherwise.
+ BasicBlock *BB = L->getLoopPreheader();
+ if (!BB)
+ return nullptr;
+ do {
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ if (BB == Pred->getSingleSuccessor()) {
+ BB = Pred;
+ continue;
+ }
+ break;
+ } while (true);
+
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ auto *Term = Pred->getTerminator();
+
+ Value *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(Term, Cond, WC, IfTrueBB, IfFalseBB) &&
+ IfTrueBB == BB)
+ return cast<BranchInst>(Term);
+ }
+ return nullptr;
+}
+
+/// Return the minimum of all analyzeable exit counts. This is an upper bound
+/// on the actual exit count. If there are not at least two analyzeable exits,
+/// returns SCEVCouldNotCompute.
+static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE,
+ DominatorTree &DT,
+ Loop *L) {
+ SmallVector<BasicBlock *, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ SmallVector<const SCEV *, 4> ExitCounts;
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+ assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
+ "We should only have known counts for exiting blocks that "
+ "dominate latch!");
+ ExitCounts.push_back(ExitCount);
+ }
+ if (ExitCounts.size() < 2)
+ return SE.getCouldNotCompute();
+ return SE.getUMinFromMismatchedTypes(ExitCounts);
+}
+
+/// Return true if we can be fairly sure that executing block BB will probably
+/// lead to executing an __llvm_deoptimize. This is a profitability heuristic,
+/// not a legality constraint.
+static bool isVeryLikelyToDeopt(BasicBlock *BB) {
+ while (BB->getUniqueSuccessor())
+ // Will skip side effects, that's okay
+ BB = BB->getUniqueSuccessor();
+
+ return BB->getTerminatingDeoptimizeCall();
+}
+
+/// This implements an analogous, but entirely distinct transform from the main
+/// loop predication transform. This one is phrased in terms of using a
+/// widenable branch *outside* the loop to allow us to simplify loop exits in a
+/// following loop. This is close in spirit to the IndVarSimplify transform
+/// of the same name, but is materially different: widening loosens legality
+/// sharply.
+bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
+ // The transformation performed here aims to widen a widenable condition
+ // above the loop such that all analyzeable exits leading to deopt are dead.
+ // It assumes that the latch is the dominant exit for profitability and that
+ // exits branching to deoptimizing blocks are rarely taken. It relies on the
+ // semantics of widenable expressions for legality. (i.e. being able to fall
+ // down the widenable path spuriously allows us to ignore exit order,
+ // unanalyzeable exits, side effects, exceptional exits, and other challenges
+ // which restrict the applicability of the non-WC based version of this
+ // transform in IndVarSimplify.)
+ //
+ // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may
+ // imply flags on the expression being hoisted and inserting new uses (flags
+ // are only correct for current uses). The result is that we may be
+ // inserting a branch on the value which can be either poison or undef. In
+ // this case, the branch can legally go either way; we just need to avoid
+ // introducing UB. This is achieved through the use of the freeze
+ // instruction.
+
+ SmallVector<BasicBlock *, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ if (ExitingBlocks.empty())
+ return false; // Nothing to do.
+
+ auto *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+
+ auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI);
+ if (!WidenableBR)
+ return false;
+
+ const SCEV *LatchEC = SE->getExitCount(L, Latch);
+ if (isa<SCEVCouldNotCompute>(LatchEC))
+ return false; // profitability - want hot exit in analyzeable set
+
+ // At this point, we have found an analyzeable latch, and a widenable
+ // condition above the loop. If we have a widenable exit within the loop
+ // (for which we can't compute exit counts), drop the ability to further
+ // widen so that we gain the ability to analyze its exit count and perform this
+ // transform. TODO: It'd be nice to know for sure the exit became
+ // analyzeable after dropping widenability.
+ {
+ bool Invalidate = false;
+
+ for (auto *ExitingBB : ExitingBlocks) {
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
+
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+
+ Use *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
+ L->contains(IfTrueBB)) {
+ WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
+ Invalidate = true;
+ }
+ }
+ if (Invalidate)
+ SE->forgetLoop(L);
+ }
+
+ // The use of umin(all analyzeable exits) instead of latch is subtle, but
+ // important for profitability. We may have a loop which hasn't been fully
+ // canonicalized just yet. If the exit we chose to widen is provably never
+ // taken, we want the widened form to *also* be provably never taken. We
+ // can't guarantee this as a current unanalyzeable exit may later become
+ // analyzeable, but we can at least avoid the obvious cases.
+ const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);
+ if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
+ !SE->isLoopInvariant(MinEC, L) ||
+ !isSafeToExpandAt(MinEC, WidenableBR, *SE))
+ return false;
+
+ // Subtlety: We need to avoid inserting additional uses of the WC. We know
+ // that it can only have one transitive use at the moment, and thus moving
+ // that use to just before the branch and inserting code before it and then
+ // modifying the operand is legal.
+ auto *IP = cast<Instruction>(WidenableBR->getCondition());
+ IP->moveBefore(WidenableBR);
+ Rewriter.setInsertPoint(IP);
+ IRBuilder<> B(IP);
+
+ bool Changed = false;
+ Value *MinECV = nullptr; // lazily generated if needed
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ // If our exiting block exits multiple loops, we can only rewrite the
+ // innermost one. Otherwise, we're changing how many times the innermost
+ // loop runs before it exits.
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
+
+ // Can't rewrite non-branch yet.
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+
+ // If already constant, nothing to do.
+ if (isa<Constant>(BI->getCondition()))
+ continue;
+
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount) ||
+ ExitCount->getType()->isPointerTy() ||
+ !isSafeToExpandAt(ExitCount, WidenableBR, *SE))
+ continue;
+
+ const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);
+ if (!isVeryLikelyToDeopt(ExitBB))
+ // Profitability: indicator of rarely/never taken exit
+ continue;
+
+ // If we found a widenable exit condition, do two things:
+ // 1) fold the widened exit test into the widenable condition
+ // 2) fold the branch to untaken - avoids infinite looping
+
+ Value *ECV = Rewriter.expandCodeFor(ExitCount);
+ if (!MinECV)
+ MinECV = Rewriter.expandCodeFor(MinEC);
+ Value *RHS = MinECV;
+ if (ECV->getType() != RHS->getType()) {
+ Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
+ ECV = B.CreateZExt(ECV, WiderTy);
+ RHS = B.CreateZExt(RHS, WiderTy);
+ }
+ assert(!Latch || DT->dominates(ExitingBB, Latch));
+ Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS);
+ // Freeze poison or undef to an arbitrary bit pattern to ensure we can
+ // branch without introducing UB. See NOTE ON POISON/UNDEF above for
+ // context.
+ NewCond = B.CreateFreeze(NewCond);
+
+ widenWidenableBranch(WidenableBR, NewCond);
+
+ Value *OldCond = BI->getCondition();
+ BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
+ Changed = true;
+ }
+
+ if (Changed)
+ // We just mutated a bunch of loop exits, changing their exit counts
+ // widely. We need to force recomputation of the exit counts given these
+ // changes. Note that all of the inserted exits are never taken, and
+ // should be removed next time the CFG is modified.
+ SE->forgetLoop(L);
+ return Changed;
+}
+
bool LoopPredication::runOnLoop(Loop *Loop) {
L = Loop;
@@ -1004,16 +1241,12 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
cast<BranchInst>(BB->getTerminator()));
}
- if (Guards.empty() && GuardsAsWidenableBranches.empty())
- return false;
-
SCEVExpander Expander(*SE, *DL, "loop-predication");
-
bool Changed = false;
for (auto *Guard : Guards)
Changed |= widenGuardConditions(Guard, Expander);
for (auto *Guard : GuardsAsWidenableBranches)
Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
-
+ Changed |= predicateLoopExits(L, Expander);
return Changed;
}
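
As a source-level illustration of the new predicateLoopExits() transform, here is a hedged sketch under simplifying assumptions: it glosses over the exact umin/off-by-one handling the pass performs via SCEV, and uses plain functions as stand-ins for the deoptimize and widenable-condition intrinsics.

// Stand-ins for the deopt and widenable-condition machinery; illustrative only.
static long deoptimize() { return -1; }
static bool widenable_condition() { return true; }

// Before: each iteration re-checks the bound, and the failing path deopts.
long sumBefore(const long *A, long N, long Bound) {
  long Sum = 0;
  for (long I = 0; I < N; ++I) {   // latch: the analyzeable "hot" exit
    if (I >= Bound)                // rarely-taken exit leading to deopt
      return deoptimize();
    Sum += A[I];
  }
  return Sum;
}

// After (conceptually): the per-iteration test is folded into the single
// widenable check above the loop, and the in-loop exit branch becomes
// constant-untaken. Taking the slow path spuriously is always legal, which is
// what makes this widening valid.
long sumAfter(const long *A, long N, long Bound) {
  if (!(Bound >= N) || !widenable_condition())
    return deoptimize();           // widened slow path
  long Sum = 0;
  for (long I = 0; I < N; ++I)
    Sum += A[I];
  return Sum;
}
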
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 96e2c2a3ac6b7..da13a342ae123 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -45,6 +44,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -53,6 +53,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <cstddef>
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 94517996df392..0868e742f4eee 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -18,6 +18,8 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 299f3fc5fb195..b27e65e0adb73 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -30,6 +30,8 @@
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils.h"
@@ -660,6 +662,9 @@ static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
// Merge Succ into Pred and delete it.
MergeBlockIntoPredecessor(Succ, &DTU, &LI, MSSAU);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
Changed = true;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 65e0dee0225ae..1c03a4bf6c021 100644
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -41,14 +41,15 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 7f119175c4a82..e9f368628a087 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -74,7 +74,6 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -97,6 +96,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -108,6 +108,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -115,8 +116,8 @@
#include <cstdlib>
#include <iterator>
#include <limits>
-#include <numeric>
#include <map>
+#include <numeric>
#include <utility>
using namespace llvm;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 8d88be4203141..92ad8dafa5abc 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -426,51 +427,76 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return UnrollResult;
}
+static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ AssumptionCache &AC, DependenceInfo &DI,
+ OptimizationRemarkEmitter &ORE,
+ int OptLevel) {
+ bool DidSomething = false;
+
+ // The loop unroll and jam pass requires loops to be in simplified form, and
+ // also needs LCSSA. Since simplification may add new inner loops, it has to
+ // run before the legality and profitability checks. This means running the
+ // pass will simplify all loops, regardless of whether anything ends up being
+ // unroll and jammed.
+ for (auto &L : LI) {
+ DidSomething |=
+ simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
+ DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE);
+ }
+
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+ internal::appendLoopsToWorklist(reverse(LI), Worklist);
+ while (!Worklist.empty()) {
+ Loop *L = Worklist.pop_back_val();
+ formLCSSA(*L, DT, &LI, &SE);
+ LoopUnrollResult Result =
+ tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
+ if (Result != LoopUnrollResult::Unmodified)
+ DidSomething = true;
+ }
+
+ return DidSomething;
+}
+
namespace {
-class LoopUnrollAndJam : public LoopPass {
+class LoopUnrollAndJam : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
unsigned OptLevel;
- LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
+ LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) {
initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
return false;
- Function &F = *L->getHeader()->getParent();
-
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
- // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
- // pass. Function analyses need to be preserved across loop transformations
- // but ORE cannot be preserved (see comment before the pass definition).
- OptimizationRemarkEmitter ORE(&F);
-
- LoopUnrollResult Result =
- tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
+ auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- if (Result == LoopUnrollResult::FullyUnrolled)
- LPM.markLoopAsDeleted(*L);
-
- return Result != LoopUnrollResult::Unmodified;
+ return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
}
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DependenceAnalysisWrapperPass>();
- getLoopAnalysisUsage(AU);
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
};
@@ -480,10 +506,13 @@ char LoopUnrollAndJam::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false)
@@ -491,26 +520,18 @@ Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) {
return new LoopUnrollAndJam(OptLevel);
}
-PreservedAnalyses LoopUnrollAndJamPass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
- const auto &FAM =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
- Function *F = L.getHeader()->getParent();
-
- auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
- // FIXME: This should probably be optional rather than required.
- if (!ORE)
- report_fatal_error(
- "LoopUnrollAndJamPass: OptimizationRemarkEmitterAnalysis not cached at "
- "a higher level");
-
- DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
-
- LoopUnrollResult Result = tryToUnrollAndJamLoop(
- &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, DI, *ORE, OptLevel);
-
- if (Result == LoopUnrollResult::Unmodified)
+PreservedAnalyses LoopUnrollAndJamPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+ AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ DependenceInfo &DI = AM.getResult<DependenceAnalysis>(F);
+ OptimizationRemarkEmitter &ORE =
+ AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index a6d4164c36455..4c2b079c6bb5b 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -46,6 +46,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -212,7 +213,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
// Apply size attributes
bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
- llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+ PGSOQueryType::IRPass);
if (OptForSize) {
UP.Threshold = UP.OptSizeThreshold;
UP.PartialThreshold = UP.PartialOptSizeThreshold;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index b410df0c5f68c..915e053704b23 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -59,6 +59,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -683,7 +684,7 @@ bool LoopUnswitch::processCurrentLoop() {
for (auto &I : *BB) {
auto CS = CallSite(&I);
if (!CS) continue;
- if (CS.hasFnAttr(Attribute::Convergent))
+ if (CS.isConvergent())
return false;
if (auto *II = dyn_cast<InvokeInst>(&I))
if (!II->getUnwindDest()->canSplitPredecessors())
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 2ccb7cae3079e..7b9af527d444e 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -79,6 +79,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index e076424d90425..ab7b85e89e7bd 100644
--- a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index d0fcf38b5a7bd..21c6c32e8e02a 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index d85f20b3f80c1..53671c7bc3d15 100644
--- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 9489e01774d64..45f5929e3b903 100644
--- a/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
@@ -60,7 +61,7 @@ static bool lowerGuardIntrinsic(Function &F) {
DeoptIntrinsic->setCallingConv(GuardDecl->getCallingConv());
for (auto *CI : ToLower) {
- makeGuardControlFlowExplicit(DeoptIntrinsic, CI);
+ makeGuardControlFlowExplicit(DeoptIntrinsic, CI, false);
CI->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
new file mode 100644
index 0000000000000..0ff6ee8bcfcc2
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -0,0 +1,894 @@
+//===- LowerMatrixIntrinsics.cpp - Lower matrix intrinsics -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower matrix intrinsics to vector operations.
+//
+// TODO:
+// * Implement multiply & add fusion
+// * Add remark, summarizing the available matrix optimization opportunities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "lower-matrix-intrinsics"
+
+static cl::opt<bool> EnableShapePropagation("matrix-propagate-shape",
+ cl::init(true));
+
+static cl::opt<bool> AllowContractEnabled(
+ "matrix-allow-contract", cl::init(false), cl::Hidden,
+ cl::desc("Allow the use of FMAs if available and profitable. This may "
+ "result in different results, due to less rounding error."));
+
+namespace {
+
+// Given an element pointer \p BasePtr to the start of a (sub) matrix, compute
+// the start address of column \p Col with type (\p EltType x \p NumRows),
+// assuming \p Stride elements between the starts of two consecutive columns.
+// \p Stride must be >= \p NumRows.
+//
+// Consider a 4x4 matrix like below
+//
+// 0 1 2 3
+// 0 v_0_0 v_0_1 v_0_2 v_0_3
+// 1 v_1_0 v_1_1 v_1_2 v_1_3
+// 2 v_2_0 v_2_1 v_2_2 v_2_3
+// 3 v_3_0 v_3_1 v_3_2 v_3_3
+
+// To compute the column addresses for a 2x3 sub-matrix at row 1 and column 1,
+// we need a pointer to the first element of the submatrix as base pointer.
+// Then we can use computeColumnAddr to compute the addresses for the columns
+// of the sub-matrix.
+//
+// Column 0: computeColumnAddr(Base, 0 (column), 4 (stride), 2 (num rows), ..)
+// -> just returns Base
+// Column 1: computeColumnAddr(Base, 1 (column), 4 (stride), 2 (num rows), ..)
+// -> returns Base + (1 * 4)
+// Column 2: computeColumnAddr(Base, 2 (column), 4 (stride), 2 (num rows), ..)
+// -> returns Base + (2 * 4)
+//
+// The graphic below illustrates the number of elements in a column (marked
+// with |) and the number of skipped elements (marked with {).
+//
+// v_0_0 v_0_1 {v_0_2 {v_0_3
+// Base Col 1 Col 2
+// | | |
+// v_1_0 |v_1_1 |v_1_2 |v_1_3
+// v_2_0 |v_2_1 |v_2_2 |v_2_3
+// v_3_0 {v_3_1 {v_3_2 v_3_3
+//
+Value *computeColumnAddr(Value *BasePtr, Value *Col, Value *Stride,
+ unsigned NumRows, Type *EltType,
+ IRBuilder<> &Builder) {
+
+ assert((!isa<ConstantInt>(Stride) ||
+ cast<ConstantInt>(Stride)->getZExtValue() >= NumRows) &&
+ "Stride must be >= the number of rows.");
+ unsigned AS = cast<PointerType>(BasePtr->getType())->getAddressSpace();
+
+ // Compute the start of the column with index Col as Col * Stride.
+ Value *ColumnStart = Builder.CreateMul(Col, Stride, "col.start");
+
+ // Get pointer to the start of the selected column. Skip GEP creation,
+ // if we select column 0.
+ if (isa<ConstantInt>(ColumnStart) && cast<ConstantInt>(ColumnStart)->isZero())
+ ColumnStart = BasePtr;
+ else
+ ColumnStart = Builder.CreateGEP(EltType, BasePtr, ColumnStart, "col.gep");
+
+ // Cast elementwise column start pointer to a pointer to a column
+ // (EltType x NumRows)*.
+ Type *ColumnType = VectorType::get(EltType, NumRows);
+ Type *ColumnPtrType = PointerType::get(ColumnType, AS);
+ return Builder.CreatePointerCast(ColumnStart, ColumnPtrType, "col.cast");
+}
+
+/// LowerMatrixIntrinsics contains the methods used to lower matrix intrinsics.
+///
+/// Currently, the lowering for each matrix intrinsic is done as follows:
+/// 1. Propagate the shape information from intrinsics to connected
+/// instructions.
+/// 2. Lower instructions with shape information.
+/// 2.1. Get column vectors for each argument. If we already lowered the
+/// definition of an argument, use the produced column vectors directly.
+/// If not, split the operand vector containing an embedded matrix into
+///      a set of column vectors.
+/// 2.2. Lower the instruction in terms of columnwise operations, which yields
+///      a set of column vectors containing the result matrix. Note that we
+///      lower all instructions that have shape information. Besides the
+///      intrinsics, this includes stores, for example.
+/// 2.3. Update uses of the lowered instruction. If we have shape information
+/// for a user, there is nothing to do, as we will look up the result
+/// column matrix when lowering the user. For other uses, we embed the
+/// result matrix in a flat vector and update the use.
+/// 2.4. Cache the result column matrix for the instruction we lowered.
+/// 3. After we lowered all instructions in a function, remove the now
+/// obsolete instructions.
+///
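+/// For example, after shape propagation a load that feeds an operand of a
+/// llvm.matrix.multiply call inherits that operand's shape, so the load
+/// itself is lowered to a sequence of column loads rather than one flat
+/// vector load.
+///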
+class LowerMatrixIntrinsics {
+ Function &Func;
+ const DataLayout &DL;
+ const TargetTransformInfo &TTI;
+
+ /// Wrapper class representing a matrix as a set of column vectors.
+ /// All column vectors must have the same vector type.
+ class ColumnMatrixTy {
+ SmallVector<Value *, 16> Columns;
+
+ public:
+ ColumnMatrixTy() : Columns() {}
+ ColumnMatrixTy(ArrayRef<Value *> Cols)
+ : Columns(Cols.begin(), Cols.end()) {}
+
+ Value *getColumn(unsigned i) const { return Columns[i]; }
+
+ void setColumn(unsigned i, Value *V) { Columns[i] = V; }
+
+ size_t getNumColumns() const { return Columns.size(); }
+ size_t getNumRows() const {
+ assert(Columns.size() > 0 && "Cannot call getNumRows without columns");
+ return cast<VectorType>(Columns[0]->getType())->getNumElements();
+ }
+
+ const SmallVectorImpl<Value *> &getColumnVectors() const { return Columns; }
+
+ SmallVectorImpl<Value *> &getColumnVectors() { return Columns; }
+
+ void addColumn(Value *V) { Columns.push_back(V); }
+
+  iterator_range<SmallVector<Value *, 16>::iterator> columns() {
+ return make_range(Columns.begin(), Columns.end());
+ }
+
+ /// Embed the columns of the matrix into a flat vector by concatenating
+ /// them.
+ Value *embedInVector(IRBuilder<> &Builder) const {
+ return Columns.size() == 1 ? Columns[0]
+ : concatenateVectors(Builder, Columns);
+ }
+ };
+
+ struct ShapeInfo {
+ unsigned NumRows;
+ unsigned NumColumns;
+
+ ShapeInfo(unsigned NumRows = 0, unsigned NumColumns = 0)
+ : NumRows(NumRows), NumColumns(NumColumns) {}
+
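+    // The row/column arguments of the matrix intrinsics are expected to be
+    // constant integers, so the casts below are safe.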
+ ShapeInfo(Value *NumRows, Value *NumColumns)
+ : NumRows(cast<ConstantInt>(NumRows)->getZExtValue()),
+ NumColumns(cast<ConstantInt>(NumColumns)->getZExtValue()) {}
+
+ bool operator==(const ShapeInfo &other) {
+ return NumRows == other.NumRows && NumColumns == other.NumColumns;
+ }
+ bool operator!=(const ShapeInfo &other) { return !(*this == other); }
+
+ /// Returns true if shape-information is defined, meaning both dimensions
+ /// are != 0.
+ operator bool() const {
+ assert(NumRows == 0 || NumColumns != 0);
+ return NumRows != 0;
+ }
+ };
+
+ /// Maps instructions to their shape information. The shape information
+ /// describes the shape to be used while lowering. This matches the shape of
+ /// the result value of the instruction, with the only exceptions being store
+  /// instructions and the matrix_columnwise_store intrinsics. For those, the
+  /// shape information describes the matrix value being stored.
+ DenseMap<Value *, ShapeInfo> ShapeMap;
+
+  /// List of instructions to remove. While lowering, we do not replace all
+  /// users of a lowered instruction if shape information is available; such
+  /// lowered instructions are removed after lowering has finished.
+ SmallVector<Instruction *, 16> ToRemove;
+
+ /// Map from instructions to their produced column matrix.
+ DenseMap<Value *, ColumnMatrixTy> Inst2ColumnMatrix;
+
+public:
+ LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI)
+ : Func(F), DL(F.getParent()->getDataLayout()), TTI(TTI) {}
+
+ /// Return the set of column vectors that a matrix value is lowered to.
+ ///
+  /// If we lowered \p MatrixVal, just return the cached result column matrix.
+  /// Otherwise split the flat vector \p MatrixVal containing a matrix with
+ /// shape \p SI into column vectors.
+ ColumnMatrixTy getMatrix(Value *MatrixVal, const ShapeInfo &SI,
+ IRBuilder<> Builder) {
+ VectorType *VType = dyn_cast<VectorType>(MatrixVal->getType());
+ assert(VType && "MatrixVal must be a vector type");
+ assert(VType->getNumElements() == SI.NumRows * SI.NumColumns &&
+ "The vector size must match the number of matrix elements");
+
+ // Check if we lowered MatrixVal using shape information. In that case,
+ // return the existing column matrix, if it matches the requested shape
+ // information. If there is a mis-match, embed the result in a flat
+ // vector and split it later.
+ auto Found = Inst2ColumnMatrix.find(MatrixVal);
+ if (Found != Inst2ColumnMatrix.end()) {
+ ColumnMatrixTy &M = Found->second;
+ // Return the found matrix, if its shape matches the requested shape
+ // information
+ if (SI.NumRows == M.getNumRows() && SI.NumColumns == M.getNumColumns())
+ return M;
+
+ MatrixVal = M.embedInVector(Builder);
+ }
+
+ // Otherwise split MatrixVal.
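+    // For example, a <6 x double> value with shape 2 x 3 is split into three
+    // 2-element column vectors using the shuffle masks <0,1>, <2,3> and
+    // <4,5>.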
+ SmallVector<Value *, 16> SplitVecs;
+ Value *Undef = UndefValue::get(VType);
+ for (unsigned MaskStart = 0; MaskStart < VType->getNumElements();
+ MaskStart += SI.NumRows) {
+ Constant *Mask = createSequentialMask(Builder, MaskStart, SI.NumRows, 0);
+ Value *V = Builder.CreateShuffleVector(MatrixVal, Undef, Mask, "split");
+ SplitVecs.push_back(V);
+ }
+
+ return {SplitVecs};
+ }
+
+ /// If \p V already has a known shape return false. Otherwise set the shape
+ /// for instructions that support it.
+ bool setShapeInfo(Value *V, ShapeInfo Shape) {
+ assert(Shape && "Shape not set");
+ if (isa<UndefValue>(V) || !supportsShapeInfo(V))
+ return false;
+
+ auto SIter = ShapeMap.find(V);
+ if (SIter != ShapeMap.end()) {
+ LLVM_DEBUG(dbgs() << " not overriding existing shape: "
+ << SIter->second.NumRows << " "
+ << SIter->second.NumColumns << " for " << *V << "\n");
+ return false;
+ }
+
+ ShapeMap.insert({V, Shape});
+ LLVM_DEBUG(dbgs() << " " << Shape.NumRows << " x " << Shape.NumColumns
+ << " for " << *V << "\n");
+ return true;
+ }
+
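+  /// Returns true if \p V is an elementwise operation whose result has the
+  /// same shape as its matrix operands (non-instruction values trivially
+  /// qualify).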
+ bool isUniformShape(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul: // Scalar multiply.
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::Sub:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /// Returns true if shape information can be used for \p V. The supported
+ /// instructions must match the instructions that can be lowered by this pass.
+ bool supportsShapeInfo(Value *V) {
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return false;
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II)
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::matrix_multiply:
+ case Intrinsic::matrix_transpose:
+ case Intrinsic::matrix_columnwise_load:
+ case Intrinsic::matrix_columnwise_store:
+ return true;
+ default:
+ return false;
+ }
+ return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
+ }
+
+ /// Propagate the shape information of instructions to their users.
+ /// The work list contains instructions for which we can compute the shape,
+ /// either based on the information provided by matrix intrinsics or known
+ /// shapes of operands.
+ SmallVector<Instruction *, 32>
+ propagateShapeForward(SmallVectorImpl<Instruction *> &WorkList) {
+ SmallVector<Instruction *, 32> NewWorkList;
+    // Pop an element for which we are guaranteed to have at least one of the
+    // operand shapes. Add the shape for this instruction and then add its
+    // users to the work list.
+ LLVM_DEBUG(dbgs() << "Forward-propagate shapes:\n");
+ while (!WorkList.empty()) {
+ Instruction *Inst = WorkList.back();
+ WorkList.pop_back();
+
+      // Compute the shape for Inst; if a new shape was set, propagate it to
+      // the users of Inst below.
+ bool Propagate = false;
+
+ Value *MatrixA;
+ Value *MatrixB;
+ Value *M;
+ Value *N;
+ Value *K;
+ if (match(Inst, m_Intrinsic<Intrinsic::matrix_multiply>(
+ m_Value(MatrixA), m_Value(MatrixB), m_Value(M),
+ m_Value(N), m_Value(K)))) {
+ Propagate = setShapeInfo(Inst, {M, K});
+ } else if (match(Inst, m_Intrinsic<Intrinsic::matrix_transpose>(
+ m_Value(MatrixA), m_Value(M), m_Value(N)))) {
+ // Flip dimensions.
+ Propagate = setShapeInfo(Inst, {N, M});
+ } else if (match(Inst, m_Intrinsic<Intrinsic::matrix_columnwise_store>(
+ m_Value(MatrixA), m_Value(), m_Value(),
+ m_Value(M), m_Value(N)))) {
+ Propagate = setShapeInfo(Inst, {N, M});
+ } else if (match(Inst,
+ m_Intrinsic<Intrinsic::matrix_columnwise_load>(
+ m_Value(), m_Value(), m_Value(M), m_Value(N)))) {
+ Propagate = setShapeInfo(Inst, {M, N});
+ } else if (match(Inst, m_Store(m_Value(MatrixA), m_Value()))) {
+ auto OpShape = ShapeMap.find(MatrixA);
+ if (OpShape != ShapeMap.end())
+ setShapeInfo(Inst, OpShape->second);
+ continue;
+ } else if (isUniformShape(Inst)) {
+ // Find the first operand that has a known shape and use that.
+ for (auto &Op : Inst->operands()) {
+ auto OpShape = ShapeMap.find(Op.get());
+ if (OpShape != ShapeMap.end()) {
+ Propagate |= setShapeInfo(Inst, OpShape->second);
+ break;
+ }
+ }
+ }
+
+ if (Propagate) {
+ NewWorkList.push_back(Inst);
+ for (auto *User : Inst->users())
+ if (ShapeMap.count(User) == 0)
+ WorkList.push_back(cast<Instruction>(User));
+ }
+ }
+
+ return NewWorkList;
+ }
+
+ /// Propagate the shape to operands of instructions with shape information.
+  /// \p WorkList contains the instructions for which we already know the
+  /// shapes.
+ SmallVector<Instruction *, 32>
+ propagateShapeBackward(SmallVectorImpl<Instruction *> &WorkList) {
+ SmallVector<Instruction *, 32> NewWorkList;
+
+ auto pushInstruction = [](Value *V,
+ SmallVectorImpl<Instruction *> &WorkList) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I)
+ WorkList.push_back(I);
+ };
+    // Pop an element with a known shape. Traverse its operands; if an
+    // operand's shape derives from the result shape and is still unknown, set
+    // it and add the operand to the worklist.
+ LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n");
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+
+ size_t BeforeProcessingV = WorkList.size();
+ if (!isa<Instruction>(V))
+ continue;
+
+ Value *MatrixA;
+ Value *MatrixB;
+ Value *M;
+ Value *N;
+ Value *K;
+ if (match(V, m_Intrinsic<Intrinsic::matrix_multiply>(
+ m_Value(MatrixA), m_Value(MatrixB), m_Value(M),
+ m_Value(N), m_Value(K)))) {
+ if (setShapeInfo(MatrixA, {M, N}))
+ pushInstruction(MatrixA, WorkList);
+
+ if (setShapeInfo(MatrixB, {N, K}))
+ pushInstruction(MatrixB, WorkList);
+
+ } else if (match(V, m_Intrinsic<Intrinsic::matrix_transpose>(
+ m_Value(MatrixA), m_Value(M), m_Value(N)))) {
+ // Flip dimensions.
+ if (setShapeInfo(MatrixA, {M, N}))
+ pushInstruction(MatrixA, WorkList);
+ } else if (match(V, m_Intrinsic<Intrinsic::matrix_columnwise_store>(
+ m_Value(MatrixA), m_Value(), m_Value(),
+ m_Value(M), m_Value(N)))) {
+ if (setShapeInfo(MatrixA, {M, N})) {
+ pushInstruction(MatrixA, WorkList);
+ }
+ } else if (isa<LoadInst>(V) ||
+ match(V, m_Intrinsic<Intrinsic::matrix_columnwise_load>())) {
+ // Nothing to do, no matrix input.
+ } else if (isa<StoreInst>(V)) {
+ // Nothing to do. We forward-propagated to this so we would just
+ // backward propagate to an instruction with an already known shape.
+ } else if (isUniformShape(V)) {
+ // Propagate to all operands.
+ ShapeInfo Shape = ShapeMap[V];
+ for (Use &U : cast<Instruction>(V)->operands()) {
+ if (setShapeInfo(U.get(), Shape))
+ pushInstruction(U.get(), WorkList);
+ }
+ }
+ // After we discovered new shape info for new instructions in the
+ // worklist, we use their users as seeds for the next round of forward
+ // propagation.
+ for (size_t I = BeforeProcessingV; I != WorkList.size(); I++)
+ for (User *U : WorkList[I]->users())
+ if (isa<Instruction>(U) && V != U)
+ NewWorkList.push_back(cast<Instruction>(U));
+ }
+ return NewWorkList;
+ }
+
+ bool Visit() {
+ if (EnableShapePropagation) {
+ SmallVector<Instruction *, 32> WorkList;
+
+ // Initially only the shape of matrix intrinsics is known.
+ // Initialize the work list with ops carrying shape information.
+ for (BasicBlock &BB : Func)
+ for (Instruction &Inst : BB) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!II)
+ continue;
+
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::matrix_multiply:
+ case Intrinsic::matrix_transpose:
+ case Intrinsic::matrix_columnwise_load:
+ case Intrinsic::matrix_columnwise_store:
+ WorkList.push_back(&Inst);
+ break;
+ default:
+ break;
+ }
+ }
+ // Propagate shapes until nothing changes any longer.
+ while (!WorkList.empty()) {
+ WorkList = propagateShapeForward(WorkList);
+ WorkList = propagateShapeBackward(WorkList);
+ }
+ }
+
+ ReversePostOrderTraversal<Function *> RPOT(&Func);
+ bool Changed = false;
+ for (auto *BB : RPOT) {
+ for (Instruction &Inst : make_early_inc_range(*BB)) {
+ IRBuilder<> Builder(&Inst);
+
+ if (CallInst *CInst = dyn_cast<CallInst>(&Inst))
+ Changed |= VisitCallInst(CInst);
+
+ Value *Op1;
+ Value *Op2;
+ if (auto *BinOp = dyn_cast<BinaryOperator>(&Inst))
+ Changed |= VisitBinaryOperator(BinOp);
+ if (match(&Inst, m_Load(m_Value(Op1))))
+ Changed |= VisitLoad(&Inst, Op1, Builder);
+ else if (match(&Inst, m_Store(m_Value(Op1), m_Value(Op2))))
+ Changed |= VisitStore(&Inst, Op1, Op2, Builder);
+ }
+ }
+
+ for (Instruction *Inst : reverse(ToRemove))
+ Inst->eraseFromParent();
+
+ return Changed;
+ }
+
+ LoadInst *createColumnLoad(Value *ColumnPtr, Type *EltType,
+ IRBuilder<> Builder) {
+ unsigned Align = DL.getABITypeAlignment(EltType);
+ return Builder.CreateAlignedLoad(ColumnPtr, Align, "col.load");
+ }
+
+ StoreInst *createColumnStore(Value *ColumnValue, Value *ColumnPtr,
+ Type *EltType, IRBuilder<> Builder) {
+ unsigned Align = DL.getABITypeAlignment(EltType);
+ return Builder.CreateAlignedStore(ColumnValue, ColumnPtr, Align);
+ }
+
+ /// Turns \p BasePtr into an elementwise pointer to \p EltType.
+ Value *createElementPtr(Value *BasePtr, Type *EltType, IRBuilder<> &Builder) {
+ unsigned AS = cast<PointerType>(BasePtr->getType())->getAddressSpace();
+ Type *EltPtrType = PointerType::get(EltType, AS);
+ return Builder.CreatePointerCast(BasePtr, EltPtrType);
+ }
+
+ /// Replace intrinsic calls
+ bool VisitCallInst(CallInst *Inst) {
+ if (!Inst->getCalledFunction() || !Inst->getCalledFunction()->isIntrinsic())
+ return false;
+
+ switch (Inst->getCalledFunction()->getIntrinsicID()) {
+ case Intrinsic::matrix_multiply:
+ LowerMultiply(Inst);
+ break;
+ case Intrinsic::matrix_transpose:
+ LowerTranspose(Inst);
+ break;
+ case Intrinsic::matrix_columnwise_load:
+ LowerColumnwiseLoad(Inst);
+ break;
+ case Intrinsic::matrix_columnwise_store:
+ LowerColumnwiseStore(Inst);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ void LowerLoad(Instruction *Inst, Value *Ptr, Value *Stride,
+ ShapeInfo Shape) {
+ IRBuilder<> Builder(Inst);
+ auto VType = cast<VectorType>(Inst->getType());
+ Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
+ ColumnMatrixTy Result;
+    // Stride is the distance in elements between the start of one column and
+    // the start of the next column.
+ for (unsigned C = 0, E = Shape.NumColumns; C < E; ++C) {
+ Value *GEP =
+ computeColumnAddr(EltPtr, Builder.getInt32(C), Stride, Shape.NumRows,
+ VType->getElementType(), Builder);
+ Value *Column = createColumnLoad(GEP, VType->getElementType(), Builder);
+ Result.addColumn(Column);
+ }
+
+ finalizeLowering(Inst, Result, Builder);
+ }
+
+ /// Lowers llvm.matrix.columnwise.load.
+ ///
+ /// The intrinsic loads a matrix from memory using a stride between columns.
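+  ///
+  /// For example, loading a 4 x 2 matrix of doubles with a stride of 6
+  /// becomes two <4 x double> column loads, at offsets 0 and 6 elements from
+  /// the base pointer.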
+ void LowerColumnwiseLoad(CallInst *Inst) {
+ Value *Ptr = Inst->getArgOperand(0);
+ Value *Stride = Inst->getArgOperand(1);
+ LowerLoad(Inst, Ptr, Stride,
+ {Inst->getArgOperand(2), Inst->getArgOperand(3)});
+ }
+
+ void LowerStore(Instruction *Inst, Value *Matrix, Value *Ptr, Value *Stride,
+ ShapeInfo Shape) {
+ IRBuilder<> Builder(Inst);
+ auto VType = cast<VectorType>(Matrix->getType());
+ Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
+ auto LM = getMatrix(Matrix, Shape, Builder);
+ for (auto C : enumerate(LM.columns())) {
+ Value *GEP =
+ computeColumnAddr(EltPtr, Builder.getInt32(C.index()), Stride,
+ Shape.NumRows, VType->getElementType(), Builder);
+ createColumnStore(C.value(), GEP, VType->getElementType(), Builder);
+ }
+
+ ToRemove.push_back(Inst);
+ }
+
+ /// Lowers llvm.matrix.columnwise.store.
+ ///
+  /// The intrinsic stores a matrix back to memory using a stride between
+  /// columns.
+ void LowerColumnwiseStore(CallInst *Inst) {
+ Value *Matrix = Inst->getArgOperand(0);
+ Value *Ptr = Inst->getArgOperand(1);
+ Value *Stride = Inst->getArgOperand(2);
+ LowerStore(Inst, Matrix, Ptr, Stride,
+ {Inst->getArgOperand(3), Inst->getArgOperand(4)});
+ }
+
+ /// Extract a column vector of \p NumElts starting at index (\p I, \p J) from
+ /// the matrix \p LM represented as a vector of column vectors.
+ Value *extractVector(const ColumnMatrixTy &LM, unsigned I, unsigned J,
+ unsigned NumElts, IRBuilder<> Builder) {
+ Value *Col = LM.getColumn(J);
+ Value *Undef = UndefValue::get(Col->getType());
+ Constant *Mask = createSequentialMask(Builder, I, NumElts, 0);
+ return Builder.CreateShuffleVector(Col, Undef, Mask, "block");
+ }
+
+  // Insert \p Block into \p Col starting at element \p I, i.e. set elements
+  // I..I+BlockNumElts-1 of Col to the elements of Block.
+ Value *insertVector(Value *Col, unsigned I, Value *Block,
+ IRBuilder<> Builder) {
+
+ // First, bring Block to the same size as Col
+ unsigned BlockNumElts =
+ cast<VectorType>(Block->getType())->getNumElements();
+ unsigned NumElts = cast<VectorType>(Col->getType())->getNumElements();
+ assert(NumElts >= BlockNumElts && "Too few elements for current block");
+
+ Value *ExtendMask =
+ createSequentialMask(Builder, 0, BlockNumElts, NumElts - BlockNumElts);
+ Value *Undef = UndefValue::get(Block->getType());
+ Block = Builder.CreateShuffleVector(Block, Undef, ExtendMask);
+
+ // If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7,
+ // 8, 4, 5, 6
+ SmallVector<Constant *, 16> Mask;
+ unsigned i;
+ for (i = 0; i < I; i++)
+ Mask.push_back(Builder.getInt32(i));
+
+ unsigned VecNumElts = cast<VectorType>(Col->getType())->getNumElements();
+ for (; i < I + BlockNumElts; i++)
+ Mask.push_back(Builder.getInt32(i - I + VecNumElts));
+
+ for (; i < VecNumElts; i++)
+ Mask.push_back(Builder.getInt32(i));
+
+ Value *MaskVal = ConstantVector::get(Mask);
+
+ return Builder.CreateShuffleVector(Col, Block, MaskVal);
+ }
+
+ Value *createMulAdd(Value *Sum, Value *A, Value *B, bool UseFPOp,
+ IRBuilder<> &Builder, bool AllowContraction) {
+
+ if (!Sum)
+ return UseFPOp ? Builder.CreateFMul(A, B) : Builder.CreateMul(A, B);
+
+ if (UseFPOp) {
+ if (AllowContraction) {
+ // Use fmuladd for floating point operations and let the backend decide
+ // if that's profitable.
+ Value *FMulAdd = Intrinsic::getDeclaration(
+ Func.getParent(), Intrinsic::fmuladd, A->getType());
+ return Builder.CreateCall(FMulAdd, {A, B, Sum});
+ }
+ Value *Mul = Builder.CreateFMul(A, B);
+ return Builder.CreateFAdd(Sum, Mul);
+ }
+
+ Value *Mul = Builder.CreateMul(A, B);
+ return Builder.CreateAdd(Sum, Mul);
+ }
+
+ /// Cache \p Matrix as result of \p Inst and update the uses of \p Inst. For
+  /// users with shape information, there's nothing to do: they will use the
+ /// cached value when they are lowered. For other users, \p Matrix is
+ /// flattened and the uses are updated to use it. Also marks \p Inst for
+ /// deletion.
+ void finalizeLowering(Instruction *Inst, ColumnMatrixTy Matrix,
+ IRBuilder<> &Builder) {
+ Inst2ColumnMatrix.insert(std::make_pair(Inst, Matrix));
+
+ ToRemove.push_back(Inst);
+ Value *Flattened = nullptr;
+ for (auto I = Inst->use_begin(), E = Inst->use_end(); I != E;) {
+ Use &U = *I++;
+ if (ShapeMap.find(U.getUser()) == ShapeMap.end()) {
+ if (!Flattened)
+ Flattened = Matrix.embedInVector(Builder);
+ U.set(Flattened);
+ }
+ }
+ }
+
+ /// Lowers llvm.matrix.multiply.
+ void LowerMultiply(CallInst *MatMul) {
+ IRBuilder<> Builder(MatMul);
+ auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
+ ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
+ ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
+
+ const ColumnMatrixTy &Lhs =
+ getMatrix(MatMul->getArgOperand(0), LShape, Builder);
+ const ColumnMatrixTy &Rhs =
+ getMatrix(MatMul->getArgOperand(1), RShape, Builder);
+
+ const unsigned R = LShape.NumRows;
+ const unsigned M = LShape.NumColumns;
+ const unsigned C = RShape.NumColumns;
+ assert(M == RShape.NumRows);
+
+ // Initialize the output
+ ColumnMatrixTy Result;
+ for (unsigned J = 0; J < C; ++J)
+ Result.addColumn(UndefValue::get(VectorType::get(EltType, R)));
+
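+    // Estimate how many matrix elements fit into a vector register; this is
+    // used as the initial block size when tiling the result columns below.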
+ const unsigned VF = std::max(TTI.getRegisterBitWidth(true) /
+ EltType->getPrimitiveSizeInBits(),
+ uint64_t(1));
+
+ bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
+ MatMul->hasAllowContract());
+    // Multiply columns from the first operand with scalars from the second
+    // operand. Then move along the K axis and accumulate the columns. With
+    // this the adds can be vectorized without reassociation.
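+    //
+    // In pseudo code, each block computes
+    //   Result[I..I+BlockSize-1, J] = sum over K of
+    //     Lhs[I..I+BlockSize-1, K] * splat(Rhs[K, J])
+    // for K = 0..M-1.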
+ for (unsigned J = 0; J < C; ++J) {
+ unsigned BlockSize = VF;
+ for (unsigned I = 0; I < R; I += BlockSize) {
+ // Gradually lower the vectorization factor to cover the remainder.
+ while (I + BlockSize > R)
+ BlockSize /= 2;
+
+ Value *Sum = nullptr;
+ for (unsigned K = 0; K < M; ++K) {
+ Value *L = extractVector(Lhs, I, K, BlockSize, Builder);
+ Value *RH = Builder.CreateExtractElement(Rhs.getColumn(J), K);
+ Value *Splat = Builder.CreateVectorSplat(BlockSize, RH, "splat");
+ Sum = createMulAdd(Sum, L, Splat, EltType->isFloatingPointTy(),
+ Builder, AllowContract);
+ }
+ Result.setColumn(J, insertVector(Result.getColumn(J), I, Sum, Builder));
+ }
+ }
+ finalizeLowering(MatMul, Result, Builder);
+ }
+
+ /// Lowers llvm.matrix.transpose.
+ void LowerTranspose(CallInst *Inst) {
+ ColumnMatrixTy Result;
+ IRBuilder<> Builder(Inst);
+ Value *InputVal = Inst->getArgOperand(0);
+ VectorType *VectorTy = cast<VectorType>(InputVal->getType());
+ ShapeInfo ArgShape(Inst->getArgOperand(1), Inst->getArgOperand(2));
+ ColumnMatrixTy InputMatrix = getMatrix(InputVal, ArgShape, Builder);
+
+ for (unsigned Row = 0; Row < ArgShape.NumRows; ++Row) {
+ // Build a single column vector for this row. First initialize it.
+ Value *ResultColumn = UndefValue::get(
+ VectorType::get(VectorTy->getElementType(), ArgShape.NumColumns));
+
+      // Go through the elements of this row and insert them into the
+      // resulting column vector.
+ for (auto C : enumerate(InputMatrix.columns())) {
+ Value *Elt = Builder.CreateExtractElement(C.value(), Row);
+        // We insert at the input's column index, since that becomes the row
+        // index after the transpose.
+ ResultColumn =
+ Builder.CreateInsertElement(ResultColumn, Elt, C.index());
+ }
+ Result.addColumn(ResultColumn);
+ }
+
+ finalizeLowering(Inst, Result, Builder);
+ }
+
+ /// Lower load instructions, if shape information is available.
+ bool VisitLoad(Instruction *Inst, Value *Ptr, IRBuilder<> &Builder) {
+ auto I = ShapeMap.find(Inst);
+ if (I == ShapeMap.end())
+ return false;
+
+ LowerLoad(Inst, Ptr, Builder.getInt32(I->second.NumRows), I->second);
+ return true;
+ }
+
+ bool VisitStore(Instruction *Inst, Value *StoredVal, Value *Ptr,
+ IRBuilder<> &Builder) {
+ auto I = ShapeMap.find(StoredVal);
+ if (I == ShapeMap.end())
+ return false;
+
+    LowerStore(Inst, StoredVal, Ptr, Builder.getInt32(I->second.NumRows),
+               I->second);
+ return true;
+ }
+
+ /// Lower binary operators, if shape information is available.
+ bool VisitBinaryOperator(BinaryOperator *Inst) {
+ auto I = ShapeMap.find(Inst);
+ if (I == ShapeMap.end())
+ return false;
+
+ Value *Lhs = Inst->getOperand(0);
+ Value *Rhs = Inst->getOperand(1);
+
+ IRBuilder<> Builder(Inst);
+ ShapeInfo &Shape = I->second;
+
+ ColumnMatrixTy LoweredLhs = getMatrix(Lhs, Shape, Builder);
+ ColumnMatrixTy LoweredRhs = getMatrix(Rhs, Shape, Builder);
+
+    // Apply the operation to each pair of columns and collect the resulting
+    // columns.
+ ColumnMatrixTy Result;
+ auto BuildColumnOp = [&Builder, Inst](Value *LHS, Value *RHS) {
+ switch (Inst->getOpcode()) {
+ case Instruction::Add:
+ return Builder.CreateAdd(LHS, RHS);
+ case Instruction::Mul:
+ return Builder.CreateMul(LHS, RHS);
+ case Instruction::Sub:
+ return Builder.CreateSub(LHS, RHS);
+ case Instruction::FAdd:
+ return Builder.CreateFAdd(LHS, RHS);
+ case Instruction::FMul:
+ return Builder.CreateFMul(LHS, RHS);
+ case Instruction::FSub:
+ return Builder.CreateFSub(LHS, RHS);
+ default:
+ llvm_unreachable("Unsupported binary operator for matrix");
+ }
+ };
+ for (unsigned C = 0; C < Shape.NumColumns; ++C)
+ Result.addColumn(
+ BuildColumnOp(LoweredLhs.getColumn(C), LoweredRhs.getColumn(C)));
+
+ finalizeLowering(Inst, Result, Builder);
+ return true;
+ }
+};
+} // namespace
+
+PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ LowerMatrixIntrinsics LMT(F, TTI);
+ if (LMT.Visit()) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
+
+namespace {
+
+class LowerMatrixIntrinsicsLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ LowerMatrixIntrinsicsLegacyPass() : FunctionPass(ID) {
+ initializeLowerMatrixIntrinsicsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LowerMatrixIntrinsics LMT(F, *TTI);
+ bool C = LMT.Visit();
+ return C;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+} // namespace
+
+static const char pass_name[] = "Lower the matrix intrinsics";
+char LowerMatrixIntrinsicsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
+ false, false)
+INITIALIZE_PASS_END(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
+ false, false)
+
+Pass *llvm::createLowerMatrixIntrinsicsPass() {
+ return new LowerMatrixIntrinsicsLegacyPass();
+}
diff --git a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
index 5342f2ddcb6b1..73b2cd06fa230 100644
--- a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
diff --git a/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index 789232e0f5ce7..5ffae128f5f0f 100644
--- a/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -33,10 +33,11 @@
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
@@ -56,23 +57,11 @@ struct MakeGuardsExplicitLegacyPass : public FunctionPass {
static void turnToExplicitForm(CallInst *Guard, Function *DeoptIntrinsic) {
// Replace the guard with an explicit branch (just like in GuardWidening).
- BasicBlock *BB = Guard->getParent();
- makeGuardControlFlowExplicit(DeoptIntrinsic, Guard);
- BranchInst *ExplicitGuard = cast<BranchInst>(BB->getTerminator());
- assert(ExplicitGuard->isConditional() && "Must be!");
+ BasicBlock *OriginalBB = Guard->getParent();
+ (void)OriginalBB;
+ makeGuardControlFlowExplicit(DeoptIntrinsic, Guard, true);
+ assert(isWidenableBranch(OriginalBB->getTerminator()) && "should hold");
- // We want the guard to be expressed as explicit control flow, but still be
- // widenable. For that, we add Widenable Condition intrinsic call to the
- // guard's condition.
- IRBuilder<> B(ExplicitGuard);
- auto *WidenableCondition =
- B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
- {}, {}, nullptr, "widenable_cond");
- WidenableCondition->setCallingConv(Guard->getCallingConv());
- auto *NewCond =
- B.CreateAnd(ExplicitGuard->getCondition(), WidenableCondition);
- NewCond->setName("exiplicit_guard_cond");
- ExplicitGuard->setCondition(NewCond);
Guard->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2364748efb057..c24fa40860eb3 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -24,7 +24,6 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -49,12 +48,14 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -387,16 +388,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
StartPtr = Range.StartPtr;
// Determine alignment
- unsigned Alignment = Range.Alignment;
- if (Alignment == 0) {
- Type *EltType =
- cast<PointerType>(StartPtr->getType())->getElementType();
- Alignment = DL.getABITypeAlignment(EltType);
- }
-
- AMemSet =
- Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+ const Align Alignment = DL.getValueOrABITypeAlignment(
+ MaybeAlign(Range.Alignment),
+ cast<PointerType>(StartPtr->getType())->getElementType());
+ AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
+ Alignment);
LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
: Range.TheStores) dbgs()
<< *SI << '\n';
@@ -416,25 +413,21 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
return AMemSet;
}
-static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
- unsigned StoreAlign = SI->getAlignment();
- if (!StoreAlign)
- StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
- return StoreAlign;
+static Align findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
+ return DL.getValueOrABITypeAlignment(MaybeAlign(SI->getAlignment()),
+ SI->getOperand(0)->getType());
}
-static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
- unsigned LoadAlign = LI->getAlignment();
- if (!LoadAlign)
- LoadAlign = DL.getABITypeAlignment(LI->getType());
- return LoadAlign;
+static Align findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
+ return DL.getValueOrABITypeAlignment(MaybeAlign(LI->getAlignment()),
+ LI->getType());
}
-static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
- const LoadInst *LI) {
- unsigned StoreAlign = findStoreAlignment(DL, SI);
- unsigned LoadAlign = findLoadAlignment(DL, LI);
- return MinAlign(StoreAlign, LoadAlign);
+static Align findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+ const LoadInst *LI) {
+ Align StoreAlign = findStoreAlignment(DL, SI);
+ Align LoadAlign = findLoadAlignment(DL, LI);
+ return commonAlignment(StoreAlign, LoadAlign);
}
// This method try to lift a store instruction before position P.
@@ -649,7 +642,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LI, SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
DL.getTypeStoreSize(SI->getOperand(0)->getType()),
- findCommonAlignment(DL, SI, LI), C);
+ findCommonAlignment(DL, SI, LI).value(), C);
if (changed) {
MD->removeInstruction(SI);
SI->eraseFromParent();
@@ -682,12 +675,11 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
auto *T = V->getType();
if (T->isAggregateType()) {
uint64_t Size = DL.getTypeStoreSize(T);
- unsigned Align = SI->getAlignment();
- if (!Align)
- Align = DL.getABITypeAlignment(T);
+ const Align MA =
+ DL.getValueOrABITypeAlignment(MaybeAlign(SI->getAlignment()), T);
IRBuilder<> Builder(SI);
auto *M =
- Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, Align);
+ Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, MA);
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
@@ -982,12 +974,12 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// example we could be moving from movaps -> movq on x86.
IRBuilder<> Builder(M);
if (UseMemMove)
- Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(),
- MDep->getRawSource(), MDep->getSourceAlignment(),
+ Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
M->getLength(), M->isVolatile());
else
- Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(),
- MDep->getRawSource(), MDep->getSourceAlignment(),
+ Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
+ MDep->getRawSource(), MDep->getSourceAlign(),
M->getLength(), M->isVolatile());
// Remove the instruction we're replacing.
@@ -1057,7 +1049,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Builder.CreateMemSet(
Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
SrcSize),
- MemSet->getOperand(1), MemsetLen, Align);
+ MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
MD->removeInstruction(MemSet);
MemSet->eraseFromParent();
@@ -1125,8 +1117,8 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
}
IRBuilder<> Builder(MemCpy);
- Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
- CopySize, MemCpy->getDestAlignment());
+ Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize,
+ MaybeAlign(MemCpy->getDestAlignment()));
return true;
}
@@ -1153,7 +1145,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
M->getModule()->getDataLayout())) {
IRBuilder<> Builder(M);
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
- M->getDestAlignment(), false);
+ MaybeAlign(M->getDestAlignment()), false);
MD->removeInstruction(M);
M->eraseFromParent();
++NumCpyToSet;
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index 98a45b391319e..ce1e142101b82 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -50,6 +50,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 9799ea7960ec5..6b0d0202d9bb9 100644
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -83,6 +83,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index 1260bd39cdee3..bba9082e31b2f 100644
--- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -82,7 +82,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -101,10 +100,12 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index b213264de557e..6a643480f3128 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -76,7 +76,6 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -94,6 +93,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
@@ -106,6 +106,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
#include "llvm/Transforms/Utils/VNCoercion.h"
#include <algorithm>
@@ -489,11 +490,11 @@ namespace {
class NewGVN {
Function &F;
- DominatorTree *DT;
- const TargetLibraryInfo *TLI;
- AliasAnalysis *AA;
- MemorySSA *MSSA;
- MemorySSAWalker *MSSAWalker;
+ DominatorTree *DT = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ MemorySSA *MSSA = nullptr;
+ MemorySSAWalker *MSSAWalker = nullptr;
const DataLayout &DL;
std::unique_ptr<PredicateInfo> PredInfo;
@@ -505,7 +506,7 @@ class NewGVN {
const SimplifyQuery SQ;
// Number of function arguments, used by ranking
- unsigned int NumFuncArgs;
+ unsigned int NumFuncArgs = 0;
// RPOOrdering of basic blocks
DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
@@ -516,9 +517,9 @@ class NewGVN {
// startsout in, and represents any value. Being an optimistic analysis,
// anything in the TOP class has the value TOP, which is indeterminate and
// equivalent to everything.
- CongruenceClass *TOPClass;
+ CongruenceClass *TOPClass = nullptr;
std::vector<CongruenceClass *> CongruenceClasses;
- unsigned NextCongruenceNum;
+ unsigned NextCongruenceNum = 0;
// Value Mappings.
DenseMap<Value *, CongruenceClass *> ValueToClass;
@@ -862,7 +863,7 @@ private:
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
- int64_t StartingVNCounter;
+ int64_t StartingVNCounter = 0;
};
} // end anonymous namespace
diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 68a0f5151ad50..58763ec72eced 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index beb299272ed8d..5c4a89977c388 100644
--- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -47,6 +47,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SetVector.h"
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 124f625ef7b66..41940e980faa9 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -30,7 +30,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -50,12 +49,14 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <utility>
@@ -173,7 +174,7 @@ void ReassociatePass::BuildRankMap(Function &F,
<< "\n");
}
- // Traverse basic blocks in ReversePostOrder
+ // Traverse basic blocks in ReversePostOrder.
for (BasicBlock *BB : RPOT) {
unsigned BBRank = RankMap[BB] = ++Rank << 16;
@@ -1898,6 +1899,7 @@ void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
ValueRankMap.erase(I);
Insts.remove(I);
RedoInsts.remove(I);
+ llvm::salvageDebugInfoOrMarkUndef(*I);
I->eraseFromParent();
for (auto Op : Ops)
if (Instruction *OpInst = dyn_cast<Instruction>(Op))
@@ -1914,6 +1916,7 @@ void ReassociatePass::EraseInst(Instruction *I) {
// Erase the dead instruction.
ValueRankMap.erase(I);
RedoInsts.remove(I);
+ llvm::salvageDebugInfoOrMarkUndef(*I);
I->eraseFromParent();
// Optimize its operands.
SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
diff --git a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 3296322e00d5f..0716c13209820 100644
--- a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -16,16 +16,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 48bbdd8d1b33f..b242f100fafff 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -54,6 +54,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index 10fbdc8aacd2f..e696ea83a300e 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -29,7 +29,6 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/BasicBlock.h"
@@ -49,12 +48,14 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
#include <cassert>
#include <utility>
@@ -2196,7 +2197,7 @@ bool llvm::runIPSCCP(
findReturnsToZap(*F, ReturnsToZap, Solver);
}
- for (const auto &F : Solver.getMRVFunctionsTracked()) {
+ for (auto F : Solver.getMRVFunctionsTracked()) {
assert(F->getReturnType()->isStructTy() &&
"The return type should be a struct");
StructType *STy = cast<StructType>(F->getReturnType());
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 74b8ff9130502..89916e43fce29 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -41,7 +41,6 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -71,6 +70,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -80,6 +80,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
@@ -361,7 +362,7 @@ private:
/// The beginning and ending offsets of the alloca for this
/// partition.
- uint64_t BeginOffset, EndOffset;
+ uint64_t BeginOffset = 0, EndOffset = 0;
/// The start and end iterators of this partition.
iterator SI, SJ;
@@ -1680,24 +1681,20 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
}
/// Compute the adjusted alignment for a load or store from an offset.
-static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
- const DataLayout &DL) {
- unsigned Alignment;
+static Align getAdjustedAlignment(Instruction *I, uint64_t Offset,
+ const DataLayout &DL) {
+ MaybeAlign Alignment;
Type *Ty;
if (auto *LI = dyn_cast<LoadInst>(I)) {
- Alignment = LI->getAlignment();
+ Alignment = MaybeAlign(LI->getAlignment());
Ty = LI->getType();
} else if (auto *SI = dyn_cast<StoreInst>(I)) {
- Alignment = SI->getAlignment();
+ Alignment = MaybeAlign(SI->getAlignment());
Ty = SI->getValueOperand()->getType();
} else {
llvm_unreachable("Only loads and stores are allowed!");
}
-
- if (!Alignment)
- Alignment = DL.getABITypeAlignment(Ty);
-
- return MinAlign(Alignment, Offset);
+ return commonAlignment(DL.getValueOrABITypeAlignment(Alignment, Ty), Offset);
}
/// Test whether we can convert a value from the old to the new type.
@@ -2300,9 +2297,9 @@ class llvm::sroa::AllocaSliceRewriter
// The new offsets of the slice currently being rewritten relative to the
// original alloca.
- uint64_t NewBeginOffset, NewEndOffset;
+ uint64_t NewBeginOffset = 0, NewEndOffset = 0;
- uint64_t SliceSize;
+ uint64_t SliceSize = 0;
bool IsSplittable = false;
bool IsSplit = false;
Use *OldUse = nullptr;
@@ -2432,13 +2429,14 @@ private:
///
/// You can optionally pass a type to this routine and if that type's ABI
/// alignment is itself suitable, this will return zero.
- unsigned getSliceAlign(Type *Ty = nullptr) {
- unsigned NewAIAlign = NewAI.getAlignment();
- if (!NewAIAlign)
- NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
- unsigned Align =
- MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
- return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
+ MaybeAlign getSliceAlign(Type *Ty = nullptr) {
+ const MaybeAlign NewAIAlign = DL.getValueOrABITypeAlignment(
+ MaybeAlign(NewAI.getAlignment()), NewAI.getAllocatedType());
+ const MaybeAlign Align =
+ commonAlignment(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
+ return (Ty && Align && Align->value() == DL.getABITypeAlignment(Ty))
+ ? None
+ : Align;
}
unsigned getIndex(uint64_t Offset) {
@@ -2800,7 +2798,7 @@ private:
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
- getSliceAlign(), II.isVolatile());
+ MaybeAlign(getSliceAlign()), II.isVolatile());
if (AATags)
New->setAAMetadata(AATags);
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
@@ -2886,7 +2884,7 @@ private:
assert((IsDest && II.getRawDest() == OldPtr) ||
(!IsDest && II.getRawSource() == OldPtr));
- unsigned SliceAlign = getSliceAlign();
+ MaybeAlign SliceAlign = getSliceAlign();
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
@@ -2956,10 +2954,10 @@ private:
// Compute the relative offset for the other pointer within the transfer.
unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
- unsigned OtherAlign =
- IsDest ? II.getSourceAlignment() : II.getDestAlignment();
- OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,
- OtherOffset.zextOrTrunc(64).getZExtValue());
+ Align OtherAlign =
+ assumeAligned(IsDest ? II.getSourceAlignment() : II.getDestAlignment());
+ OtherAlign =
+ commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
if (EmitMemCpy) {
// Compute the other pointer, folding as much as possible to produce
@@ -2972,7 +2970,7 @@ private:
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
Value *DestPtr, *SrcPtr;
- unsigned DestAlign, SrcAlign;
+ MaybeAlign DestAlign, SrcAlign;
// Note: IsDest is true iff we're copying into the new alloca slice
if (IsDest) {
DestPtr = OurPtr;
@@ -3019,9 +3017,9 @@ private:
Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
- unsigned SrcAlign = OtherAlign;
+ MaybeAlign SrcAlign = OtherAlign;
Value *DstPtr = &NewAI;
- unsigned DstAlign = SliceAlign;
+ MaybeAlign DstAlign = SliceAlign;
if (!IsDest) {
std::swap(SrcPtr, DstPtr);
std::swap(SrcAlign, DstAlign);
@@ -3117,20 +3115,17 @@ private:
Instruction *I = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- unsigned LoadAlign = LI->getAlignment();
- if (!LoadAlign)
- LoadAlign = DL.getABITypeAlignment(LI->getType());
- LI->setAlignment(MaybeAlign(std::min(LoadAlign, getSliceAlign())));
+ MaybeAlign LoadAlign = DL.getValueOrABITypeAlignment(
+ MaybeAlign(LI->getAlignment()), LI->getType());
+ LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- unsigned StoreAlign = SI->getAlignment();
- if (!StoreAlign) {
Value *Op = SI->getOperand(0);
- StoreAlign = DL.getABITypeAlignment(Op->getType());
- }
- SI->setAlignment(MaybeAlign(std::min(StoreAlign, getSliceAlign())));
- continue;
+ MaybeAlign StoreAlign = DL.getValueOrABITypeAlignment(
+ MaybeAlign(SI->getAlignment()), Op->getType());
+ SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+ continue;
}
assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
@@ -3222,7 +3217,7 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
/// The current pointer use being rewritten. This is used to dig up the used
/// value (as opposed to the user).
- Use *U;
+ Use *U = nullptr;
/// Used to calculate offsets, and hence alignment, of subobjects.
const DataLayout &DL;
@@ -3277,7 +3272,7 @@ private:
Type *BaseTy;
/// Known alignment of the base pointer.
- unsigned BaseAlign;
+ Align BaseAlign;
/// To calculate offset of each component so we can correctly deduce
/// alignments.
@@ -3286,7 +3281,7 @@ private:
/// Initialize the splitter with an insertion point, Ptr and start with a
/// single zero GEP index.
OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
- unsigned BaseAlign, const DataLayout &DL)
+ Align BaseAlign, const DataLayout &DL)
: IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}
@@ -3308,7 +3303,7 @@ private:
if (Ty->isSingleValueType()) {
unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
return static_cast<Derived *>(this)->emitFunc(
- Ty, Agg, MinAlign(BaseAlign, Offset), Name);
+ Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
}
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
@@ -3349,18 +3344,20 @@ private:
AAMDNodes AATags;
LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
- AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ AAMDNodes AATags, Align BaseAlign, const DataLayout &DL)
: OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
- DL), AATags(AATags) {}
+ DL),
+ AATags(AATags) {}
/// Emit a leaf load of a single value. This is called at the leaves of the
/// recursive emission to actually load values.
- void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
- LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load");
+ LoadInst *Load =
+ IRB.CreateAlignedLoad(Ty, GEP, Alignment.value(), Name + ".load");
if (AATags)
Load->setAAMetadata(AATags);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3388,14 +3385,14 @@ private:
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
- AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ AAMDNodes AATags, Align BaseAlign, const DataLayout &DL)
: OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
DL),
AATags(AATags) {}
AAMDNodes AATags;
/// Emit a leaf store of a single value. This is called at the leaves of the
/// recursive emission to actually produce stores.
- void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
//
@@ -3406,7 +3403,7 @@ private:
Value *InBoundsGEP =
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
- IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
+ IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment.value());
if (AATags)
Store->setAAMetadata(AATags);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
@@ -3863,8 +3860,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
getAdjustedPtr(IRB, DL, BasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
- LI->getName());
+ getAdjustedAlignment(LI, PartOffset, DL).value(),
+ /*IsVolatile*/ false, LI->getName());
PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
@@ -3921,7 +3918,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL).value(),
+ /*IsVolatile*/ false);
PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
@@ -4005,8 +4003,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
LoadPartPtrTy, LoadBasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
- LI->getName());
+ getAdjustedAlignment(LI, PartOffset, DL).value(),
+ /*IsVolatile*/ false, LI->getName());
}
// And store this partition.
@@ -4017,7 +4015,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
StorePartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL).value(),
+ /*IsVolatile*/ false);
// Now build a new slice for the alloca.
NewSlices.push_back(
@@ -4152,20 +4151,19 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// FIXME: We might want to defer PHI speculation until after here.
// FIXME: return nullptr;
} else {
- unsigned Alignment = AI.getAlignment();
- if (!Alignment) {
- // The minimum alignment which users can rely on when the explicit
- // alignment is omitted or zero is that required by the ABI for this
- // type.
- Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
- }
- Alignment = MinAlign(Alignment, P.beginOffset());
+ // If the alignment is unspecified we fall back to the one required by the
+ // ABI for this type. We also make sure the alignment is compatible with
+ // P.beginOffset().
+ const Align Alignment = commonAlignment(
+ DL.getValueOrABITypeAlignment(MaybeAlign(AI.getAlignment()),
+ AI.getAllocatedType()),
+ P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
- if (Alignment <= DL.getABITypeAlignment(SliceTy))
- Alignment = 0;
+ const bool IsUnconstrained = Alignment <= DL.getABITypeAlignment(SliceTy);
NewAI = new AllocaInst(
- SliceTy, AI.getType()->getAddressSpace(), nullptr, Alignment,
+ SliceTy, AI.getType()->getAddressSpace(), nullptr,
+ IsUnconstrained ? MaybeAlign() : Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
// Copy the old AI debug location over to the new one.
NewAI->setDebugLoc(AI.getDebugLoc());
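
The SROA hunks above migrate raw unsigned alignments to the Align/MaybeAlign types: when an instruction or alloca carries no explicit alignment, the ABI alignment of its type is assumed, and the result is then clamped to the largest power of two that also divides the byte offset of the slice (what commonAlignment/MinAlign compute). Below is a minimal standalone C++ sketch of that arithmetic using plain integers rather than the LLVM Align classes; the helper names mirror the ones in the diff but are hypothetical stand-ins.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <optional>

// Hypothetical stand-in for MaybeAlign: an optional explicit alignment.
using MaybeAlign = std::optional<uint64_t>;

// Models DataLayout::getValueOrABITypeAlignment: use the explicit alignment if
// present, otherwise fall back to the ABI alignment of the type.
static uint64_t valueOrABITypeAlignment(MaybeAlign Explicit, uint64_t ABIAlign) {
  return Explicit ? *Explicit : ABIAlign;
}

// Models commonAlignment/MinAlign: the largest power of two dividing both the
// alignment and the offset; an offset of zero keeps the alignment unchanged.
static uint64_t commonAlignment(uint64_t Alignment, uint64_t Offset) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0 &&
         "alignment must be a power of two");
  return Offset == 0 ? Alignment : std::min(Alignment, Offset & (~Offset + 1));
}

int main() {
  // A load with no explicit alignment from an i32-sized slice at byte offset 6.
  MaybeAlign Explicit;    // nothing recorded on the instruction
  uint64_t ABIAlign = 4;  // ABI alignment of the loaded type
  uint64_t Adjusted = commonAlignment(valueOrABITypeAlignment(Explicit, ABIAlign), 6);
  std::cout << Adjusted << "\n";  // prints 2: the offset, not the type, is the binding constraint
}
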
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 1d2e40bf62be7..9d088547b4369 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -82,6 +82,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerConstantIntrinsicsPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
+ initializeLowerMatrixIntrinsicsLegacyPassPass(Registry);
initializeLowerWidenableConditionLegacyPassPass(Registry);
initializeMemCpyOptLegacyPassPass(Registry);
initializeMergeICmpsLegacyPassPass(Registry);
@@ -89,6 +90,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
initializeReassociateLegacyPassPass(Registry);
+ initializeRedundantDbgInstEliminationPass(Registry);
initializeRegToMemPass(Registry);
initializeRewriteStatepointsForGCLegacyPassPass(Registry);
initializeSCCPLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 2ee1a3a95f2a6..c25c6c632b8f0 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
@@ -21,6 +22,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -33,12 +35,12 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Options.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -173,8 +175,8 @@ struct VectorLayout {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
- ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
- : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
+ ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT)
+ : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT) {
}
bool visit(Function &F);
@@ -214,6 +216,8 @@ private:
GatherList Gathered;
unsigned ParallelLoopAccessMDKind;
+
+ DominatorTree *DT;
};
class ScalarizerLegacyPass : public FunctionPass {
@@ -225,6 +229,11 @@ public:
}
bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
};
} // end anonymous namespace
@@ -232,6 +241,7 @@ public:
char ScalarizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
@@ -303,7 +313,8 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) {
Module &M = *F.getParent();
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
return Impl.visit(F);
}
@@ -340,6 +351,15 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
return Scatterer(BB, BB->begin(), V, &Scattered[V]);
}
if (Instruction *VOp = dyn_cast<Instruction>(V)) {
+ // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
+ // nodes in predecessors. If those predecessors are unreachable from entry,
+ // then the IR in those blocks could have unexpected properties resulting in
+ // infinite loops in Scatterer::operator[]. By simply treating values
+ // originating from instructions in unreachable blocks as undef we do not
+ // need to analyse them further.
+ if (!DT->isReachableFromEntry(VOp->getParent()))
+ return Scatterer(Point->getParent(), Point->getIterator(),
+ UndefValue::get(V->getType()));
// Put the scattered form of an instruction directly after the
// instruction.
BasicBlock *BB = VOp->getParent();
@@ -856,7 +876,10 @@ PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM)
Module &M = *F.getParent();
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
+ DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
bool Changed = Impl.visit(F);
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return Changed ? PA : PreservedAnalyses::all();
}
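
The Scalarizer change threads a DominatorTree into both the legacy and new-pass-manager entry points so that scatter() can treat values defined in blocks unreachable from the function entry as undef instead of chasing them, and so the new-PM run() can report the dominator tree as preserved. A standalone C++ sketch of the reachability test this relies on, using a toy CFG rather than LLVM's DominatorTree, is shown below.

#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <vector>

using CFG = std::unordered_map<int, std::vector<int>>; // block -> successors

// Depth-first search from the entry block; models what
// DominatorTree::isReachableFromEntry answers for a basic block.
static bool isReachableFromEntry(const CFG &G, int Entry, int Block) {
  std::unordered_set<int> Seen;
  std::vector<int> Work{Entry};
  while (!Work.empty()) {
    int B = Work.back();
    Work.pop_back();
    if (!Seen.insert(B).second)
      continue;
    if (B == Block)
      return true;
    auto It = G.find(B);
    if (It != G.end())
      Work.insert(Work.end(), It->second.begin(), It->second.end());
  }
  return false;
}

int main() {
  // Blocks 0 -> 1 -> 2; block 3 branches into 1 but nothing reaches block 3.
  CFG G{{0, {1}}, {1, {2}}, {3, {1}}};
  std::cout << isReachableFromEntry(G, 0, 2) << " "   // 1
            << isReachableFromEntry(G, 0, 3) << "\n"; // 0: values defined there can be treated as undef
}
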
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 41554fccdf087..2a1a040bf83ee 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -164,7 +164,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -182,6 +181,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -189,6 +189,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index ac832b9b45671..d7a34acb43184 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -38,8 +38,10 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTree.h"
@@ -263,7 +265,7 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
/// to an entirely separate nest.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
DominatorTree &DT, LoopInfo &LI,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
// If the loop is already at the top level, we can't hoist it anywhere.
Loop *OldParentL = L.getParentLoop();
if (!OldParentL)
@@ -317,7 +319,7 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
// Because we just hoisted a loop out of this one, we have essentially
// created new exit paths from it. That means we need to form LCSSA PHI
// nodes for values used in the no-longer-nested loop.
- formLCSSA(*OldContainingL, DT, &LI, nullptr);
+ formLCSSA(*OldContainingL, DT, &LI, SE);
// We shouldn't need to form dedicated exits because the exit introduced
// here is the (just split by unswitching) preheader. However, after trivial
@@ -329,6 +331,20 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
}
}
+// Return the top-most loop that contains ExitBB and has ExitBB as an exiting
+// block, or the loop containing ExitBB if no parent loop has ExitBB as an
+// exiting block.
+static Loop *getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI) {
+ Loop *TopMost = LI.getLoopFor(ExitBB);
+ Loop *Current = TopMost;
+ while (Current) {
+ if (Current->isLoopExiting(ExitBB))
+ TopMost = Current;
+ Current = Current->getParentLoop();
+ }
+ return TopMost;
+}
+
/// Unswitch a trivial branch if the condition is loop invariant.
///
/// This routine should only be called when loop code leading to the branch has
@@ -413,9 +429,10 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
});
// If we have scalar evolutions, we need to invalidate them including this
- // loop and the loop containing the exit block.
+ // loop, the loop containing the exit block and the topmost parent loop
+ // exiting via LoopExitBB.
if (SE) {
- if (Loop *ExitL = LI.getLoopFor(LoopExitBB))
+ if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
SE->forgetLoop(ExitL);
else
// Forget the entire nest as this exits the entire nest.
@@ -532,7 +549,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// If this was full unswitching, we may have changed the nesting relationship
// for this loop so hoist it to its correct parent if needed.
if (FullUnswitch)
- hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+ hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -825,7 +842,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// We may have changed the nesting relationship for this loop so hoist it to
// its correct parent if needed.
- hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+ hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2260,7 +2277,7 @@ static void unswitchNontrivialInvariants(
// First build LCSSA for this loop so that we can preserve it when
// forming dedicated exits. We don't want to perturb some other loop's
// LCSSA while doing that CFG edit.
- formLCSSA(UpdateL, DT, &LI, nullptr);
+ formLCSSA(UpdateL, DT, &LI, SE);
// For loops reached by this loop's original exit blocks we may
// introduced new, non-dedicated exits. At least try to re-form dedicated
@@ -2426,7 +2443,7 @@ turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
if (MSSAU) {
MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
- MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::End);
+ MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
}
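
The SimpleLoopUnswitch changes make SCEV invalidation and LCSSA reconstruction aware of the full loop nest: getTopMostExitingLoop climbs the parent chain from the loop containing the exit block and remembers the outermost loop that still exits through that block, so forgetLoop is called on the right loop. A minimal standalone model of that walk (plain C++, not the LLVM Loop class) follows.

#include <iostream>
#include <string>

struct Loop {
  std::string Name;
  Loop *Parent;        // enclosing loop, or nullptr at the top level
  bool ExitsViaBlock;  // stand-in for Loop::isLoopExiting(ExitBB)
};

// Starting from the loop that contains the exit block, walk outwards and keep
// the outermost loop that still exits via that block.
static Loop *getTopMostExitingLoop(Loop *LoopForExitBB) {
  Loop *TopMost = LoopForExitBB;
  for (Loop *Current = LoopForExitBB; Current; Current = Current->Parent)
    if (Current->ExitsViaBlock)
      TopMost = Current;
  return TopMost;
}

int main() {
  Loop Outer{"outer", nullptr, true};    // also exits via the block
  Loop Middle{"middle", &Outer, false};  // does not exit via the block
  Loop Inner{"inner", &Middle, true};    // contains the exit block
  std::cout << getTopMostExitingLoop(&Inner)->Name << "\n"; // prints "outer"
}
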
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 4544975a4887b..623a8b711ed89 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -35,10 +34,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <utility>
using namespace llvm;
diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp
index 90f3a2aa46e16..677d86f8c7b48 100644
--- a/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
@@ -78,7 +79,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
if (auto *Call = dyn_cast<CallBase>(Inst)) {
// Convergent operations cannot be made control-dependent on additional
// values.
- if (Call->hasFnAttr(Attribute::Convergent))
+ if (Call->isConvergent())
return false;
for (Instruction *S : Stores)
diff --git a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index e6db11f47eadd..cd7bfb2f20dc7 100644
--- a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -283,12 +283,12 @@ static bool isSafeAndProfitableToSpeculateAroundPHI(
int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
if (IID)
- FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(),
- IncomingC->getType());
+ FoldedCost += TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(),
+ IncomingC->getType());
else
FoldedCost +=
- TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(),
- IncomingC->getType());
+ TTI.getIntImmCostInst(UserI->getOpcode(), Idx,
+ IncomingC->getValue(), IncomingC->getType());
// If we accumulate more folded cost for this incoming constant than
// materialized cost, then we'll regress any edge with this constant so
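
The SpeculateAroundPHIs hunk is only an API rename (getIntImmCost splits into getIntImmCostIntrin and getIntImmCostInst), but the surrounding logic is worth spelling out: for each incoming constant, the pass sums the immediate cost of folding it into every speculated user and compares that against the cost of materializing the constant once; speculation is rejected for constants where folding would be the more expensive option. A standalone sketch of that comparison, with made-up cost numbers rather than real TTI queries, is below.

#include <iostream>
#include <vector>

struct IncomingConstant {
  int MatCost;                 // cost to materialize the constant once
  std::vector<int> UserCosts;  // folded immediate cost per speculated user
};

// Speculation only pays off for this constant if folding it into every
// speculated user is no more expensive than materializing it once.
static bool profitableToSpeculate(const IncomingConstant &C) {
  int FoldedCost = 0;
  for (int Cost : C.UserCosts)
    FoldedCost += Cost;
  return FoldedCost <= C.MatCost;
}

int main() {
  IncomingConstant Cheap{/*MatCost=*/2, {1, 1}};   // folding twice costs no more than materializing
  IncomingConstant Costly{/*MatCost=*/1, {2, 2}};  // folding would regress this edge
  std::cout << profitableToSpeculate(Cheap) << " "
            << profitableToSpeculate(Costly) << "\n"; // prints "1 0"
}
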
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index f9d027eb4a3b9..c8d899bb4871c 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -67,6 +67,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index a58c32cc58943..9f82b1263ebd1 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -60,7 +60,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -76,10 +75,12 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <limits>
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 9791cf41f6212..4ce4ce46f67ab 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -34,8 +34,10 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index b27a36b67d62e..9f0ab9103d429 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -76,6 +76,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 707adf46d1f4d..c8461fdc1608b 100644
--- a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -12,6 +12,7 @@
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;